Generators & Iterators &

Some functional stuff

Who am I?

  • Andrii Ursulenko
  • 3 years in bank as Java developer/Team lead
  • Working in PromUA since 2013
  • Team Lead in PromUA since 2014
  • Coding not only in python (Go, Clojure)
  • Took part in prom family, kabanchik.com.ua, goodini.ua
  • github.com/warik
  • a.ursulenko@gmail.com

Disclaimer

  • My opinion is only mine
  • The main goal is to make sure You take away something valuable, not just to tell You some weird stuff You have no clue about
  • I want You to become part of EVO

What we will talk about?

  • Generators - why and how
  • What is an iterator and why it is one of the greatest things in python
  • Functional programming: what, why and how

Let's start with a few questions

  • who knows what an iterator is in python?
  • who knows what a generator is in python and how it works?
  • who knows what the functional approach in programming is and which of its features python has?

Generators

What is a generator?

Python2
gen = xrange(...)

Python3
gen = range(...)
  • just an iterator in common sense
  • yields value
  • "semicoroutine"

Simplest generator ever in python

Pros

  • gives one value at a time
  • preserves its state
  • it's cheap
def loop(x):
    """Exhaust the iterable ``x``, discarding every item it produces."""
    for _item in x:
        continue


def gen(x):
    """Count down from ``x``: yield the current value, then decrement it,
    for as long as the value is truthy."""
    remaining = x
    while remaining:
        yield remaining
        remaining = remaining - 1
        

from timeit import timeit

timeit('loop(s)', 'from __main__ import loop; s = range(100)', number=10000)
0.019029855728149414

timeit('loop(s)', 'from __main__ import loop; s = xrange(100)', number=10000)
0.01918196678161621

# The generator has to be created inside the timed statement: the setup string
# runs only once, so ``s = gen(100)`` is exhausted by the first ``loop(s)`` and
# the remaining 9999 runs would only time a loop over an empty iterator.
timeit('loop(gen(100))', 'from __main__ import loop, gen', number=10000)
# (re-measure: the 0.0019719600677490234 shown previously timed the exhausted generator)

Cons

  • it's not always applicable

Forms of creation

# easiest and most commonly used
s = xrange(...)  # range(...) in py3

# generator expression
s = (x ** 2 for x in range(100) if not (x % 2 or x % 3))

# function
def fibo():
    """Endless generator of Fibonacci numbers, starting 0, 1, 1, 2, ..."""
    current, upcoming = 0, 1
    while True:
        yield current
        current, upcoming = upcoming, current + upcoming

# class
class fibo_n(object):
    """Iterator over the first ``n`` Fibonacci numbers (0, 1, 1, 2, ...).

    The object is its own iterator (``__iter__`` returns ``self``), so it can
    be consumed only once.
    """

    def __init__(self, n):
        self.first, self.second = 0, 1
        self.elements_count = n

    def __iter__(self):
        return self

    def __next__(self):
        # Python 3 name of the protocol method; delegates to ``next``.
        return self.next()

    def next(self):
        """Return the next number, or raise ``StopIteration`` when done."""
        # ``<= 0`` instead of ``not ...``: a negative ``n`` must produce an
        # empty sequence rather than count away from zero forever.
        if self.elements_count <= 0:
            raise StopIteration()

        ret = self.first
        self.first, self.second = self.second, self.first + self.second
        self.elements_count -= 1
        return ret
            

Next? next(), next or __next__ ?

next(iterator[, default])

Retrieve the next item from the iterator by calling its next() method. If default is given, it is returned if the iterator is exhausted, otherwise StopIteration is raised.

s = xrange(3)
next(s)
>>> 0
next(s)
>>> 1
next(s)
>>> 2
next(s)
>>> StopIteration
class python_2:
    """Schematic Python 2 iterator: the protocol method is called ``next``.

    ``have_more_values`` and ``smth`` are placeholders, not real names.
    """

    def next(self):
        # Hand out a value while there is one; signal the end otherwise.
        if have_more_values:
            return smth
        raise StopIteration()


class python_3:
    """Schematic Python 3 iterator: the protocol method is called ``__next__``.

    ``have_more_values`` and ``smth`` are placeholders, not real names.
    """

    def __next__(self):
        # Hand out a value while there is one; signal the end otherwise.
        if have_more_values:
            return smth
        raise StopIteration()
def get_required(values):
    '''Look for the first value which fits the condition.

    Scans ``values`` in order and stops at the first item whose
    ``some_condition()`` is true and whose ``amount`` is above 200.
    If such an item was found (and is truthy), ``do_some_other_stuff()``
    is called.  Nothing is returned.
    '''
    needed_value = None
    for value in values:
        if value.some_condition() and value.amount > 200:
            needed_value = value
            break

    if needed_value:
        do_some_other_stuff()


def get_required(values):
    '''Look for the first value which fits the condition.

    Uses ``next()`` with a default: the generator expression is consumed only
    up to the first item whose ``some_condition()`` is true and whose
    ``amount`` is above 200; ``None`` is used when there is no such item.
    If an item was found (and is truthy), ``do_some_other_stuff()`` is
    called.  Nothing is returned.
    '''
    needed_value = next(
        # The filter must test the loop variable ``v``; ``value`` is not
        # defined in this scope.
        (v for v in values if v.some_condition() and v.amount > 200),
        None
    )
    if needed_value:
        do_some_other_stuff()

Great example about next() with default value

Features

def _get_cold_companies_ids(count):
    """Hand out consecutive slices of the rows fetched up front.

    At most ``count`` rows are loaded once via
    ``database.query.limit(count).all()``.  Each slice then takes two steps
    from the caller: advance the generator with ``next()`` so it pauses at the
    bare ``yield``, then ``send(number)``; the value returned by ``send`` is
    the next ``number`` rows.  An empty slice means the rows have run out.
    """
    cold_companies = (
        database.query
        .limit(count)
        .all()
    )
    _from = 0
    while True:
        number = yield
        yield cold_companies[_from:_from + number]
        # Move the window past the slice just handed out.  Assigning
        # ``number`` instead of adding it made later slices overlap or skip.
        _from += number


def cold_companies_distribution():
    """Request a slice of cold companies for every agency, smallest limit first.

    ``agencies`` is sorted by the second element of each pair (the limit).
    Iteration stops as soon as the generator hands back an empty slice.
    """
    cold_companies_gen = _get_cold_companies_ids(companies_required)
    for agency_id, cold_companies_limit in sorted(agencies, key=lambda a: a[1]):
        # Advance to the bare ``yield`` so the generator can accept send().
        # The builtin next() works on Python 2.6+ and 3; ``.next()`` is py2-only.
        next(cold_companies_gen)
        cold_companies_ids = cold_companies_gen.send(cold_companies_limit)
        if not cold_companies_ids:
            break
  • send(value)
  • close()
def endless():
    """Generator that yields 1 forever (until it is closed)."""
    while True:
        yield 1

f = endless()
next(f)
>>> 1
next(f)
>>> 1
f.close()
next(f)
>>> StopIteration
  • throw(type[, value[, traceback]])
def gen_exc():
    """Pause at a bare ``yield`` and print any ``Exception`` thrown in there.

    Once the exception has been handled the function body ends, so the
    ``throw()`` call that delivered it raises ``StopIteration`` in the caller.
    """
    try:
        yield
    except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3
        print(e)


s = gen_exc()
s.throw(Exception, 'exception in generator!')
>>> exception in generator!
>>> StopIteration

len? in?

len(xrange(10))  # ?
>>> 10


def custom_range(n):
    """Generator yielding 0, 1, 2, ... while counting ``n`` down to zero."""
    value = 0
    remaining = n
    while remaining:
        yield value
        value += 1
        remaining -= 1

len(custom_range(10))  # ?
>>> TypeError: object of type 'generator' has no len()

1 in xrange(10)  # ?
>>> True

1 in custom_range(10)  # ?
>>> True

s = custom_range(10)
11 in s
>>> False

next(s)
>>> StopIteration
# Example 1
def inner_gen():
    """Yield the letters 'a' and 'b', in that order."""
    for letter in ('a', 'b'):
        yield letter


def outer_gen():
    # Yields the generator object returned by inner_gen() as one single item;
    # the values inside it are not unpacked.
    yield inner_gen()

next(outer_gen())
>>> <generator object inner_gen at 0x10731aa68>

# Expected
>>> 'a'


# Possible solution
def outer_gen():
    """Re-yield every value produced by ``inner_gen()``, one at a time."""
    for item in inner_gen():
        yield item

next(outer_gen())
>>> 'a'
# Example 2 - closer to real life
def get_active_users():
    """Yield, one by one, the users matched by ``User.condition_is_status_active()``."""
    users = session.query(User)
    active_users = users.filter(User.condition_is_status_active())
    for active_user in active_users:
        yield active_user


def get_blocked_users():
    """Yield users joined to a ``BlockRecord`` whose status is ``BlockRecord.ACTIVE``."""
    with_blocks = session.query(User).join(BlockRecord)
    blocked_users = with_blocks.filter(BlockRecord.status == BlockRecord.ACTIVE)
    for blocked_user in blocked_users:
        yield blocked_user


def get_active_n_blocked():
    """Yield every active user first, then every blocked user."""
    for active_user in get_active_users():
        yield active_user
    for blocked_user in get_blocked_users():
        yield blocked_user

But what about close, send and throw?

# Hand-written equivalent of ``RESULT = yield from EXPR`` (this mirrors the
# expansion given in PEP 380; the final ``RESULT = _r`` step is not shown).
#   _i - the sub-iterator          _y - value passed out to our caller
#   _s - value sent in by caller   _r - result carried by StopIteration.value
_i = iter(EXPR)
try:
    _y = next(_i)  # prime the sub-iterator
except StopIteration as _e:
    _r = _e.value  # sub-iterator finished without yielding anything
else:
    while 1:
        try:
            _s = yield _y
        except GeneratorExit as _e:
            # We are being closed: close the sub-iterator too, if it can be.
            try:
                _m = _i.close
            except AttributeError:
                pass
            else:
                _m()
            raise _e
        except BaseException as _e:
            # An exception was thrown into us: forward it when the
            # sub-iterator has throw(), otherwise re-raise it here.
            _x = sys.exc_info()
            try:
                _m = _i.throw
            except AttributeError:
                raise _e
            else:
                try:
                    _y = _m(*_x)
                except StopIteration as _e:
                    _r = _e.value
                    break
        else:
            # Normal resumption: a plain next() arrives as ``None``,
            # anything else is forwarded with send().
            try:
                if _s is None:
                    _y = next(_i)
                else:
                    _y = _i.send(_s)
            except StopIteration as _e:
                _r = _e.value
                break

PEP 0380 introduces new syntax:

yield from!

yield from?! really?

# Example 2 - closer to real life
def get_active_users():
    """Yield, one by one, the users matched by ``User.condition_is_status_active()``."""
    users = session.query(User)
    active_users = users.filter(User.condition_is_status_active())
    for active_user in active_users:
        yield active_user


def get_blocked_users():
    """Yield users joined to a ``BlockRecord`` whose status is ``BlockRecord.ACTIVE``."""
    with_blocks = session.query(User).join(BlockRecord)
    blocked_users = with_blocks.filter(BlockRecord.status == BlockRecord.ACTIVE)
    for blocked_user in blocked_users:
        yield blocked_user


def get_active_n_blocked():
    """Yield every active user, then every blocked user.

    Each ``yield from`` delegates to the given generator until it is exhausted.
    """
    yield from get_active_users()
    yield from get_blocked_users()
#
#   Here is a binary tree that produces an inorder traversal
#   of its items when iterated over. (Courtesy of Scott Dial)
#

class BinaryTree:
  """Binary tree node whose iteration is an in-order traversal.

  ``left`` and ``right`` are sub-trees (or ``None``); ``us`` is this node's
  own item (``None`` means "no item").
  """

  def __init__(self, left=None, us=None, right=None):
    self.left = left
    self.us = us
    self.right = right

  def __iter__(self):
    # Compare against None rather than relying on truthiness, so that falsy
    # items such as 0 or '' are still produced.
    if self.left is not None:
      yield from self.left
    if self.us is not None:
      yield self.us
    if self.right is not None:
      yield from self.right

#
#   For comparison, here is the same thing using for-loops
#   instead of yield-from.
#

class BinaryTree_ForLoop:
  """Same in-order binary tree, written with explicit for-loops.

  ``left`` and ``right`` are sub-trees (or ``None``); ``us`` is this node's
  own item (``None`` means "no item").
  """

  def __init__(self, left=None, us=None, right=None):
    self.left = left
    self.us = us
    self.right = right

  def __iter__(self):
    # Compare against None rather than relying on truthiness, so that falsy
    # items such as 0 or '' are still produced.
    if self.left is not None:
      for node in self.left:
        yield node
    if self.us is not None:
      yield self.us
    if self.right is not None:
      for node in self.right:
        yield node

Iterator concept

# print(x) with a single argument behaves the same on Python 2 and 3.
for l in [1, 2, 3]:  # iterate over list
    print(l)

for t in (1, 2, 3):  # iterate over tuple
    print(t)

for c in 'abcde':  # iterate over string
    print(c)

for d in {'a': 1, 'b': 2}:  # iterate over dict keys
    print(d)

for i in xrange(10):  # iterate over xrange object (lazy, generator-like; range in py3)
    print(i)

How does it actually work? Iterator object FTW!

class Iterable(object):
    """Iterable that creates a fresh ``Iterator`` each time it is iterated."""

    def __init__(self, count):
        self.count = count

    def __iter__(self):
        # A new iterator object per call; this object keeps no iteration state.
        return Iterator(self.count)
class Iterator(object):
    """Countdown iterator: produces ``times - 1``, ``times - 2``, ..., 0."""

    def __init__(self, times):
        self.times = times

    def __iter__(self):
        # Iterators are expected to be iterable themselves, so that they can
        # be used directly in ``for`` loops, ``list()`` and friends.
        return self

    def __next__(self):
        # Python 3 name of the protocol method; delegates to ``next``.
        return self.next()

    def next(self):
        """Return the next value, or raise ``StopIteration`` when done."""
        # ``<= 0`` so that a negative ``times`` yields nothing instead of
        # counting away from zero forever.
        if self.times <= 0:
            raise StopIteration()

        self.times -= 1
        return self.times
for n in Iterable(4):
    print n

>>> 3
>>> 2
>>> 1
>>> 0
class hellower(object):
    """Iterator that says 'Hello!' ``n`` times.

    The object is its own iterator, so it can be consumed only once.
    """

    def __init__(self, n):
        self.hellos_count = n

    def __iter__(self):
        return self

    def __next__(self):
        # Python 3 name of the protocol method; delegates to ``next``.
        return self.next()

    def next(self):
        """Return 'Hello!', or raise ``StopIteration`` when none are left."""
        # ``<= 0`` so that a negative ``n`` yields nothing (as the
        # generator-expression variant of this class does) instead of
        # counting away from zero forever.
        if self.hellos_count <= 0:
            raise StopIteration()

        self.hellos_count -= 1
        return 'Hello!'


class hellower(object):
    """Iterable that says 'Hello!' ``n`` times.

    ``__iter__`` returns a new generator expression on every call, so the
    object can be iterated more than once.
    """

    def __init__(self, n):
        self.hellos_count = n

    def __iter__(self):
        # xrange is the Python 2 spelling; use range on Python 3.
        return ('Hello!' for _ in xrange(self.hellos_count))


for c in hellower(3):
    print c

>>> Hello!
>>> Hello!
>>> Hello!
class OrderedList(object):
    """List-like container that always presents its values in sorted order.

    Values are stored in insertion order; sorting happens each time the
    object is printed or iterated.
    """

    def __init__(self, _list=None):
        """Copy the items of ``_list``; ``None`` or a non-iterable gives an empty list."""
        if _list is None:
            t_values = []
        else:
            try:
                t_values = list(_list)
            except TypeError:
                # Non-iterable argument: keep the original best-effort
                # behaviour of starting empty.  Any other error raised while
                # consuming ``_list`` is a real failure and is not hidden.
                t_values = []
        self.values = t_values
        super(OrderedList, self).__init__()

    def append(self, val):
        self.values.append(val)

    def __repr__(self):
        return str(sorted(self.values))

    def __iter__(self):
        return iter(sorted(self.values))


print OrderedList([1, 3, 6, 2])

>>> [1, 2, 3, 6]

for i in OrderedList((4, 2, 6, 1)):
    print i

>>> 1
>>> 2
>>> 4
>>> 6

More examples

Iterator != Iterable != Generator

  • Iterator - the object by means of which You iterate; the method next (python2) or __next__ (python3) MUST be implemented
  • Iterable - the object over which You actually iterate; it MUST implement the method __iter__, which returns an iterator. An iterable may be, but does not have to be, an iterator itself
  • Generator - implements the iterator protocol, so it is both an iterator and an iterable.

A couple more relevant examples

class mmap(object):
    """Lazy map: iterating applies ``f`` to each item of ``coll``."""

    def __init__(self, f, coll):
        self.coll = coll
        self.f = f

    def __iter__(self):
        # A fresh generator expression per call; nothing is computed until
        # the result is actually consumed.
        return (self.f(item) for item in self.coll)


print ''.join(mmap(lambda c: c.upper(), 'such an irrelevant text'))
>>> SUCH AN IRRELEVANT TEXT

print ''.join(mmap(lambda i: str(i + 10), (1, 2, 3, 4, 5)))
>>> 11, 12, 13, 14, 15
# sort_of_db = range(100)

class Users(object):
    """Iterable over users: either all loaded at once or fetched page by page.

    With ``_all=True`` the result of ``session.query(User).all()`` is
    iterated; otherwise iteration goes through a ``Pager`` over the query.
    """

    def __init__(self, _all=False):
        self._all = _all

    def __iter__(self):
        if self._all:
            # return iter(sort_of_db)
            # __iter__ must return an iterator; wrap the result of .all() in
            # iter() instead of returning the collection itself.
            return iter(session.query(User).all())

        # return Pager(None)
        return Pager(session.query(User))


for _u in Users(_all=True):
    print _u

>>> <User id=1>
>>> <User id=2>
>>> <User id=3>
>>> <User id=4>
>>> <User id=5>
...
>>> <User id=99999>


for _u in Users():
    print _u

>>> querying from db...
>>> <User id=1>
>>> <User id=2>
>>> <User id=3>
>>> querying from db...
>>> <User id=4>
>>> <User id=5>
>>> <User id=6>
>>> querying from db...
...
>>> <User id=99999>
class Pager(object):
    """Iterator that pulls rows from ``query`` one page ("bucket") at a time.

    ``query`` must support ``offset(n).limit(m).all()``.  A new page of up to
    ``per_page`` rows is fetched only when the current one has been handed
    out completely; a page shorter than ``per_page`` marks the end.
    NOTE(review): paging with offset/limit presumably needs a stable ordering
    on the query - confirm with the caller.
    """

    def __init__(self, query, per_page=3):
        self.query, self.per_page = query, per_page
        self.bucket, self.current, self.offset = [], 0, 0
        self.is_last_bucket = False

    def __iter__(self):
        return self

    def __next__(self):
        # Python 3 name of the protocol method; delegates to ``next``.
        return self.next()

    def _is_last_item(self):
        return self.current == len(self.bucket)

    def _get_bucket(self):
        if self.is_last_bucket:
            # Empty list rather than None, so that calling next() again after
            # exhaustion keeps raising StopIteration instead of failing in len().
            return []

        print('querying from db...')
        # bucket = sort_of_db[self.offset:self.offset + self.per_page]
        # .all() executes the query so the page is a real list with a length.
        bucket = self.query.offset(self.offset).limit(self.per_page).all()
        self.offset, self.current = self.offset + len(bucket), 0
        self.is_last_bucket = len(bucket) < self.per_page
        return bucket

    def next(self):
        """Return the next row, or raise ``StopIteration`` when there are none."""
        if self._is_last_item():
            self.bucket = self._get_bucket()

        if not self.bucket:
            raise StopIteration()

        ret = self.bucket[self.current]
        self.current += 1
        return ret

Extra... protocols... yeah, protocols

Must know protocols

  • Context manager
  • Descriptor
  • Decorator (sort of)

And some functional stuff)

Functional wut? What about OOP?

Yeah, functional. Now deal with it, and let's talk about concepts

first-class and higher-order functions

def applicator(func, *args):
    """Return a generator that lazily applies ``func`` to each positional argument."""
    return (func(item) for item in args)


def upper(s):
    """Return ``s.upper()``."""
    return s.upper()


print ' '.join(applicator(upper, 'a', 'b', 'c', 'd'))

>>> A B C D

pure functions

# non pure function
def convert(converter, _list):
    """Replace every element of ``_list`` in place with ``converter(element)``.

    The caller's list is mutated and nothing is returned.
    """
    for position, item in enumerate(_list):
        _list[position] = converter(item)


# pure function
def convert(converter, _list):
    """Return a new list holding ``converter(element)`` for each element.

    ``_list`` itself is left untouched.
    """
    converted = [converter(item) for item in _list]
    return converted

recursion

def fibonacci(n, first=0, second=1):
    """Generate ``n`` numbers of the Fibonacci-style sequence seeded with
    ``first`` and ``second``."""
    current, upcoming = first, second
    for _ in range(n):
        yield current
        current, upcoming = upcoming, current + upcoming


def fibonacci(n, first=0, second=1):
    """Return a list with ``n`` numbers of the Fibonacci-style sequence seeded
    with ``first`` and ``second``, built recursively.

    ``n <= 0`` gives an empty list.
    """
    # Without this guard ``n`` would step past the ``n == 1`` base case and
    # recurse until RecursionError.
    if n <= 0:
        return []
    if n == 1:
        return [first]
    return [first] + fibonacci(n - 1, second, first + second)

lambdas (anonymous functions)

def applicator(func, *args):
    """Return a generator that lazily applies ``func`` to each positional argument."""
    return (func(item) for item in args)


print ' '.join(applicator(lambda x: x.upper(),
                          'a', 'b', 'c', 'd'))

>>> A B C D

map, filter - py3

imap, ifilter - py2

map(function, iterable, ...)

Return an iterator that applies function to every item of iterable, yielding the results.

def square(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)


list(map(square, (1, 2, 3)))

>>> [1, 4, 9]


# With lambdas
list(map(lambda x: x ** 2, (1, 2, 3)))

>>> [1, 4, 9]
def is_upper(letter):
    """Tell whether ``letter`` is uppercase, as reported by its ``isupper()``."""
    verdict = letter.isupper()
    return verdict


list(filter(is_upper, 'aAbCdeF'))
>>> ['A', 'C', 'F']

# With lambdas
list(filter(lambda x: x.is_upper(), 'aAbCdeF'))
>>> ['A', 'C', 'F']

# Even more clever
list(filter(str.isupper, 'aAbCdeF'))
>>> ['A', 'C', 'F']

filter(function, iterable)

Construct an iterator from those elements of iterable for which function returns true.

all, any, zip

all(not x % 2 for x in range(11))

>>> False


any(not x % 2 for x in range(11))

>>> True

dict(zip(range(5), 'abcde'))

>>> {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e'}
# How not to use zip
# 'abcde' -> 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e'
# s = 'abcde'

for i, x in zip(range(1, len(s) + 1), s):
    print('{}: {}'.format(i, x))


# Correct way
for i, x in enumerate(s, 1):
    print('{}: {}'.format(i, x))

functools.partial(func, *args, **keywords)

def multiplier(x, y):
    """Return the product ``x * y``."""
    product = x * y
    return product

multiplier(2, 3)

>>> 6


mult_by_2 = partial(multiplier, 2)
mult_by_2(3)

>>> 6

functools.reduce(function, iterable[, initializer])

reduce(lambda x, y: x + y, [[1], [2, 3], [4, 5, 6]])

>>> [1, 2, 3, 4, 5, 6]

# Trick
sum([[1], [2, 3], [4, 5, 6]], [])

# Max on reduce
l = list(random.randint(1, 10) for _ in range(10))
reduce(lambda x, y: x if x >= y else y, l)
def register_user(email, name=''):
    """Print ``email`` and ``name`` separated by ' | '."""
    # One pre-formatted string prints the same text on Python 2 and 3; a
    # multi-argument print(...) would print a tuple on Python 2.
    print('{} | {}'.format(email, name))


auto_register_user = partial(register_user, name='AUTO_REGISTRATION')
auto_register_user('aaa@aaa.aaa')

>>> aaa@aaa.aaa | AUTO_REGISTRATION

I hear You asking about immutable data structures... yeah...

Useful links

Your task

  • command line utility - pygrep
  • two parameters - file pattern and keyword
  • result - lines with path to file, containing keyword, just like grep)

Did You learn something new?

Made with Slides.com