Generators

Generators

Provide an easy way to implement an iterator without the complexity of writing a class implementing \Iterator.

Generators

Let's take an example to introduce Generators.

Issue

Fetch emails from a file emails.dat which contains ~200k emails. On each email we need to apply a transformation (in this case strtolower()).

Generators

Solution #1

<?php

namespace Generators;

/**
 * Loads every line of emails.dat into memory, lower-casing each one.
 */
class EmailArray
{
    /**
     * Reads emails.dat line by line and returns all transformed lines.
     *
     * Lines are returned as fgets() delivers them, so they keep their trailing
     * newline. The complete result is held in memory at once.
     *
     * @return string[] Transformed lines, in file order.
     *
     * @throws \RuntimeException If emails.dat cannot be opened.
     */
    public function getEmails()
    {
        $file = fopen('emails.dat', 'r');

        if ($file === false) {
            throw new \RuntimeException('Unable to open emails.dat');
        }

        $emails = [];

        try {
            // Loop on fgets() itself instead of feof(): feof() only turns true
            // after a read has already hit the end, so a feof()-driven loop
            // stores one extra empty entry when the file ends with a newline.
            while (($line = fgets($file)) !== false) {
                $emails[] = $this->transformEmail($line);
            }
        } finally {
            // Close explicitly rather than waiting for the variable to go out of scope.
            fclose($file);
        }

        return $emails;
    }

    /**
     * Applies the per-email transformation.
     *
     * @param string $email Raw line read from the file.
     *
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}

Solution 1 is to loop over emails and store transformed emails inside an array.

Generators

Solution #1

<?php

require 'vendor/autoload.php';

// The whole list of transformed emails is built first, then walked.
$emails = (new \Generators\EmailArray())->getEmails();

foreach ($emails as $email) {
    // Per-email processing goes here.
}

Solution 1 is to loop over emails and store transformed emails inside an array.

Generators

Solution #2

<?php

namespace Generators;

/**
 * Class EmailIterator
 *
 * Iterator over the lines of emails.dat, backed by an \SplFileObject, that
 * lower-cases each line when it is requested through current().
 *
 * NOTE(review): the class mixes SplFileObject::fgets() with current()/key();
 * the exact line bookkeeping depends on SplFileObject internals — confirm the
 * behaviour on the PHP version in use before changing any statement order.
 *
 * @package Generators
 */
class EmailIterator implements \Iterator
{
    /**
     * File being iterated.
     *
     * @var \SplFileObject
     */
    protected $file;

    /**
     * Bound used by valid(): the key reported by the file after the last read
     * of the constructor's counting pass.
     *
     * @var int
     */
    protected $numberOfLines;

    /**
     * Opens emails.dat for reading and walks it once to record the line bound.
     */
    public function __construct()
    {
        $this->file          = new \SplFileObject('emails.dat', 'r');
        $this->numberOfLines = 0;

        // Counting pass: read until EOF, remembering the file's key after every read.
        while (!$this->file->eof()) {
            $this->file->fgets();

            $this->numberOfLines = $this->file->key();
        }
    }

    /**
     * Returns the file's current line after transformEmail() has been applied.
     *
     * @return string
     */
    public function current()
    {
        return $this->transformEmail($this->file->current());
    }

    /**
     * Advances the iteration by reading one line from the file.
     */
    public function next()
    {
        $this->file->fgets();
    }

    /**
     * Returns the key reported by the underlying file object.
     *
     * @return int
     */
    public function key()
    {
        return $this->file->key();
    }

    /**
     * Tells whether the file's key is still below the bound recorded by the constructor.
     *
     * @return bool
     */
    public function valid()
    {
        return $this->file->key() < $this->numberOfLines;
    }

    /**
     * Rewinds the underlying file object.
     */
    public function rewind()
    {
        $this->file->rewind();
    }

    /**
     * Applies the per-email transformation.
     *
     * @param string $email Raw line read from the file.
     *
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}

Solution 2 is to implement an Iterator.

Generators

Solution #2

<?php

require 'vendor/autoload.php';

// The iterator object is itself traversable, so it is handed to foreach directly.
$iterator = new \Generators\EmailIterator();

foreach ($iterator as $email) {
    // Per-email processing goes here.
}

Solution 2 is to implement an Iterator.

Generators

Two solutions comparison

Solution #1 Solution #2 Difference
Time 1.46 s 3.71 s + 154 %
Memory 38.1 MB 105 kB - 99.7 %

Generators

Generators

Solution #3

<?php

namespace Generators;

/**
 * Streams the lines of emails.dat one at a time, lower-casing each one.
 */
class EmailGenerator
{
    /**
     * Lazily yields every transformed line of emails.dat.
     *
     * Because this is a generator, the file is only opened when iteration
     * starts. Lines keep the trailing newline delivered by fgets().
     *
     * @return \Generator Yields one transformed line (string) per file line.
     *
     * @throws \RuntimeException On first iteration, if emails.dat cannot be opened.
     */
    public function getEmails()
    {
        $file = fopen('emails.dat', 'r');

        if ($file === false) {
            throw new \RuntimeException('Unable to open emails.dat');
        }

        try {
            // Loop on fgets() itself instead of feof(): feof() only turns true
            // after a read has already hit the end, so a feof()-driven loop
            // yields one extra empty value when the file ends with a newline.
            while (($line = fgets($file)) !== false) {
                yield $this->transformEmail($line);
            }
        } finally {
            // Also runs when the consumer stops early and the generator is destroyed.
            fclose($file);
        }
    }

    /**
     * Applies the per-email transformation.
     *
     * @param string $email Raw line read from the file.
     *
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}

Solution 3 is to use a generator.

Generators

Solution #3

<?php

require 'vendor/autoload.php';

// getEmails() hands back a generator; each email is produced only when the loop asks for it.
$emails = (new \Generators\EmailGenerator())->getEmails();

foreach ($emails as $email) {
    // Per-email processing goes here.
}

Solution 3 is to use a generator.

Generators

Three solutions comparison

Solution #1 Solution #2 Solution #3
Time 1.46 s 3.71 s 1.53 s
Memory 38.1 MB 105 kB 230 kB

Generators

Let's take an example to introduce Generators.

Issue

What if we need to fetch from two different files now?

Generators

Array

<?php

namespace Generators;

/**
 * Loads the lines of emails.dat and emails2.dat into memory, lower-casing each one.
 */
class EmailArray
{
    /**
     * Returns the transformed lines of emails.dat followed by those of emails2.dat.
     *
     * @return string[] Transformed lines, in file order.
     *
     * @throws \RuntimeException If either file cannot be opened.
     */
    public function getEmails()
    {
        $emails = [];

        $emails = $this->getEmailsFromFile('emails.dat', $emails);
        $emails = $this->getEmailsFromFile('emails2.dat', $emails);

        return $emails;
    }

    /**
     * Appends the transformed lines of one file to an existing list.
     *
     * @param string   $fileName Path of the file to read.
     * @param string[] $emails   Lines collected so far; new lines are appended after them.
     *
     * @return string[] The given list followed by the file's transformed lines.
     *
     * @throws \RuntimeException If the file cannot be opened.
     */
    public function getEmailsFromFile($fileName, array $emails = [])
    {
        $file = fopen($fileName, 'r');

        if ($file === false) {
            throw new \RuntimeException(sprintf('Unable to open %s', $fileName));
        }

        try {
            // Loop on fgets() itself instead of feof(): feof() only turns true
            // after a read has already hit the end, so a feof()-driven loop
            // stores one extra empty entry when the file ends with a newline.
            while (($line = fgets($file)) !== false) {
                $emails[] = $this->transformEmail($line);
            }
        } finally {
            fclose($file);
        }

        return $emails;
    }

    /**
     * Applies the per-email transformation.
     *
     * @param string $email Raw line read from the file.
     *
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}

Generators

Generator

<?php

namespace Generators;

/**
 * Streams the lines of emails.dat and emails2.dat one at a time, lower-casing each one.
 */
class EmailGenerator
{
    /**
     * Lazily yields the transformed lines of emails.dat, then those of emails2.dat.
     *
     * Both inner generators number their values from 0 and "yield from" keeps
     * those keys, so keys repeat across the two files. Pass false as the
     * preserve_keys argument of iterator_to_array() when collecting the values.
     *
     * @return \Generator Yields one transformed line (string) per file line.
     *
     * @throws \RuntimeException During iteration, if a file cannot be opened.
     */
    public function getEmails()
    {
        // Delegate to getEmailsFromFile(); the previously referenced
        // getEmailsFromFirstFile() does not exist on this class.
        yield from $this->getEmailsFromFile('emails.dat');
        yield from $this->getEmailsFromFile('emails2.dat');
    }

    /**
     * Lazily yields the transformed lines of a single file.
     *
     * @param string $fileName Path of the file to read.
     *
     * @return \Generator Yields one transformed line (string) per file line.
     *
     * @throws \RuntimeException On first iteration, if the file cannot be opened.
     */
    public function getEmailsFromFile($fileName)
    {
        $file = fopen($fileName, 'r');

        if ($file === false) {
            throw new \RuntimeException(sprintf('Unable to open %s', $fileName));
        }

        try {
            // Loop on fgets() itself instead of feof(): feof() only turns true
            // after a read has already hit the end, so a feof()-driven loop
            // yields one extra empty value when the file ends with a newline.
            while (($line = fgets($file)) !== false) {
                yield $this->transformEmail($line);
            }
        } finally {
            // Also runs when the consumer stops early and the generator is destroyed.
            fclose($file);
        }
    }

    /**
     * Applies the per-email transformation.
     *
     * @param string $email Raw line read from the file.
     *
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}

Generators

Internally

<?php

/**
 * Lazily yields the values from $start up to and including $end, in increments of $step.
 *
 * Nothing is yielded when $start is greater than $end.
 *
 * @param int|float $start First value to yield.
 * @param int|float $end   Inclusive upper bound.
 * @param int|float $step  Increment between values; must be greater than zero.
 *
 * @return \Generator
 *
 * @throws \InvalidArgumentException On first iteration, if $step is not greater than zero.
 */
function xrange($start, $end, $step = 1) {
    // A zero or negative step would never move $i past $end; refuse it instead of looping forever.
    if ($step <= 0) {
        throw new \InvalidArgumentException('Step must be greater than zero');
    }

    for ($i = $start; $i <= $end; $i += $step) {
        yield $i;
    }
}

// Walk the generator; each number is produced only when the loop asks for it.
foreach (xrange(1, 1000000) as $value) {
    echo $value . "\n";
}

#1

<?php

// range() builds the complete array before the loop starts.
foreach (range(1, 1000000) as $value) {
    echo $value . "\n";
}

#2

Let's take the example of range()

526 ms

144 MB

855 ms

18 kB

Generators

Internally

Time Memory
range() 526 ms 144 MB
xrange() 855 ms 18 kB
Difference + 62.54 % - 99.99 %
Explanation: range() is faster because it's an internal PHP function; range() requires more memory because it builds an in-memory array, while xrange() returns an iterator.

Generators

Internally

xrange() actually just returns an instance of Generator (that implements \Iterator)

<?php

/**
 * Lazily yields the values from $start up to and including $end, in increments of $step.
 *
 * Nothing is yielded when $start is greater than $end.
 *
 * @param int|float $start First value to yield.
 * @param int|float $end   Inclusive upper bound.
 * @param int|float $step  Increment between values; must be greater than zero.
 *
 * @return \Generator
 *
 * @throws \InvalidArgumentException On first iteration, if $step is not greater than zero.
 */
function xrange($start, $end, $step = 1) {
    // A zero or negative step would never move $i past $end; refuse it instead of looping forever.
    if ($step <= 0) {
        throw new \InvalidArgumentException('Step must be greater than zero');
    }

    for ($i = $start; $i <= $end; $i += $step) {
        yield $i;
    }
}

// Calling a generator function returns a Generator object.
$generator = xrange(1, 1000000);

var_dump(get_class($generator));           // string(9) "Generator"
var_dump($generator instanceof \Iterator); // bool(true)

Generators

Internally

What happens if we double the range?

range(1, 1000000) range(1, 2000000) xrange(1, 1000000) xrange(1, 2000000)
Time 526 ms 1120 ms 890 ms 1700 ms
Memory 144 MB 275 MB 17.6 kB 17.6 kB

x 2

x 2

x 2

x 1

For both range() and xrange() time increases linearly.

 

For range() memory increases linearly.

 

For xrange() memory does not increase.

Generators

Internally

Generators == Iterators that are easy to implement

<?php

/**
 * Yields the three sample rows 'test', 'test2' and 'test3', in that order.
 *
 * @return \Generator
 */
function rows() {
    foreach (['test', 'test2', 'test3'] as $row) {
        yield $row;
    }
}

// foreach drives the generator like any other iterator.
foreach (rows() as $row) {
    var_dump($row);
}
<?php

/**
 * Yields the three sample rows 'test', 'test2' and 'test3', in that order.
 *
 * @return \Generator
 */
function rows() {
    foreach (['test', 'test2', 'test3'] as $row) {
        yield $row;
    }
}

// Same traversal as a foreach, written out with the Iterator methods.
$generator = rows();

var_dump($generator->current()); // string(4) "test"
$generator->next();

var_dump($generator->current()); // string(5) "test2"
$generator->next();

var_dump($generator->current()); // string(5) "test3"

Generators

Internally

Generators cannot be rewound once they have advanced past their first yield or have been closed

<?php

/**
 * Yields the three sample rows 'test', 'test2' and 'test3', in that order.
 *
 * @return \Generator
 */
function rows() {
    foreach (['test', 'test2', 'test3'] as $row) {
        yield $row;
    }
}

$rows = rows();

foreach ($rows as $row) {
    var_dump($row);
}

// The generator has now run to completion. Each attempt to reuse it throws, so
// each one is caught on its own: left uncaught, the first exception would end
// the script before the second attempt is ever reached.

// Rewinding a generator that has already run throws an exception.
try {
    $rows->rewind();
} catch (\Exception $e) {
    echo $e->getMessage(), "\n";
}

// Traversing a closed generator again throws an exception as well.
try {
    foreach ($rows as $row) {
        var_dump($row);
    }
} catch (\Exception $e) {
    echo $e->getMessage(), "\n";
}

Generators

Internally

You can create infinite generators

<?php

/**
 * Endless generator: every step yields the current microtime() string.
 *
 * @return \Generator
 */
function infinite() {
    for (;;) {
        yield microtime();
    }
}

// This loop never ends on its own: the generator always has another value to give.
foreach (infinite() as $timestamp) {
    var_dump($timestamp); // Prints microtime() values endlessly
}

Generators

Internally

Generators can be chained

<?php

// Sample records: one associative array per person, with first_name, last_name and birthday keys.
$values = [
    ['first_name' => 'Harry', 'last_name' => 'Potter', 'birthday' => '1980-08-31'],
    ['first_name' => 'Ron', 'last_name' => 'Weasley', 'birthday' => '1980-03-01'],
    ['first_name' => 'Hermione', 'last_name' => 'Granger', 'birthday' => '1979-09-19']
];

/**
 * Yields each record with an 'id' entry placed in front, numbered from 1.
 *
 * The merge is done with array_merge(), so an 'id' key already present in a
 * record takes precedence over the generated one.
 *
 * @param iterable $values Records (arrays) to number.
 *
 * @return \Generator
 */
function first_conversion($values) {
    $nextId = 1;

    foreach ($values as $record) {
        yield array_merge(['id' => $nextId++], $record);
    }
}

/**
 * Yields each record with its 'last_name' entry converted to upper case.
 *
 * @param iterable $values Records (arrays) holding a 'last_name' entry.
 *
 * @return \Generator
 */
function second_conversion($values) {
    foreach ($values as $record) {
        $upperCased = strtoupper($record['last_name']);
        $record['last_name'] = $upperCased;

        yield $record;
    }
}

// second_conversion() is the inner generator, so each record is upper-cased
// before first_conversion() adds its id.
foreach (first_conversion(second_conversion($values)) as $converted) {
    print_r($converted);
}

Generators

Sending data to Generator

<?php

/**
 * Waits for a single value sent in through Generator::send(), echoes it, then finishes.
 *
 * @return \Generator
 */
function outputter() {
    $received = yield;

    echo $received;
}

$generator = outputter();

$generator->send('test');  // Prints "test"; the generator then runs to its end
$generator->send('test2'); // Prints nothing: the generator has already finished

Coroutines are computer program components that generalize subroutines for nonpreemptive multitasking, by allowing multiple entry points for suspending and resuming execution at certain locations.

Generators

Sending data to Generator

<?php

/**
 * Coroutine that appends every value sent to it, followed by a newline, to a file.
 *
 * The file is opened in append mode when the generator is first resumed and is
 * closed when the generator is destroyed.
 *
 * @param string $fileName Path of the log file.
 *
 * @return \Generator Feed it with Generator::send().
 *
 * @throws \RuntimeException On first use, if the file cannot be opened.
 */
function logger($fileName) {
    $fileHandle = fopen($fileName, 'a');

    // Fail with a clear error instead of passing false on to fwrite().
    if ($fileHandle === false) {
        throw new \RuntimeException(sprintf('Unable to open log file "%s"', $fileName));
    }

    try {
        while (true) {
            $message = yield;
            fwrite($fileHandle, $message . "\n");
        }
    } finally {
        // The loop never ends by itself; this runs when the generator is destroyed.
        fclose($fileHandle);
    }
}

$log = logger('log.txt');

foreach (['Foo', 'Bar'] as $message) {
    $log->send($message);
}

// log.txt has received two lines: Foo\n Bar\n

Generators

Real use cases : "Thread"

<?php

/**
 * Shared body of the step generators: echoes a file line by line, pausing
 * (yielding true) after every line so that other work can run in between.
 *
 * @param string $path File to echo.
 *
 * @return \Generator
 *
 * @throws \RuntimeException On first iteration, if the file cannot be opened.
 */
function echo_file_lines_step($path)
{
    $f = fopen($path, 'r');

    if ($f === false) {
        throw new \RuntimeException(sprintf('Unable to open %s', $path));
    }

    try {
        // Compare with false explicitly: a plain truthiness test would end the
        // loop early on a line whose content is exactly "0".
        while (($line = fgets($f)) !== false) {
            echo $line;
            yield true;
        }
    } finally {
        fclose($f);
    }
}

/**
 * Step that echoes file.txt one line per resume.
 *
 * @return \Generator
 */
function step1()
{
    return echo_file_lines_step("file.txt");
}

/**
 * Step that echoes file2.txt one line per resume.
 *
 * @return \Generator
 */
function step2()
{
    return echo_file_lines_step("file2.txt");
}

/**
 * Step that echoes file3.txt one line per resume.
 *
 * @return \Generator
 */
function step3()
{
    return echo_file_lines_step("file3.txt");
}

/**
 * Runs the given generators in turn, one resume each per round, until all of
 * them have finished.
 *
 * @param \Generator[] $steps Generators to interleave.
 *
 * @return void
 */
function runner(array $steps)
{
    // Start every generator first: valid() runs it up to its first yield.
    // Calling next() on a generator that has not started yet would start it
    // AND advance it, making the first round do two units of work per step.
    foreach ($steps as $key => $step) {
        if (!$step->valid()) {
            unset($steps[$key]);
        }
    }

    while (!empty($steps)) {
        foreach ($steps as $key => $step) {
            $step->next();

            // Drop a step as soon as it has nothing left to do.
            if (!$step->valid()) {
                unset($steps[$key]);
            }
        }
    }
}

// Hand the three step generators to runner().
runner([step1(), step2(), step3()]);

Generators

Real use cases : "DataProvider"

<?php

/**
 * Example test case whose data provider is written as a generator.
 */
class SomethingTest extends PHPUnit_Framework_TestCase
{
    /**
     * @dataProvider addressesProvider
     */
    public function testSomethingThatRequiresAnAddress(Address $address)
    {
        // ...
    }

    /**
     * Yields ten data sets, each holding one Address built from random values.
     *
     * @return \Generator
     */
    public function addressesProvider()
    {
        $alphabet  = 'abcdefghijklmnopqrstuvwxyz';
        $remaining = 10;

        while ($remaining-- > 0) {
            // The values are computed in the order in which they are passed to Address.

            // Random string with length between 8 and 16
            $firstText = substr(str_shuffle($alphabet), 0, rand(8, 16));

            // Random number, zero-padded to five digits
            $fiveDigits = sprintf('%05d', rand(1, 99999));

            // Random string with length between 8 and 16
            $secondText = substr(str_shuffle($alphabet), 0, rand(8, 16));

            // Random string with length 2
            $twoLetters = substr(str_shuffle($alphabet), 0, 2);

            yield [new Address($firstText, $fiveDigits, $secondText, $twoLetters)];
        }
    }
}

Generators

Real use cases : "Generator Control"

<?php

/**
 * Yields 0 through 4 and stops early when the string 'stop' is sent in.
 *
 * @return \Generator
 */
function nums() {
    for ($i = 0; $i < 5; ++$i) {
        // The value of the yield expression is whatever the caller passed to
        // send(), or null when the generator is simply advanced.
        $cmd = (yield $i);

        // Strict comparison: with ==, values such as true (and 0 before PHP 8)
        // also compare equal to 'stop' and would end the generator by accident.
        if ($cmd === 'stop') {
            return;
        }
    }
}

$numbers = nums();

foreach ($numbers as $value) {
    // Once 3 shows up we have seen enough: tell the generator to stop.
    if ($value == 3) {
        $numbers->send('stop');
    }

    echo $value . "\n";
}

Generators

Real use cases : "Fibonacci"

<?php

/**
 * Yields the first $count Fibonacci numbers, starting with 0 and 1.
 *
 * @param int $count How many numbers to produce.
 *
 * @return \Generator
 */
function fibonacci($count)
{
    $a = 0;
    $b = 1;
    $emitted = 0;

    while ($emitted < $count) {
        yield $a;

        // Slide the pair one position forward in the sequence.
        list($a, $b) = [$b, $a + $b];
        ++$emitted;
    }
}

// Print every position together with its Fibonacci number.
foreach (fibonacci(48) as $index => $number) {
    echo $index . ' -> ' . $number . PHP_EOL;
}

Generators

Real use cases : "Obfuscation"

<?php

/**
 * Exposes the manager's records with their sensitive data cleared.
 *
 * NOTE(review): nothing in this class assigns $this->manager; it has to be
 * provided before getData() is iterated — confirm how it is injected.
 */
class Repository
{
    /**
     * Lazily yields every record returned by the manager's getAll(), after
     * its sensitive data has been cleared.
     *
     * @return \Generator
     */
    public function getData()
    {
        foreach ($this->manager->getAll() as $data) {
            yield $this->parseData($data);
        }
    }

    /**
     * Clears the sensitive data of one record.
     *
     * @param object $data Record exposing setSensitiveData().
     *
     * @return object The same record, so that getData() yields it instead of null.
     */
    protected function parseData($data)
    {
        $data->setSensitiveData(null);

        return $data;
    }
}

// Records come out of the generator one at a time, already stripped.
foreach ((new Repository())->getData() as $record) {
    var_dump($record); // No sensitive data
}

Generators

Generators

https://nikic.github.io/2012/12/22/Cooperative-multitasking-using-coroutines-in-PHP.html

Generators

By Babacooll

Generators

  • 1,039