Generators


Generators

Generators provide an easy way to implement an iterator without the complexity of writing a class that implements \Iterator.
Generators

Let's take an example to introduce Generators.
Issue
Fetch emails from a file, emails.dat, which contains
~ 200k emails. We need to apply a transformation to each email (in this case strtolower()).
Generators

Solution #1
<?php
namespace Generators;
/**
 * Loads every line of emails.dat into memory and returns them as one array.
 */
class EmailArray
{
    /**
     * Reads emails.dat line by line and collects the transformed lines.
     *
     * @return string[] One lower-cased entry per line of the file. Line endings
     *                  are kept exactly as fgets() returns them.
     * @throws \RuntimeException When emails.dat cannot be opened.
     */
    public function getEmails()
    {
        $file = fopen('emails.dat', 'r');
        if ($file === false) {
            // Without this guard the read loop would operate on an invalid handle.
            throw new \RuntimeException('Unable to open emails.dat');
        }
        $emails = [];
        try {
            // Loop on the result of fgets() rather than on feof(): fgets()
            // returns false at end of file, and testing feof() before reading
            // can let that false slip into the result as a bogus empty entry.
            while (($line = fgets($file)) !== false) {
                $emails[] = $this->transformEmail($line);
            }
        } finally {
            // Release the handle even if a transformation throws.
            fclose($file);
        }
        return $emails;
    }

    /**
     * Normalises a single e-mail line.
     *
     * @param string $email Raw line read from the file.
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Solution 1 is to loop over emails and store transformed emails inside an array.
Generators

Solution #1
<?php
// Load the autoloader file so that \Generators\EmailArray can be resolved.
require 'vendor/autoload.php';
$emailArray = new \Generators\EmailArray();
// getEmails() builds and returns the complete array before the first iteration.
foreach ($emailArray->getEmails() as $email) {
// Placeholder: per-email processing would go here.
}

Solution 1 is to loop over emails and store transformed emails inside an array.
Generators

Solution #2
<?php
namespace Generators;
/**
 * Iterator over the lines of emails.dat that lower-cases each line it returns.
 *
 * Wraps an \SplFileObject. The constructor reads through the whole file once
 * to record a line count, which valid() then uses as the end-of-iteration bound.
 *
 * @package Generators
 */
class EmailIterator implements \Iterator
{
/**
 * @var \SplFileObject Handle on emails.dat, opened in read mode.
 */
protected $file;
/**
 * @var int Value of $file->key() after the last read done by the constructor.
 */
protected $numberOfLines;
/**
 * Opens emails.dat and walks it once with fgets() to count its lines.
 *
 * NOTE(review): the file position is left at the end of the file; iteration
 * relies on rewind() being called first (foreach does this) - confirm for
 * manual use of the iterator.
 */
public function __construct()
{
$this->file = new \SplFileObject('emails.dat', 'r');
$this->numberOfLines = 0;
while (!$this->file->eof()) {
$this->file->fgets();
// key() is sampled after every read, so the last assignment wins.
$this->numberOfLines = $this->file->key();
}
}
/**
 * @return string The current line of the file, passed through transformEmail().
 */
public function current()
{
return $this->transformEmail($this->file->current());
}
/**
 * Advances the underlying file by reading one line with fgets().
 */
public function next()
{
$this->file->fgets();
}
/**
 * @return int The key (line number) reported by the underlying file object.
 */
public function key()
{
return $this->file->key();
}
/**
 * @return bool True while the file's key is below the count taken in the constructor.
 */
public function valid()
{
return $this->file->key() < $this->numberOfLines;
}
/**
 * Rewinds the underlying file object.
 */
public function rewind()
{
$this->file->rewind();
}
/**
 * @param string $email Raw line read from the file.
 * @return string The line converted to lower case.
 */
public function transformEmail($email)
{
return strtolower($email);
}
}
Solution 2 is to implement an Iterator.
Generators

Solution #2
<?php
// Load the autoloader file so that \Generators\EmailIterator can be resolved.
require 'vendor/autoload.php';
$emailIterator = new \Generators\EmailIterator();
// foreach drives the \Iterator methods of the object; each $email is one transformed line.
foreach ($emailIterator as $email) {
// Placeholder: per-email processing would go here.
}
Solution 2 is to implement an Iterator.

Generators

Two solutions comparison
| Solution #1 | Solution #2 | Difference |
---|---|---|---|
Time | 1.46 s | 3.71 s | + 154 % |
Memory | 38.1 MB | 105 kB | - 99.7 % |
Generators


Generators

Solution #3
<?php
namespace Generators;
/**
 * Streams the lines of emails.dat one at a time through a generator.
 */
class EmailGenerator
{
    /**
     * Lazily yields each line of emails.dat after transformation.
     *
     * The file is opened when iteration starts, not when this method is called.
     *
     * @return \Generator Yields one lower-cased line per line of the file,
     *                    with line endings kept as fgets() returns them.
     * @throws \RuntimeException On first iteration, when emails.dat cannot be opened.
     */
    public function getEmails()
    {
        $file = fopen('emails.dat', 'r');
        if ($file === false) {
            throw new \RuntimeException('Unable to open emails.dat');
        }
        try {
            // fgets() returns false at end of file; looping on feof() instead
            // can let that false be yielded as a bogus empty entry.
            while (($line = fgets($file)) !== false) {
                yield $this->transformEmail($line);
            }
        } finally {
            // Also runs when the consumer stops early and the generator is
            // destroyed, so the handle never outlives the iteration.
            fclose($file);
        }
    }

    /**
     * Normalises a single e-mail line.
     *
     * @param string $email Raw line read from the file.
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Solution 3 is to use a generator.
Generators

Solution #3
<?php
require 'vendor/autoload.php';
$emailGenerator = new \Generators\EmailGenerator();
// getEmails() contains yield, so it returns a generator; lines are produced as the loop advances.
foreach ($emailGenerator->getEmails() as $email) {
// Placeholder: per-email processing would go here.
}

Solution 3 is to use a generator.
Generators

Three solutions comparison
| Solution #1 | Solution #2 | Solution #3 |
---|---|---|---|
Time | 1.46 s | 3.71 s | 1.53 s |
Memory | 38.1 MB | 105 kB | 230 kB |
Generators

Let's take an example to introduce Generators.
Issue
What if we need to fetch from two different files now?
Generators

Array
<?php
namespace Generators;
/**
 * Loads the lines of emails.dat and emails2.dat into one in-memory array.
 */
class EmailArray
{
    /**
     * Collects the transformed lines of both data files, first file first.
     *
     * @return string[] Lower-cased lines of emails.dat followed by those of emails2.dat.
     * @throws \RuntimeException When either file cannot be opened.
     */
    public function getEmails()
    {
        $emails = [];
        $emails = $this->getEmailsFromFile('emails.dat', $emails);
        $emails = $this->getEmailsFromFile('emails2.dat', $emails);
        return $emails;
    }

    /**
     * Appends the transformed lines of one file to an existing list.
     *
     * @param string   $fileName Path of the file to read.
     * @param string[] $emails   Entries collected so far; new ones are appended.
     * @return string[] The given list followed by the lines of $fileName.
     * @throws \RuntimeException When $fileName cannot be opened.
     */
    public function getEmailsFromFile($fileName, array $emails = [])
    {
        $file = fopen($fileName, 'r');
        if ($file === false) {
            throw new \RuntimeException("Unable to open {$fileName}");
        }
        try {
            // fgets() returns false at end of file; looping on feof() instead
            // can let that false be stored as a bogus empty entry.
            while (($line = fgets($file)) !== false) {
                $emails[] = $this->transformEmail($line);
            }
        } finally {
            fclose($file);
        }
        return $emails;
    }

    /**
     * Normalises a single e-mail line.
     *
     * @param string $email Raw line read from a file.
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Generators

Generator
<?php
namespace Generators;
/**
 * Streams the lines of emails.dat and emails2.dat through chained generators.
 */
class EmailGenerator
{
    /**
     * Lazily yields the transformed lines of both files, first file first.
     *
     * Each delegated generator numbers its values from 0, so keys repeat across
     * the two files; pass false as the second argument of iterator_to_array()
     * when collecting the result.
     *
     * @return \Generator Yields lower-cased lines.
     * @throws \RuntimeException During iteration, when a file cannot be opened.
     */
    public function getEmails()
    {
        // Delegate to getEmailsFromFile(); the class defines no other file reader.
        yield from $this->getEmailsFromFile('emails.dat');
        yield from $this->getEmailsFromFile('emails2.dat');
    }

    /**
     * Lazily yields the transformed lines of one file.
     *
     * @param string $fileName Path of the file to read.
     * @return \Generator Yields one lower-cased line per line of the file.
     * @throws \RuntimeException On first iteration, when $fileName cannot be opened.
     */
    public function getEmailsFromFile($fileName)
    {
        $file = fopen($fileName, 'r');
        if ($file === false) {
            throw new \RuntimeException("Unable to open {$fileName}");
        }
        try {
            // fgets() returns false at end of file; looping on feof() instead
            // can let that false be yielded as a bogus empty entry.
            while (($line = fgets($file)) !== false) {
                yield $this->transformEmail($line);
            }
        } finally {
            // Runs on normal completion and when the generator is destroyed early.
            fclose($file);
        }
    }

    /**
     * Normalises a single e-mail line.
     *
     * @param string $email Raw line read from a file.
     * @return string The line converted to lower case.
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}

Generators

Internally
<?php
/**
 * Lazily yields the numbers from $start up to and including $end.
 *
 * @param int|float $start First value to yield.
 * @param int|float $end   Upper bound (inclusive).
 * @param int|float $step  Increment between values; must be greater than zero.
 * @return \Generator Yields $start, $start + $step, ... while the value is <= $end.
 * @throws \InvalidArgumentException On first iteration, when $step is not positive.
 */
function xrange($start, $end, $step = 1) {
    if ($step <= 0) {
        // A zero or negative step would never move past $end, so the loop
        // below would yield forever.
        throw new \InvalidArgumentException('xrange() expects $step to be greater than zero');
    }
    for ($i = $start; $i <= $end; $i += $step) {
        yield $i;
    }
}
// Print one number per line; xrange() hands out the values one at a time.
foreach (xrange(1, 1000000) as $num) {
echo $num, "\n";
}
#1
<?php
// Print one number per line; range() builds the full array before the loop starts.
foreach (range(1, 1000000) as $num) {
echo $num, "\n";
}
#2
Let's take the example of range()
range(): 526 ms, 144 MB
xrange(): 855 ms, 18 kB
Generators

Internally
| Time | Memory |
---|---|---|
range() | 526 ms | 144 MB |
xrange() | 855 ms | 18 kB |
Difference | + 62.54 % | - 99.99 % |
Explanation | range() is faster because it's an internal PHP function | range() requires more memory because it builds an in-memory array while xrange() returns an iterator |

Generators

Internally
xrange() actually just returns an instance of Generator (that implements \Iterator)
<?php
/**
 * Lazily yields the numbers from $start up to and including $end.
 *
 * @param int|float $start First value to yield.
 * @param int|float $end   Upper bound (inclusive).
 * @param int|float $step  Increment between values; must be greater than zero.
 * @return \Generator Yields $start, $start + $step, ... while the value is <= $end.
 * @throws \InvalidArgumentException On first iteration, when $step is not positive.
 */
function xrange($start, $end, $step = 1) {
    if ($step <= 0) {
        // A zero or negative step would never move past $end, so the loop
        // below would yield forever.
        throw new \InvalidArgumentException('xrange() expects $step to be greater than zero');
    }
    for ($i = $start; $i <= $end; $i += $step) {
        yield $i;
    }
}
// Calling a function that contains yield does not run its body; it returns a Generator object.
$range = xrange(1, 1000000);
var_dump(get_class($range)); // string(9) "Generator"
var_dump($range instanceof \Iterator); // bool(true)
Generators

Internally
What happens if we double the range?
| range(1, 1000000) | range(1, 2000000) | xrange(1, 1000000) | xrange(1, 2000000) |
---|---|---|---|---|
Time | 526 ms | 1120 ms | 890 ms | 1700 ms |
Memory | 144 MB | 275 MB | 17.6 kB | 17.6 kB |
x 2
x 2
x 2
x 1
For both range() and xrange() time increases linearly.
For range() memory increases linearly.
For xrange() memory does not increase.
Generators

Internally
Generators == an Iterator that is easy to implement
<?php
/**
 * Generator that produces the three demo strings in order.
 *
 * @return \Generator Yields 'test', 'test2' and 'test3'.
 */
function rows() {
    foreach (['test', 'test2', 'test3'] as $label) {
        yield $label;
    }
}
// A generator can be consumed with foreach like any other iterator.
$rows = rows();
foreach ($rows as $row) {
var_dump($row);
}
<?php
/**
 * Generator that produces the three demo strings in order.
 *
 * @return \Generator Yields 'test', 'test2' and 'test3'.
 */
function rows() {
    yield from ['test', 'test2', 'test3'];
}
// The same generator driven by hand through the \Iterator methods:
// current() reads the value at the current yield, next() resumes up to the following one.
$rows = rows();
var_dump($rows->current());
$rows->next();
var_dump($rows->current());
$rows->next();
var_dump($rows->current());
Generators

Internally
Generators cannot be rewound once they have advanced past their first yield or have been closed
<?php
/**
 * Generator that produces the three demo strings in order.
 *
 * @return \Generator Yields 'test', 'test2' and 'test3'.
 */
function rows() {
    $labels = ['test', 'test2', 'test3'];
    foreach ($labels as $label) {
        yield $label;
    }
}
$rows = rows();
// First pass: runs the generator to completion.
foreach ($rows as $row) {
var_dump($row);
}
// This throws an exception: the generator has already run and cannot be rewound.
$rows->rewind();
// Only reached if the exception above is caught; iterating the finished
// generator again would throw an exception as well.
foreach ($rows as $row) {
var_dump($row);
}
Generators

Internally
You can create infinite generators
<?php
/**
 * Endless generator: each step yields the current microtime() string.
 *
 * @return \Generator Never finishes on its own; the consumer decides when to stop.
 */
function infinite() {
    for (;;) {
        yield microtime();
    }
}
// This loop has no exit condition: infinite() never stops yielding.
$infinite = infinite();
foreach ($infinite as $microtime) {
var_dump($microtime); // Dumps microtime() values endlessly
}
Generators

Internally
Generators can be chained
<?php
// Sample records fed through the two chained conversion generators.
$values = [
['first_name' => 'Harry', 'last_name' => 'Potter', 'birthday' => '1980-08-31'],
['first_name' => 'Ron', 'last_name' => 'Weasley', 'birthday' => '1980-03-01'],
['first_name' => 'Hermione', 'last_name' => 'Granger', 'birthday' => '1979-09-19']
];
/**
 * Prefixes every record with a sequential 'id' starting at 1.
 *
 * @param iterable $values Records (arrays) to number.
 * @return \Generator Yields ['id' => n] merged with each record; an 'id' already
 *                    present in the record takes precedence in the merge.
 */
function first_conversion($values) {
    $position = 1;
    foreach ($values as $record) {
        yield array_merge(['id' => $position], $record);
        $position++;
    }
}
/**
 * Upper-cases the 'last_name' entry of every record.
 *
 * @param iterable $values Records (arrays) that contain a 'last_name' key.
 * @return \Generator Yields each record with 'last_name' in upper case.
 */
function second_conversion($values) {
    foreach ($values as $record) {
        $upperCased = strtoupper($record['last_name']);
        $record['last_name'] = $upperCased;
        yield $record;
    }
}
// Chain the generators: the output of second_conversion() is the input of
// first_conversion(), so each record is upper-cased first and numbered second.
$toConverted = first_conversion(second_conversion($values));
foreach ($toConverted as $convertedData) {
print_r($convertedData);
}
Generators

Sending data to Generator
<?php
/**
 * Generator that echoes the single value sent to it and then finishes.
 *
 * @return \Generator Accepts one value through send().
 */
function outputter() {
    $received = yield;
    echo $received;
}
// send() passes a value into the generator; it becomes the result of the yield expression.
$send = outputter();
$send->send('test'); // Will display "test"
$send->send('test2'); // Won't display anything as the Generator is closed
Coroutines are computer program components that generalize subroutines for nonpreemptive multitasking, by allowing multiple entry points for suspending and resuming execution at certain locations.
Generators

Sending data to Generator
<?php
/**
 * Generator that appends every value sent to it as a line of a log file.
 *
 * The file is opened in append mode when the generator starts running
 * (on the first send()), and closed when the generator is destroyed.
 *
 * @param string $fileName Path of the log file.
 * @return \Generator Accepts strings through send(); each one is written followed by "\n".
 * @throws \RuntimeException On first use, when the file cannot be opened.
 */
function logger($fileName) {
    $fileHandle = fopen($fileName, 'a');
    if ($fileHandle === false) {
        throw new \RuntimeException("Unable to open log file: {$fileName}");
    }
    try {
        while (true) {
            // The value passed to send() is the result of the yield expression.
            fwrite($fileHandle, (yield) . "\n");
        }
    } finally {
        // The loop never ends by itself; this runs when the suspended
        // generator is destroyed, so the handle is not left open.
        fclose($fileHandle);
    }
}
$logger = logger('log.txt');
$logger->send('Foo');
$logger->send('Bar');
// log.txt now ends with two new lines: "Foo\n" and "Bar\n"
Generators

Real use cases : "Thread"
<?php
/**
 * Cooperative task: echoes file.txt one line per step.
 *
 * Control is handed back to the caller after every line.
 *
 * @return \Generator Yields true after each echoed line.
 */
function step1()
{
    $f = fopen("file.txt", 'r');
    if ($f === false) {
        // Nothing to read: finish immediately instead of reading an invalid handle.
        return;
    }
    try {
        // Compare against false explicitly: a final line containing only "0"
        // is falsy and would otherwise end the loop one line early.
        while (($line = fgets($f)) !== false) {
            echo $line;
            yield true;
        }
    } finally {
        fclose($f);
    }
}
/**
 * Cooperative task: echoes file2.txt one line per step.
 *
 * Control is handed back to the caller after every line.
 *
 * @return \Generator Yields true after each echoed line.
 */
function step2()
{
    $f = fopen("file2.txt", 'r');
    if ($f === false) {
        // Nothing to read: finish immediately instead of reading an invalid handle.
        return;
    }
    try {
        // Compare against false explicitly: a final line containing only "0"
        // is falsy and would otherwise end the loop one line early.
        while (($line = fgets($f)) !== false) {
            echo $line;
            yield true;
        }
    } finally {
        fclose($f);
    }
}
/**
 * Cooperative task: echoes file3.txt one line per step.
 *
 * Control is handed back to the caller after every line.
 *
 * @return \Generator Yields true after each echoed line.
 */
function step3()
{
    $f = fopen("file3.txt", 'r');
    if ($f === false) {
        // Nothing to read: finish immediately instead of reading an invalid handle.
        return;
    }
    try {
        // Compare against false explicitly: a final line containing only "0"
        // is falsy and would otherwise end the loop one line early.
        while (($line = fgets($f)) !== false) {
            echo $line;
            yield true;
        }
    } finally {
        fclose($f);
    }
}
/**
 * Round-robin scheduler: advances each generator by one step per round
 * until all of them have finished.
 *
 * @param \Generator[] $steps Tasks to interleave.
 * @return void
 */
function runner(array $steps)
{
    // Prime every task: current() runs an unstarted generator up to its first
    // yield. Calling next() straight away would run the first TWO steps of
    // each task in the first round and break the one-step-per-round order.
    foreach ($steps as $step) {
        $step->current();
    }
    while (!empty($steps)) {
        foreach ($steps as $key => $step) {
            if (!$step->valid()) {
                // Finished tasks leave the rotation.
                unset($steps[$key]);
                continue;
            }
            $step->next();
        }
    }
}
// Interleave the three file-echoing tasks.
runner([step1(), step2(), step3()]);
Generators

Real use cases : "DataProvider"
<?php
/**
 * Example test case whose data provider is written as a generator.
 */
class SomethingTest extends PHPUnit_Framework_TestCase
{
    /**
     * @dataProvider addressesProvider
     */
    public function testSomethingThatRequiresAnAddress(Address $address)
    {
        // ...
    }

    /**
     * Yields ten data sets, each holding one Address built from random strings.
     *
     * @return \Generator
     */
    public function addressesProvider()
    {
        $remaining = 10;
        while ($remaining-- > 0) {
            // Random string with length between 8 and 16
            $firstWord = substr(str_shuffle('abcdefghijklmnopqrstuvwxyz'), 0, rand(8, 16));
            // Random five digit number
            $fiveDigits = sprintf('%05d', rand(1, 99999));
            // Random string with length between 8 and 16
            $secondWord = substr(str_shuffle('abcdefghijklmnopqrstuvwxyz'), 0, rand(8, 16));
            // Random string with length 2
            $twoLetters = substr(str_shuffle('abcdefghijklmnopqrstuvwxyz'), 0, 2);

            yield [
                new Address($firstWord, $fiveDigits, $secondWord, $twoLetters)
            ];
        }
    }
}
Generators

Real use cases : "Generator Control"
<?php
/**
 * Yields 0 through 4 and stops early when the consumer sends 'stop'.
 *
 * @return \Generator Yields integers; the value passed to send() is read as a command.
 */
function nums() {
    for ($i = 0; $i < 5; ++$i) {
        // The yield expression evaluates to whatever was sent, or null on plain iteration.
        $cmd = (yield $i);
        // Strict comparison: with ==, the integer 0 equals 'stop' before PHP 8
        // and would end the generator by accident.
        if ($cmd === 'stop') {
            return;
        }
    }
}
$gen = nums();
foreach ($gen as $v) {
// we are satisfied
if ($v == 3) {
// Sending 'stop' makes nums() return, so the loop ends after this pass.
$gen->send('stop');
}
echo "{$v}\n";
}
Generators

Real use cases : "Fibonacci"
<?php
/**
 * Lazily yields the first $count Fibonacci numbers, starting with 0 and 1.
 *
 * @param int $count How many numbers to produce.
 * @return \Generator Yields the sequence 0, 1, 1, 2, 3, 5, ...
 */
function fibonacci($count)
{
    $a = 0;
    $b = 1;
    $emitted = 0;
    while ($emitted < $count) {
        yield $a;
        // Slide the window: the pair (a, b) becomes (b, a + b).
        list($a, $b) = [$b, $a + $b];
        ++$emitted;
    }
}
// Print the index and value of each of the first 48 numbers, one per line.
foreach (fibonacci(48) as $i => $value) {
echo $i, ' -> ', $value, PHP_EOL;
}
Generators

Real use cases : "Obfuscation"
<?php
/**
 * Hands out the manager's items one by one with their sensitive data cleared.
 */
class Repository
{
    /**
     * @var object|null Source of the items; must expose getAll().
     */
    protected $manager;

    /**
     * @param object|null $manager Object whose getAll() returns the items to expose.
     *                             Optional so that existing `new Repository()` calls
     *                             keep working.
     */
    public function __construct($manager = null)
    {
        $this->manager = $manager;
    }

    /**
     * Lazily yields every item of the manager after clearing its sensitive data.
     *
     * @return \Generator Yields the cleaned items.
     */
    public function getData()
    {
        foreach ($this->manager->getAll() as $data) {
            yield $this->parseData($data);
        }
    }

    /**
     * Clears the sensitive data of one item.
     *
     * @param object $data Item exposing setSensitiveData().
     * @return object The same item, so that getData() yields it instead of null.
     */
    protected function parseData($data)
    {
        $data->setSensitiveData(null);

        return $data;
    }
}
// Iterate over the generator returned by getData() and dump each yielded value.
$repository = new Repository();
foreach ($repository->getData() as $data) {
var_dump($data); // No sensitive data
}
Generators


Generators

https://nikic.github.io/2012/12/22/Cooperative-multitasking-using-coroutines-in-PHP.html

Generators
By Babacooll
Generators
- 1,039