Provide an easy way to implement an iterator without the complexity of writing a class implementing \Iterator.
Let's take an example to introduce Generators.
Issue
Fetch emails from a file emails.dat which contains
~ 200k emails. To each email we need to apply a transformation (in this case strtolower()).
Solution #1
<?php
namespace Generators;
/**
 * Loads every line of emails.dat into memory as a transformed array.
 */
class EmailArray
{
    /**
     * Reads emails.dat line by line and returns all transformed lines.
     *
     * Lines keep their trailing line terminator, exactly as fgets() returns them.
     *
     * @return string[] One transformed entry per line of the file.
     *
     * @throws \RuntimeException When emails.dat cannot be opened.
     */
    public function getEmails()
    {
        $file = fopen('emails.dat', 'r');
        if ($file === false) {
            throw new \RuntimeException('Unable to open emails.dat');
        }

        $emails = [];
        try {
            // Loop on the result of fgets() rather than feof(): feof() only turns true
            // after a failed read, which would append one bogus empty entry.
            while (($line = fgets($file)) !== false) {
                $emails[] = $this->transformEmail($line);
            }
        } finally {
            fclose($file);
        }

        return $emails;
    }

    /**
     * Applies the transformation to a single email (lower-casing).
     *
     * @param string $email
     *
     * @return string
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Solution 1 is to loop over emails and store transformed emails inside an array.
Solution #1
<?php
require 'vendor/autoload.php';
// getEmails() builds the complete transformed array first; the loop then walks that array.
$emailArray = new \Generators\EmailArray();
foreach ($emailArray->getEmails() as $email) {
// $email is one transformed line of emails.dat
//
}
Solution 1 is to loop over emails and store transformed emails inside an array.
Solution #2
<?php
namespace Generators;
/**
 * Class EmailIterator
 *
 * Hand-written \Iterator over the lines of emails.dat that applies
 * transformEmail() to each line as it is read.
 *
 * The #[\ReturnTypeWillChange] attributes silence the PHP 8.1+ deprecation
 * notices for \Iterator methods declared without return types; on older PHP
 * versions the attribute lines are parsed as ordinary "#" comments.
 *
 * @package Generators
 */
class EmailIterator implements \Iterator
{
    /**
     * @var \SplFileObject
     */
    protected $file;

    /**
     * Last key() reported by the file during the constructor's counting pass;
     * used by valid() as the upper bound of the iteration.
     *
     * @var int
     */
    protected $numberOfLines;

    /**
     * Opens emails.dat and reads it once from start to end to record the final key.
     */
    public function __construct()
    {
        $this->file = new \SplFileObject('emails.dat', 'r');
        $this->numberOfLines = 0;
        while (!$this->file->eof()) {
            $this->file->fgets();
            $this->numberOfLines = $this->file->key();
        }
    }

    /**
     * @return string The transformed current line.
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->transformEmail($this->file->current());
    }

    /**
     * Advances the underlying file by reading one line.
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->file->fgets();
    }

    /**
     * @return int The key reported by the underlying file object.
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->file->key();
    }

    /**
     * @return bool True while the file key is below the value recorded by the constructor.
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->file->key() < $this->numberOfLines;
    }

    /**
     * Rewinds the underlying file so iteration starts from the first line.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->file->rewind();
    }

    /**
     * Applies the transformation to a single email (lower-casing).
     *
     * @param string $email
     *
     * @return string
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Solution 2 is to implement an Iterator.
Solution #2
<?php
require 'vendor/autoload.php';
// The object itself is traversed: foreach drives its rewind()/valid()/current()/key()/next() methods.
$emailIterator = new \Generators\EmailIterator();
foreach ($emailIterator as $email) {
// $email is the value returned by EmailIterator::current()
//
}
Solution 2 is to implement an Iterator.
Two solutions comparison
| | Solution #1 | Solution #2 | Difference |
---|---|---|---|
Time | 1.46 s | 3.71 s | + 154 % |
Memory | 38.1 MB | 105 kB | - 99.7 % |
Solution #3
<?php
namespace Generators;
/**
 * Streams the transformed lines of emails.dat one at a time through a generator.
 */
class EmailGenerator
{
    /**
     * Lazily yields each transformed line of emails.dat.
     *
     * Nothing is read until the returned generator is iterated; lines keep
     * their trailing line terminator, exactly as fgets() returns them.
     *
     * @return \Generator Yields one transformed string per line.
     *
     * @throws \RuntimeException On first iteration, when emails.dat cannot be opened.
     */
    public function getEmails()
    {
        $file = fopen('emails.dat', 'r');
        if ($file === false) {
            throw new \RuntimeException('Unable to open emails.dat');
        }

        try {
            // Loop on fgets() itself: feof() only turns true after a failed read,
            // which would yield one bogus empty entry at the end.
            while (($line = fgets($file)) !== false) {
                yield $this->transformEmail($line);
            }
        } finally {
            // Also runs when the consumer stops early and the generator is destroyed.
            fclose($file);
        }
    }

    /**
     * Applies the transformation to a single email (lower-casing).
     *
     * @param string $email
     *
     * @return string
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Solution 3 is to use a generator.
Solution #3
<?php
require 'vendor/autoload.php';
// getEmails() returns a generator, so each line is read and transformed only when the loop asks for it.
$emailGenerator = new \Generators\EmailGenerator();
foreach ($emailGenerator->getEmails() as $email) {
// $email is one transformed line of emails.dat
//
}
Solution 3 is to use a generator.
Three solutions comparison
| | Solution #1 | Solution #2 | Solution #3 |
---|---|---|---|
Time | 1.46 s | 3.71 s | 1.53 s |
Memory | 38.1 MB | 105 kB | 230 kB |
Let's take an example to introduce Generators.
Issue
What if we need to fetch from two different files now?
Array
<?php
namespace Generators;
/**
 * Loads the transformed lines of emails.dat and emails2.dat into one array.
 */
class EmailArray
{
    /**
     * Returns the transformed lines of emails.dat followed by those of emails2.dat.
     *
     * @return string[]
     *
     * @throws \RuntimeException When either file cannot be opened.
     */
    public function getEmails()
    {
        $emails = [];
        $emails = $this->getEmailsFromFile('emails.dat', $emails);
        $emails = $this->getEmailsFromFile('emails2.dat', $emails);

        return $emails;
    }

    /**
     * Appends the transformed lines of one file to an existing list.
     *
     * @param string   $fileName Path of the file to read.
     * @param string[] $emails   Entries collected so far; new entries are appended.
     *
     * @return string[] The given entries followed by the file's transformed lines.
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    public function getEmailsFromFile($fileName, array $emails = [])
    {
        $file = fopen($fileName, 'r');
        if ($file === false) {
            throw new \RuntimeException('Unable to open ' . $fileName);
        }

        try {
            // Loop on fgets() itself: feof() only turns true after a failed read,
            // which would append one bogus empty entry per file.
            while (($line = fgets($file)) !== false) {
                $emails[] = $this->transformEmail($line);
            }
        } finally {
            fclose($file);
        }

        return $emails;
    }

    /**
     * Applies the transformation to a single email (lower-casing).
     *
     * @param string $email
     *
     * @return string
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Generator
<?php
namespace Generators;
/**
 * Streams the transformed lines of emails.dat and emails2.dat through generators.
 */
class EmailGenerator
{
    /**
     * Lazily yields the transformed lines of emails.dat, then those of emails2.dat.
     *
     * Keys restart at 0 for the second file because "yield from" preserves the
     * inner generator's keys; pass false as the second argument of
     * iterator_to_array() when collecting the result.
     *
     * @return \Generator
     *
     * @throws \RuntimeException During iteration, when a file cannot be opened.
     */
    public function getEmails()
    {
        // Delegate to getEmailsFromFile(): the previously referenced
        // getEmailsFromFirstFile() does not exist on this class.
        yield from $this->getEmailsFromFile('emails.dat');
        yield from $this->getEmailsFromFile('emails2.dat');
    }

    /**
     * Lazily yields each transformed line of the given file.
     *
     * @param string $fileName Path of the file to read.
     *
     * @return \Generator Yields one transformed string per line.
     *
     * @throws \RuntimeException On first iteration, when the file cannot be opened.
     */
    public function getEmailsFromFile($fileName)
    {
        $file = fopen($fileName, 'r');
        if ($file === false) {
            throw new \RuntimeException('Unable to open ' . $fileName);
        }

        try {
            // Loop on fgets() itself: feof() only turns true after a failed read,
            // which would yield one bogus empty entry per file.
            while (($line = fgets($file)) !== false) {
                yield $this->transformEmail($line);
            }
        } finally {
            // Also runs when the consumer stops early and the generator is destroyed.
            fclose($file);
        }
    }

    /**
     * Applies the transformation to a single email (lower-casing).
     *
     * @param string $email
     *
     * @return string
     */
    public function transformEmail($email)
    {
        return strtolower($email);
    }
}
Internally
<?php
/**
 * Lazy counterpart of range(): yields the values from $start to $end (inclusive).
 *
 * A positive step counts upwards and a negative step counts downwards. When
 * the step points away from $end, nothing is yielded.
 *
 * @param int|float $start First value to yield.
 * @param int|float $end   Inclusive bound.
 * @param int|float $step  Increment between values; must not be zero.
 *
 * @return \Generator
 *
 * @throws \InvalidArgumentException On first iteration, when $step is zero
 *                                   (the loop would otherwise never terminate).
 */
function xrange($start, $end, $step = 1) {
    if ($step == 0) {
        throw new \InvalidArgumentException('xrange() step must not be zero');
    }

    if ($step > 0) {
        for ($i = $start; $i <= $end; $i += $step) {
            yield $i;
        }

        return;
    }

    // Negative step: walk downwards until $end is passed.
    for ($i = $start; $i >= $end; $i += $step) {
        yield $i;
    }
}
// Prints 1..1000000, one number per line; values are produced one at a time by the generator.
foreach (xrange(1, 1000000) as $num) {
echo $num, "\n";
}
#1
<?php
// Prints 1..1000000, one number per line; range() builds the full array before the loop starts.
foreach (range(1, 1000000) as $num) {
echo $num, "\n";
}
#2
Let's take the example of range()
526 ms
144 MB
855 ms
18 kB
Internally
| | Time | Memory |
---|---|---|
range() | 526 ms | 144 MB |
xrange() | 855 ms | 18 kB |
Difference | + 62.54 % | - 99.99 % |
Explanation | range() is faster because it's an internal PHP function | range() requires more memory because it builds an in-memory array while xrange() returns an iterator |
Internally
xrange() actually just returns an instance of Generator (that implements \Iterator)
<?php
/**
 * Lazy counterpart of range(): yields the values from $start to $end (inclusive).
 *
 * A positive step counts upwards and a negative step counts downwards. When
 * the step points away from $end, nothing is yielded.
 *
 * @param int|float $start First value to yield.
 * @param int|float $end   Inclusive bound.
 * @param int|float $step  Increment between values; must not be zero.
 *
 * @return \Generator
 *
 * @throws \InvalidArgumentException On first iteration, when $step is zero
 *                                   (the loop would otherwise never terminate).
 */
function xrange($start, $end, $step = 1) {
    if ($step == 0) {
        throw new \InvalidArgumentException('xrange() step must not be zero');
    }

    $ascending = $step > 0;
    for ($i = $start; $ascending ? $i <= $end : $i >= $end; $i += $step) {
        yield $i;
    }
}
// Calling xrange() does not run its body; it only creates the Generator object.
$range = xrange(1, 1000000);
var_dump(get_class($range)); // string(9) "Generator"
var_dump($range instanceof \Iterator); // bool(true)
Internally
What happens if we double the range ?
| | range(1, 1000000) | range(1, 2000000) | xrange(1, 1000000) | xrange(1, 2000000) |
---|---|---|---|---|
Time | 526 ms | 1120 ms | 890 ms | 1700 ms |
Memory | 144 MB | 275 MB | 17.6 kB | 17.6 kB |
x 2
x 2
x 2
x 1
For both range() and xrange() time increases linearly.
For range() memory increases linearly.
For xrange() memory does not increase.
Internally
Generators == Iterators that are easy to implement
<?php
/**
 * Yields the three sample rows 'test', 'test2' and 'test3' under keys 0, 1 and 2.
 *
 * @return \Generator
 */
function rows() {
    $samples = ['test', 'test2', 'test3'];
    foreach ($samples as $sample) {
        yield $sample;
    }
}
// A generator is traversed with foreach like any other iterator; dumps 'test', 'test2', 'test3' in turn.
$rows = rows();
foreach ($rows as $row) {
var_dump($row);
}
<?php
/**
 * Yields the three sample rows 'test', 'test2' and 'test3' under keys 0, 1 and 2.
 *
 * @return \Generator
 */
function rows() {
    $samples = ['test', 'test2', 'test3'];
    for ($index = 0; $index < 3; $index++) {
        yield $samples[$index];
    }
}
// Same traversal as a foreach, but driving the Iterator methods by hand.
$rows = rows();
var_dump($rows->current()); // string(4) "test"
$rows->next();
var_dump($rows->current()); // string(5) "test2"
$rows->next();
var_dump($rows->current()); // string(5) "test3"
Internally
Generators cannot be rewound once they have advanced past their first yield or have been closed
<?php
/**
 * Yields the three sample rows 'test', 'test2' and 'test3' under keys 0, 1 and 2.
 *
 * @return \Generator
 */
function rows() {
    $first = 'test';
    yield $first;

    $second = 'test2';
    yield $second;

    $third = 'test3';
    yield $third;
}
$rows = rows();
// First pass: runs the generator to completion.
foreach ($rows as $row) {
var_dump($row);
}
// This will throw an exception: the generator has already been consumed
$rows->rewind();
// This will throw an Exception too (foreach rewinds implicitly); it is only reached if the one above is caught
foreach ($rows as $row) {
var_dump($row);
}
Internally
You can create infinite generators
<?php
/**
 * Endless generator: every step yields the current microtime() string.
 *
 * The consumer is responsible for breaking out of the iteration.
 *
 * @return \Generator
 */
function infinite() {
    for (;;) {
        $now = microtime();
        yield $now;
    }
}
// There is no break, so this loop never terminates on its own.
$infinite = infinite();
foreach ($infinite as $microtime) {
var_dump($microtime); // Outputs microtime() values indefinitely
}
Internally
Generators can be chained
<?php
// Sample records fed through the chained generators below.
$values = [
['first_name' => 'Harry', 'last_name' => 'Potter', 'birthday' => '1980-08-31'],
['first_name' => 'Ron', 'last_name' => 'Weasley', 'birthday' => '1980-03-01'],
['first_name' => 'Hermione', 'last_name' => 'Granger', 'birthday' => '1979-09-19']
];
/**
 * Prefixes every record with a sequential 'id' entry, starting at 1.
 *
 * The record is merged over the ['id' => n] array with array_merge(), so an
 * 'id' already present in the record takes precedence.
 *
 * @param iterable $values Records (arrays) to number.
 *
 * @return \Generator Yields each record with the 'id' entry merged in.
 */
function first_conversion($values) {
    $counter = 0;
    foreach ($values as $record) {
        $counter += 1;
        $numbered = array_merge(['id' => $counter], $record);
        yield $numbered;
    }
}
/**
 * Upper-cases the 'last_name' entry of every record.
 *
 * @param iterable $values Records (arrays) that carry a 'last_name' entry.
 *
 * @return \Generator Yields each record with 'last_name' passed through strtoupper().
 */
function second_conversion($values) {
    foreach ($values as $record) {
        $upperCased = strtoupper($record['last_name']);
        $record['last_name'] = $upperCased;
        yield $record;
    }
}
// The inner generator feeds the outer one: each record is upper-cased by
// second_conversion() and then numbered by first_conversion(), one record at a time.
$toConverted = first_conversion(second_conversion($values));
foreach ($toConverted as $convertedData) {
print_r($convertedData);
}
Sending data to Generator
<?php
/**
 * Waits for a single value sent into the generator and echoes it.
 *
 * The generator finishes right after printing, so only the first send() has an effect.
 *
 * @return \Generator
 */
function outputter() {
    $received = yield;
    echo $received;
}
// send() passes a value into the generator: it becomes the result of the pending yield expression.
$send = outputter();
$send->send('test'); // Will display "test"
$send->send('test2'); // Won't display anything as the Generator is closed
Coroutines are computer program components that generalize subroutines for nonpreemptive multitasking, by allowing multiple entry points for suspending and resuming execution at certain locations.
Sending data to Generator
<?php
/**
 * Coroutine-style logger: every value sent into the generator is appended to
 * the given file, followed by a newline.
 *
 * @param string $fileName Path of the log file, opened in append mode.
 *
 * @return \Generator Never finishes on its own; send() messages into it.
 *
 * @throws \RuntimeException On first use, when the file cannot be opened.
 */
function logger($fileName) {
    $fileHandle = fopen($fileName, 'a');
    if ($fileHandle === false) {
        throw new \RuntimeException('Unable to open ' . $fileName);
    }

    try {
        while (true) {
            $message = yield;
            fwrite($fileHandle, $message . "\n");
        }
    } finally {
        // Runs when the generator is destroyed, so the handle is always released.
        fclose($fileHandle);
    }
}
// Each send() resumes the generator, which writes the received message and waits for the next one.
$logger = logger('log.txt');
$logger->send('Foo');
$logger->send('Bar');
// Appends two lines to log.txt: "Foo\n" and "Bar\n"
Real use cases : "Thread"
<?php
/**
 * Echoes file.txt line by line, handing control back to the caller after every line.
 *
 * @return \Generator Yields true after each echoed line.
 *
 * @throws \RuntimeException On first iteration, when file.txt cannot be opened.
 */
function step1()
{
    $f = fopen("file.txt", 'r');
    if ($f === false) {
        throw new \RuntimeException('Unable to open file.txt');
    }

    try {
        // Compare against false explicitly: a final line containing just "0"
        // is falsy and would otherwise end the loop early.
        while (($line = fgets($f)) !== false) {
            echo $line;
            yield true;
        }
    } finally {
        fclose($f);
    }
}
/**
 * Echoes file2.txt line by line, handing control back to the caller after every line.
 *
 * @return \Generator Yields true after each echoed line.
 *
 * @throws \RuntimeException On first iteration, when file2.txt cannot be opened.
 */
function step2()
{
    $f = fopen("file2.txt", 'r');
    if ($f === false) {
        throw new \RuntimeException('Unable to open file2.txt');
    }

    try {
        // Compare against false explicitly: a final line containing just "0"
        // is falsy and would otherwise end the loop early.
        while (($line = fgets($f)) !== false) {
            echo $line;
            yield true;
        }
    } finally {
        fclose($f);
    }
}
/**
 * Echoes file3.txt line by line, handing control back to the caller after every line.
 *
 * @return \Generator Yields true after each echoed line.
 *
 * @throws \RuntimeException On first iteration, when file3.txt cannot be opened.
 */
function step3()
{
    $f = fopen("file3.txt", 'r');
    if ($f === false) {
        throw new \RuntimeException('Unable to open file3.txt');
    }

    try {
        // Compare against false explicitly: a final line containing just "0"
        // is falsy and would otherwise end the loop early.
        while (($line = fgets($f)) !== false) {
            echo $line;
            yield true;
        }
    } finally {
        fclose($f);
    }
}
/**
 * Round-robin scheduler: runs the given generators one step at a time, in
 * turn, until all of them have finished.
 *
 * @param \Generator[] $steps Generators to interleave.
 *
 * @return void
 */
function runner(array $steps)
{
    // In the first round valid() alone is enough: it starts an unstarted
    // generator and runs it up to its first yield. Calling next() right away
    // would execute two steps (the implicit start plus the advance) and give
    // every generator a double turn in round one.
    $firstRound = true;

    while (!empty($steps)) {
        foreach ($steps as $key => $step) {
            if (!$firstRound) {
                $step->next();
            }
            if (!$step->valid()) {
                // Finished generators leave the rotation.
                unset($steps[$key]);
            }
        }
        $firstRound = false;
    }
}
// Interleaves the output of the three file readers until every generator is exhausted.
runner([step1(), step2(), step3()]);
Real use cases : "DataProvider"
<?php
/**
 * Example test case whose data provider is a generator.
 */
class SomethingTest extends PHPUnit_Framework_TestCase
{
    /**
     * @dataProvider addressesProvider
     */
    public function testSomethingThatRequiresAnAddress(Address $address)
    {
        // ...
    }

    /**
     * Yields ten argument lists, each holding one Address built from random values.
     *
     * @return \Generator
     */
    public function addressesProvider()
    {
        for ($dataSet = 0; $dataSet < 10; $dataSet++) {
            // Random string with length between 8 and 16
            $firstText = substr(str_shuffle('abcdefghijklmnopqrstuvwxyz'), 0, rand(8, 16));
            // Random five digit number
            $fiveDigits = sprintf('%05d', rand(1, 99999));
            // Random string with length between 8 and 16
            $secondText = substr(str_shuffle('abcdefghijklmnopqrstuvwxyz'), 0, rand(8, 16));
            // Random string with length 2
            $twoLetters = substr(str_shuffle('abcdefghijklmnopqrstuvwxyz'), 0, 2);

            yield [
                new Address($firstText, $fiveDigits, $secondText, $twoLetters)
            ];
        }
    }
}
Real use cases : "Generator Control"
<?php
/**
 * Yields the integers 0 to 4 and stops early when the string 'stop' is sent in.
 *
 * @return \Generator
 */
function nums() {
    for ($i = 0; $i < 5; ++$i) {
        $cmd = (yield $i);
        // Strict comparison: with "==", send(true) (and send(0) on PHP 7)
        // would also be treated as the stop command.
        if ($cmd === 'stop') {
            return;
        }
    }
}
// Prints 0, 1, 2 and 3: once 'stop' is sent the generator returns, so the loop ends after that iteration.
$gen = nums();
foreach ($gen as $v) {
// we are satisfied
if ($v == 3) {
$gen->send('stop');
}
echo "{$v}\n";
}
Real use cases : "Fibonacci"
<?php
/**
 * Lazily yields the first $count Fibonacci numbers, starting with 0 and 1.
 *
 * @param int $count How many numbers to produce.
 *
 * @return \Generator Yields the numbers under keys 0 .. $count - 1.
 */
function fibonacci($count)
{
    $prev = 0;
    $current = 1;

    for ($produced = 0; $produced < $count; $produced++) {
        yield $prev;
        // Slide the window: (prev, current) becomes (current, prev + current).
        list($prev, $current) = array($current, $prev + $current);
    }
}
// Prints the first 48 Fibonacci numbers, one "index -> value" pair per line.
foreach (fibonacci(48) as $i => $value) {
echo $i, ' -> ', $value, PHP_EOL;
}
Real use cases : "Obfuscation"
<?php
/**
 * Exposes the manager's records with their sensitive data stripped.
 */
class Repository
{
    /**
     * Data source; must provide a getAll() method returning the records to expose.
     *
     * @var object|null
     */
    protected $manager;

    /**
     * @param object|null $manager Data source providing getAll(); optional so
     *                             that existing "new Repository()" calls keep working.
     */
    public function __construct($manager = null)
    {
        $this->manager = $manager;
    }

    /**
     * Lazily yields every record of the manager after removing its sensitive data.
     *
     * @return \Generator
     */
    public function getData()
    {
        foreach ($this->manager->getAll() as $data) {
            yield $this->parseData($data);
        }
    }

    /**
     * Clears the sensitive data of one record.
     *
     * @param object $data Record exposing setSensitiveData().
     *
     * @return object The same record, so that getData() yields it instead of null.
     */
    protected function parseData($data)
    {
        $data->setSensitiveData(null);

        return $data;
    }
}
// Each record passes through Repository::parseData() before the loop body sees it.
$repository = new Repository();
foreach ($repository->getData() as $data) {
var_dump($data); // No sensitive data
}
https://nikic.github.io/2012/12/22/Cooperative-multitasking-using-coroutines-in-PHP.html