Не только нейронные сети верстают сайты

Александр Лифанов

ITGM 2018

ITGM 2018

Pix2code

UIzard

ITGM 2018

Frontend will die

ITGM 2018

Pix2code

ITGM 2018

ITGM 2018

DSL

ITGM 2018

Genetic programming

ITGM 2018

Symbolic regression

ITGM 2018

Evolution

ITGM 2018

Mutation and crossover

ITGM 2018

ITGM 2018

DEAP

ITGM 2018

Import functions

import operator
import math
import random
import numpy

from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp

from utils import draw_logbook

def protectedDiv(left, right):
    """Divide ``left`` by ``right`` without ever raising on a zero divisor.

    Returns:
        ``left / right``, or the constant ``1`` when the division raises
        ``ZeroDivisionError``.
    """
    try:
        quotient = left / right
    except ZeroDivisionError:
        # Evolved expressions may divide by zero; fall back to a constant
        # instead of aborting the evaluation.
        return 1
    else:
        return quotient

ITGM 2018

Primitives

# Primitive set named "MAIN" taking one input argument: the functions and
# terminals that evolved expression trees are allowed to contain.
pset = gp.PrimitiveSet("MAIN", 1)
# Binary arithmetic operators (arity 2).
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
# Division goes through protectedDiv so a zero divisor yields 1 instead of
# raising.
pset.addPrimitive(protectedDiv, 2)
# Unary operators (arity 1).
pset.addPrimitive(operator.neg, 1)
pset.addPrimitive(math.cos, 1)
pset.addPrimitive(math.sin, 1)
# Ephemeral constant "rand101": its value is drawn by random.randint(-1, 1),
# i.e. one of -1, 0 or 1.
pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))
# Give the single input argument the readable name 'x' (default is ARG0).
pset.renameArguments(ARG0='x')

ITGM 2018

Genotype

# Single-objective fitness; the negative weight makes DEAP treat smaller
# values as better (minimization).
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
# An individual is a GP primitive tree carrying a FitnessMin attribute.
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# "expr" generates a random expression from pset using genHalfAndHalf with
# min_=1 and max_=2.
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
# "individual" builds an Individual from whatever toolbox.expr produces.
toolbox.register("individual", tools.initIterate, creator.Individual, 
    toolbox.expr)
# "population" builds a list of individuals; the size is given at call time.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# "compile" turns an expression tree into a callable using pset.
toolbox.register("compile", gp.compile, pset=pset)

ITGM 2018

Fitness function

def evalSymbReg(individual, points):
    """Score an individual by its mean squared error against the target.

    The target function is ``x**4 + x**3 + x**2 + x``.

    Args:
        individual: expression tree to compile via ``toolbox.compile``.
        points: sized collection of sample ``x`` values.

    Returns:
        A one-element tuple holding the mean squared error over ``points``.
    """
    candidate = toolbox.compile(expr=individual)
    squared_errors = [
        (candidate(x) - x ** 4 - x ** 3 - x ** 2 - x) ** 2
        for x in points
    ]
    mean_squared_error = math.fsum(squared_errors) / len(points)
    return (mean_squared_error,)

ITGM 2018

Operation settings

# Fitness evaluation on 20 sample points: -1.0, -0.9, ..., 0.9.
toolbox.register("evaluate", evalSymbReg, points=[x / 10. 
    for x in range(-10, 10)])
# Tournament selection with tournaments of size 3.
toolbox.register("select", tools.selTournament, tournsize=3)
# Crossover: one-point crossover on trees.
toolbox.register("mate", gp.cxOnePoint)
# Mutation: gp.mutUniform, using expressions generated by genFull
# (min_=0, max_=2) as the new material.
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

# Limit the "height" attribute of trees produced by mate and mutate to 17.
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), 
    max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), 
    max_value=17))

ITGM 2018

Main process

def main():
    """Run the evolution, print the best expression and plot the logbook.

    Returns:
        A tuple ``(population, logbook, hall_of_fame)``: the population and
        logbook returned by ``algorithms.eaSimple`` and the hall of fame
        that keeps the single best individual.
    """
    random.seed(318)  # seed Python's RNG with a fixed value

    population = toolbox.population(n=300)
    hall_of_fame = tools.HallOfFame(1)

    # Collect statistics for fitness values and tree sizes side by side.
    fitness_stats = tools.Statistics(lambda ind: ind.fitness.values)
    size_stats = tools.Statistics(len)
    multi_stats = tools.MultiStatistics(fitness=fitness_stats, size=size_stats)
    for label, reducer in (
        ("avg", numpy.mean),
        ("std", numpy.std),
        ("min", numpy.min),
        ("max", numpy.max),
    ):
        multi_stats.register(label, reducer)

    # Crossover probability 0.5, mutation probability 0.1, 50 generations.
    population, logbook = algorithms.eaSimple(
        population,
        toolbox,
        cxpb=0.5,
        mutpb=0.1,
        ngen=50,
        stats=multi_stats,
        halloffame=hall_of_fame,
        verbose=True,
    )

    best_tree = gp.PrimitiveTree(hall_of_fame[0])
    print(best_tree)
    draw_logbook(logbook)
    return population, logbook, hall_of_fame
  
# >>> add(mul(x, sub(x, neg(mul(x, sub(x, neg(mul(x, x))))))), x)

ITGM 2018

Train

ITGM 2018

DSL generation

ITGM 2018

My primitives

# dsl_funcs.py
def _fill_template(name, placeholder, data):
    """Return the ``mapping[name]`` template with ``placeholder`` replaced.

    Every occurrence of ``placeholder`` in the template is replaced by
    ``data`` (``str.replace`` semantics).
    NOTE(review): ``mapping`` is expected to be a module-level lookup from
    DSL token name to template string, defined elsewhere - confirm.
    """
    return mapping[name].replace(placeholder, data)


# Container primitives: their templates take nested markup in a '{}' slot.

def header(data):
    """Fill the '{}' slot of the 'header' template with ``data``."""
    return _fill_template('header', '{}', data)


def body(data):
    """Fill the '{}' slot of the 'body' template with ``data``."""
    return _fill_template('body', '{}', data)


def row(data):
    """Fill the '{}' slot of the 'row' template with ``data``."""
    return _fill_template('row', '{}', data)


def double(data):
    """Fill the '{}' slot of the 'double' template with ``data``."""
    return _fill_template('double', '{}', data)


def single(data):
    """Fill the '{}' slot of the 'single' template with ``data``."""
    return _fill_template('single', '{}', data)


# Leaf primitives: their templates take content in a '[]' slot.

def text(data):
    """Fill the '[]' slot of the 'text' template with ``data``."""
    return _fill_template('text', '[]', data)


def small_title(data):
    """Fill the '[]' slot of the 'small-title' template with ``data``."""
    return _fill_template('small-title', '[]', data)


def btn_active(data):
    """Fill the '[]' slot of the 'btn-active' template with ``data``."""
    return _fill_template('btn-active', '[]', data)


def btn_inactive(data):
    """Fill the '[]' slot of the 'btn-inactive' template with ``data``."""
    return _fill_template('btn-inactive', '[]', data)


def btn_green(data):
    """Fill the '[]' slot of the 'btn-green' template with ``data``."""
    return _fill_template('btn-green', '[]', data)


def btn_orange(data):
    """Fill the '[]' slot of the 'btn-orange' template with ``data``."""
    return _fill_template('btn-orange', '[]', data)

No list

# dsl_funcs.py
def concat(a, b):
    """Combine two values with ``+`` and return the result.

    Lets an expression tree place two fragments side by side without
    needing a list type.
    """
    combined = a + b
    return combined

ITGM 2018

ITGM 2018

My fitness function

# Reference image loaded from 'target.png'; rendered candidates are compared
# against it.
RESULT_IMAGE = Image.open('target.png')
# Fraction of the reference image's pixels taken by each distinct pixel value.
color_proportion = get_color_proportion(RESULT_IMAGE)
# One weight per reference pixel, in getdata() order: the proportion of that
# pixel's value within the whole image.
WEIGHTS_MASK = np.array([color_proportion[x] for x in RESULT_IMAGE.getdata()])

def get_diff_by_pixels(img1, img2, weights_mask):
    """Return the weighted number of differing channel values of two images.

    Args:
        img1: image convertible with ``np.array`` and exposing ``size``.
        img2: second image, same requirements as ``img1``.
        weights_mask: one weight per pixel, in flattened row-by-row order.

    Returns:
        Sum over all pixels of (number of the 4 channel values that differ)
        multiplied by that pixel's weight.
    """
    # Assumes size is (width, height) and 4 channels per pixel (e.g. RGBA)
    # - TODO confirm against the image library in use.
    first = np.array(img1).reshape(img1.size[1], img1.size[0], 4)
    second = np.array(img2).reshape(img2.size[1], img2.size[0], 4)

    # Per pixel: how many of its channel values are not equal (0..4).
    mismatches_per_pixel = (first != second).astype(int).sum(axis=2)

    weighted = mismatches_per_pixel.flatten() * weights_mask
    return weighted.sum()
  
  
def evalByColorProportion(ind):
    """Score an individual by how far its rendered markup is from the target.

    The individual is compiled, called with ``x='text'``, wrapped by
    ``body`` and rendered to an image via ``renderer.render_html``.

    Returns:
        A one-element tuple: the weighted pixel difference to
        ``RESULT_IMAGE``, or ``10000.0`` when the rendered image does not
        have shape ``(640, 1024, 4)``.
    """
    generate_markup = toolbox.compile(expr=ind)
    html = body(generate_markup(x='text'))
    rendered = renderer.render_html(html)

    # A render with an unexpected shape cannot be compared pixel by pixel,
    # so it receives a fixed penalty score instead.
    if np.array(rendered).shape != (640, 1024, 4):
        return (10000.0,)

    distance = get_diff_by_pixels(
        RESULT_IMAGE, rendered, weights_mask=WEIGHTS_MASK
    )
    return (distance,)

...

ITGM 2018

My fitness function

...

def get_color_proportion(img):
    """Return the share of pixels that each distinct pixel value occupies.

    Args:
        img: object whose ``getdata()`` yields hashable pixel values.

    Returns:
        A dict mapping each distinct pixel value to ``count / total``, in
        order of first appearance. An image with no pixels gives ``{}``.
    """
    # Imported locally so the function does not depend on the importing
    # module's header.
    from collections import Counter

    counts = Counter(img.getdata())
    total = sum(counts.values())
    # With no pixels the comprehension is empty, so no division by zero.
    return {pixel: count / total for pixel, count in counts.items()}

ITGM 2018

Train

$ python gp_by_img.py
               fitness              size   
           ------------------------ -----------
gen nevals avg      min      avg  min
0   300    47 464,8 9 945,72 6,88 2  
1   186    21 913,6 9 945,72 5,39333 1  
2   189    22 880,3 9 945,72 6,06333 1  
3   176    20 074,9 9 945,72 5,38333 1  
4   166    23 838,3 9 945,72 4,66667 1  
5   183    22 182   9 945,72 4,31667 1  
6   184    22 484,4 9 945,72 4,11667 1  
7   198    24 173,1 9 945,72 4,01333 1  
8   186    23 723,3 4 099,15 4,14    1  
9   195    22 026,5 2 049,98 4,27667 1  
10  160    19 021,4 0,393263 4,29    1  
11  166    20 558,1 0,393263 4,93333 1  
12  178    20 804,6 0,393263 6,23667 1  
13  166    17 868,5 0        7,40667 1  
14  175    20 839,2 0        7,94    1  
15  177    16 587,1 0        7,98    1  
16  192    22 399,2 0        7,87    1  
17  193    19 394,3 0        7,92333 1  
18  173    16 216,4 0        8,01333 1  
19  180    18 485,9 0        7,99667 1  
20  184    19 850   0        8,04333 1

LOSS: (0.0,)
CODE: row(concat(double(btn_green(x)), double(btn_orange(x))))

ITGM 2018

Train

ITGM 2018

Result code

<div class="row">
  <div class="col-md-6">
    <a class="btn btn-success" href="#" role="button">text</a>
  </div>
  <div class="col-md-6">
    <a class="btn btn-warning" href="#" role="button">text</a>
  </div>
</div>

ITGM 2018

Result AST

ITGM 2018

Web components

import asttokens, ast
from collections import Counter

def extract_duplicate_subtrees(expr):
    """Find nested call snippets that occur more than once in ``expr``.

    Args:
        expr: source text of a Python expression.

    Returns:
        Source snippets of AST nodes that appear more than once, contain
        more than one ``(`` and are not the whole expression itself, in
        order of first appearance during ``ast.walk``.
    """
    atok = asttokens.ASTTokens(expr, parse=True)
    whole_source = atok.get_text(atok.tree)

    # Count how many AST nodes map to each distinct source snippet.
    occurrences = Counter(atok.get_text(node) for node in ast.walk(atok.tree))

    return [
        snippet
        for snippet, seen in occurrences.items()
        if seen > 1 and snippet.count('(') > 1 and snippet != whole_source
    ]

# Demo: the sub-expression row(btn('text')) appears twice in this markup
# expression, so it is reported as a repeated subtree.
expr = "body(header(concat(row(btn('text')), row(btn('text')))))"
to_replace_subtrees = extract_duplicate_subtrees(expr)
# Print each repeated subtree on its own line.
for st in to_replace_subtrees:
    print(st)

# >>> row(btn('text'))

ITGM 2018

ITGM 2018

Conclusion

  • No memory (train on each task)
  • Structured data
  • Ability to aggregate (web components)
  • Scalability (parallelization)

ITGM 2018

Not only NN can generate HTML

ITGM 2018

Questions?

ITGM 2018

GP for pix2code

By Alexander Lifanov

GP for pix2code

Using genetic programming with DEAP to generate HTML from an image

  • 596