Building a search recommendation engine using D8 and Solr

Drupalcamp Alicante 2018

About me

name

Samuel Solís

nick

@estoyausente

company

Bodeboca

http://www.bodeboca.com

Recommendation engine

Index

  • Build a data model
  • Get the data
  • Keep the data updated
  • Implement a recommendation algorithm
  • Expose the result
  • Next steps

Warning!

100% production code

Build a data model

Raw data module

$/valentina/web/modules/custom/bb_raw_data$ tree .

├── bb_raw_data.drush.inc
├── bb_raw_data.info.yml
├── bb_raw_data.links.menu.yml
├── bb_raw_data.module
├── bb_raw_data.permissions.yml
├── bb_raw_data.routing.yml
├── bb_raw_data.services.yml
├── src
│   ├── Entity
│   │   ├── BodebocaNode.php
│   │   └── Transaction.php
│   ├── Form
│   │   └── UpdateDataForm.php
│   ├── Importer.php
│   ├── Plugin
│   │   └── QueueWorker
│   │       ├── ImportBodebocaNode.php
│   │       └── ImportTransaction.php
│   └── TransactionManager.php
└── tests
    └── src
        └── Unit
            └── ImporterTest.php

Transaction

bbNode

User-product relationship

Product information

D8 Custom entities

/**
 * Defines the Transaction entity.
 *
 * A transaction stores one imported order line: the order it belongs to, the
 * customer, the purchased product and, optionally, the rating the customer
 * gave to that product.
 *
 * @ingroup raw_data
 *
 * @ContentEntityType(
 *   id = "transaction",
 *   label = @Translation("Transaction entity"),
 *   handlers = {
 *     "view_builder" = "Drupal\Core\Entity\EntityViewBuilder",
 *     "views_data" = "Drupal\views\EntityViewsData",
 *   },
 *   base_table = "bb_raw_data_transactions",
 *   admin_permission = "administer raw data",
 *   fieldable = FALSE,
 *   entity_keys = {
 *     "id" = "id",
 *     "order_id" = "order_id",
 *   },
 * )
 */
class Transaction extends ContentEntityBase {

  /**
   * Declares the base fields stored for every transaction.
   *
   * @param \Drupal\Core\Entity\EntityTypeInterface $entity_type
   *   The entity type definition (not used by this implementation).
   *
   * @return \Drupal\Core\Field\BaseFieldDefinition[]
   *   The field definitions keyed by field name.
   */
  public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
    $fields = [];

    // Standard field, used as the unique primary index.
    $fields['id'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('ID'))
      ->setDescription(t('The ID of the Transaction entity.'))
      ->setReadOnly(TRUE);

    // Bodeboca order id.
    // 'number_integer' is the core formatter for integer fields; 'int' is not
    // a registered formatter id.
    $fields['order_id'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('Short order id'))
      ->setDescription(t('Short order id'))
      ->setDisplayOptions('view', [
        'label' => 'above',
        'type' => 'number_integer',
      ])
      ->setDisplayConfigurable('view', TRUE)
      ->setReadOnly(TRUE);

    // Bodeboca user id.
    $fields['uid'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('Customer user id'))
      ->setDescription(t('Customer user id in origin (in bb.com)'))
      ->setDisplayOptions('view', [
        'label' => 'above',
        'type' => 'number_integer',
      ])
      ->setDisplayConfigurable('view', TRUE)
      ->setReadOnly(TRUE);

    // Bodeboca product nid, stored as a reference to a BodebocaNode entity.
    $fields['nid'] = BaseFieldDefinition::create('entity_reference')
      ->setLabel(t('Product nid'))
      ->setDescription(t('Product nid in backend.'))
      ->setSetting('target_type', 'BodebocaNode');

    // Rating given by the user to the product. Range 0 - 100.
    $fields['valoration'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('User product valoration'))
      ->setDescription(t('Optional value, user can valorate the product from 0 to 100'))
      ->setReadOnly(TRUE);

    // Bodeboca order created date.
    $fields['order_date'] = BaseFieldDefinition::create('timestamp')
      ->setLabel(t('Order date'))
      ->setDescription(t('Date when the order has been created.'))
      ->setDisplayOptions('view', [
        'label' => 'above',
      ])
      ->setDisplayConfigurable('view', TRUE);

    // Meta fields. Fields about this entity.
    $fields['changed'] = BaseFieldDefinition::create('changed')
      ->setLabel(t('Changed'))
      ->setDescription(t('The time that the entity was last edited.'));

    return $fields;
  }

}

Get the data

+800k order items
+60k clients
+8k products

Raw data module

$/valentina/web/modules/custom/bb_raw_data$ tree .

├── bb_raw_data.drush.inc
├── bb_raw_data.info.yml
├── bb_raw_data.links.menu.yml
├── bb_raw_data.module
├── bb_raw_data.permissions.yml
├── bb_raw_data.routing.yml
├── bb_raw_data.services.yml
├── src
│   ├── Entity
│   │   ├── BodebocaNode.php
│   │   └── Transaction.php
│   ├── Form
│   │   └── UpdateDataForm.php
│   ├── Importer.php
│   ├── Plugin
│   │   └── QueueWorker
│   │       ├── ImportBodebocaNode.php
│   │       └── ImportTransaction.php
│   └── TransactionManager.php
└── tests
    └── src
        └── Unit
            └── ImporterTest.php



$> drush import_all_transaction
*/15 * * * * /usr/local/bin/drush --root=/var/www/html/valentina/web queue-run 
   import_transaction --time-limit=60 --uri=http://path.to.drupal.org >/dev/null 2>&1

Process element

/**
 * Queue worker that imports the transactions of one queued date window.
 *
 * @QueueWorker(
 *   id = "import_transaction",
 *   title = @Translation("Import Transaction"),
 * )
 */
class ImportTransaction extends QueueWorkerBase implements ContainerFactoryPluginInterface {

  /**
   * The importer service (container id "bb_raw_data.importer").
   *
   * @var object
   */
  protected $importer;

  /**
   * Constructs the queue worker.
   *
   * Without this constructor the inherited plugin constructor would receive
   * the importer as its $configuration argument and the importer property
   * would never be populated.
   *
   * @param array $configuration
   *   The plugin configuration.
   * @param string $plugin_id
   *   The plugin id.
   * @param mixed $plugin_definition
   *   The plugin definition.
   * @param object $importer
   *   The service that exposes importTransactions().
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, $importer) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);
    $this->importer = $importer;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('bb_raw_data.importer')
    );
  }

  /**
   * Imports the transactions described by one queue item.
   *
   * @param mixed $data
   *   A two-element array; both elements are handed, in order, to the
   *   importer's importTransactions() method.
   *
   * @return bool
   *   Always TRUE.
   */
  public function processItem($data) {
    $this->importer->importTransactions($data[0], $data[1]);
    return TRUE;
  }

}

Drush command

/**
 * Drush callback: queues the import of every transaction since a given date.
 *
 * The period between $from and the current time is split into consecutive
 * three-day windows and one "import_transaction" queue item is created per
 * window.
 *
 * @param string|null $from
 *   Any date string understood by \DateTime. Defaults to the date of the
 *   first Bodeboca order.
 *
 * @throws \Exception
 *   When $from cannot be parsed as a date.
 */
function drush_bb_raw_data_import_all_transaction($from = NULL) {
  if (!$from) {
    // First Bodeboca order.
    $from = '2010-12-10 00:00:00';
  }

  // The \DateTime constructor throws on an unparsable string, it never
  // returns a falsy value, so the failure must be caught to report the
  // friendly message.
  try {
    $from = new \DateTime($from);
  }
  catch (\Exception $e) {
    throw new \Exception('From date cannot be initialized. Set the from date using a correct php date string.', 0, $e);
  }

  $now = new \DateTime();

  // The queue does not change between iterations: fetch it once.
  /** @var \Drupal\Core\Queue\QueueInterface $queue */
  $queue = \Drupal::service('queue')->get('import_transaction');

  while ($from < $now) {
    // Compose the params: a window of three days starting at $from.
    $to = clone $from;
    $to->modify('+3 days');

    $queue->createItem([$from, $to]);

    // The next window starts where this one ends.
    $from = clone $to;
  }
}

Keep the data updated

D6

D8

REST

api module

bb_api/
├── bb_api.info.yml
└── src
    └── Plugin
        └── rest
            └── resource
                ├── BodebocaNodeExtended.php
                ├── BodebocaScoredNodes.php
                ├── BodebocaTransactionExtended.php
                └── BodebocaUserRecommendations.php
/**
 * Provides a resource that receives transaction data through POST requests.
 *
 * @RestResource(
 *   id = "bodeboca_transaction_extended:transaction",
 *   label = @Translation("Bodeboca transaction extended"),
 *   uri_paths = {
 *     "canonical" = "/bb_api/bodeboca_transaction_extended/{entity}",
 *     "https://www.drupal.org/link-relations/create" = "/bb_api/bodeboca_transaction_extended"
 *   }
 * )
 */
class BodebocaTransactionExtended extends ResourceBase {

  /**
   * Responds to POST requests.
   *
   * @param mixed $data
   *   The request payload; it is forwarded untouched to the updateOrder()
   *   method of the "bb_raw_data.transaction_manager" service.
   *
   * @return \Drupal\rest\ResourceResponse
   *   An empty response with status code 200.
   */
  public function post($data) {
    $transactionManager = \Drupal::service('bb_raw_data.transaction_manager');
    $transactionManager->updateOrder($data);

    // Nothing to report back: empty body, status 200.
    $emptyBody = [];
    return new ResourceResponse($emptyBody, 200);
  }

}

Implement a recommendation algorithm

50%?

50%?

20%?

80%?

5%?

95%?

Configurable

D8

REST (get)

Mysql

SearchAPI

+

Search API Solr

Solr

D6

Search API Backend

Backend

Solr Index (fields)

Solr index (status)

D6

D8

REST

SearchAPI

Solr

Update

bb_brain/
├── bb_brain.drush.inc
├── bb_brain.info.yml
├── bb_brain.links.menu.yml
├── bb_brain.module
├── bb_brain.permissions.yml
├── bb_brain.routing.yml
├── bb_brain.services.yml
└── src
    ├── Annotation
    │   └── BrainPartial.php
    ├── BrainPartialBase.php
    ├── BrainPartialInterface.php
    ├── BrainPartialManager.php
    ├── Form
    │   ├── BrainForm.php
    │   └── ConfigPartialsForm.php
    ├── Plugin
    │   ├── BrainPartial
    │   │   ├── BrainPartialOrders.php
    │   │   └── BrainPartialRating.php
    │   └── QueueWorker
    │       └── Calculate.php
    ├── SearchApiConnect.php
    └── Utils.php

Brain module

/**
 * Defines a Brain Partial item annotation object.
 *
 * Plugin classes reference this annotation by the name BrainPartial in their
 * class docblock, passing an id and a label.
 *
 * @see \Drupal\bb_brain\BrainPartialManager
 * @see plugin_api
 *
 * @Annotation
 */
class BrainPartial extends Plugin {


  /**
   * The plugin ID.
   *
   * This is the value read as the 'id' key of a plugin definition.
   *
   * @var string
   */
  public $id;

  /**
   * The human-readable label of the plugin.
   *
   * @var \Drupal\Core\Annotation\Translation
   *
   * @ingroup plugin_translatable
   */
  public $label;

}
bb_brain/
├── bb_brain.drush.inc
├── bb_brain.info.yml
├── bb_brain.links.menu.yml
├── bb_brain.module
├── bb_brain.permissions.yml
├── bb_brain.routing.yml
├── bb_brain.services.yml
└── src
    ├── Annotation
    │   └── BrainPartial.php
    ├── BrainPartialBase.php
    ├── BrainPartialInterface.php
    ├── BrainPartialManager.php
    ├── Form
    │   ├── BrainForm.php
    │   └── ConfigPartialsForm.php
    ├── Plugin
    │   ├── BrainPartial
    │   │   ├── BrainPartialOrders.php
    │   │   └── BrainPartialRating.php
    │   └── QueueWorker
    │       └── Calculate.php
    ├── SearchApiConnect.php
    └── Utils.php

Brain module

/**
 * Calculate a rate using the orders.
 *
 * @BrainPartial(
 *  id = "bb_brain_brainpartial_orders",
 *  label = @Translation("Brain Partial Orders")
 * )
 */
class BrainPartialOrders extends BrainPartialBase implements BrainPartialInterface {

  /**
   * Scores brands from the orders of users who bought the given products.
   *
   * @param array $nids
   *   Product node ids used as the starting point of the recommendation.
   *
   * @return array
   *   The values extracted from the grouped search response by
   *   Utils::getValuesFromGroup(); an empty array when no usable node id was
   *   given.
   */
  public function processPartial(array $nids) {

    // The ids are interpolated into a raw Solr query below, so keep only
    // distinct, non-zero integer values.
    $nids = array_values(array_unique(array_filter(array_map('intval', $nids))));

    // Without ids the join query would be "its_nid:()", which Solr rejects.
    // No input products means no order-based recommendations.
    if (!$nids) {
      return [];
    }

    $index = Index::load('transaction');
    $query = $index->query([
      'offset' => 0,
    ]);

    // Join on the user id: select the transactions of every user that
    // bought at least one of the given products.
    $nidsString = implode(' ', $nids);
    $query->setOption('solr_param_q', "{!join from=its_uid to=its_uid}its_nid:($nidsString)");

    // Remove the searched nodes from the results.
    // A node can't be its own recommendation.
    $query->addCondition('nid', $nids, 'NOT IN');

    // Group the results by brand. The totals of each group are used.
    $query->setOption('search_api_grouping', [
      'use_grouping' => TRUE,
      'fields' => [
        'brand',
      ],
    ]);

    $query->addTag('bb_brain_process_partial');
    $response = $query->execute();
    return Utils::getValuesFromGroup($response, 'ss_brand', $this->getPonderation(), $this->getVerbose());
  }

}
bb_brain/
├── bb_brain.drush.inc
├── bb_brain.info.yml
├── bb_brain.links.menu.yml
├── bb_brain.module
├── bb_brain.permissions.yml
├── bb_brain.routing.yml
├── bb_brain.services.yml
└── src
    ├── Annotation
    │   └── BrainPartial.php
    ├── BrainPartialBase.php
    ├── BrainPartialInterface.php
    ├── BrainPartialManager.php
    ├── Form
    │   ├── BrainForm.php
    │   └── ConfigPartialsForm.php
    ├── Plugin
    │   ├── BrainPartial
    │   │   ├── BrainPartialOrders.php
    │   │   └── BrainPartialRating.php
    │   └── QueueWorker
    │       └── Calculate.php
    ├── SearchApiConnect.php
    └── Utils.php

Brain module

/**
 * Combines the results of every enabled BrainPartial plugin.
 *
 * @param array $nids
 *   Node ids to base the recommendation on. They are expanded to every node
 *   that shares a brand with any of them.
 * @param bool $verbose
 *   Passed to each plugin through setVerbose().
 *
 * @return array
 *   One entry per key found in any partial result. Each entry has a
 *   'ponderation' total and, when the partial results provide them,
 *   'matches' and 'totalMatches'.
 */
public function calculateRecommended(array $nids = [], $verbose = FALSE) {

    // Search all plugins that implement BrainPartial.
    $partialResults = [];
    $brainManager = \Drupal::service('plugin.manager.bb_brain');
    $partials = $brainManager->getDefinitions();

    // Map whose array keys are all keys found in any partial result.
    $keys = [];

    /*
     * Calculate the nodes using the brand of each node.
     * We want to recommend based on brands instead of based on nids, so
     * every nid is expanded to all nids that share its brand.
     */
    // Always defined, so the plugins receive an array even without $nids.
    $nidsToCalculate = [];
    if ($nids) {
      $brandsDone = [];
      $bbNodes = BodebocaNode::loadMultiple($nids);
      foreach ($bbNodes as $bbNode) {
        $brand = $bbNode->get('brand')->value;

        // Several nodes can share a brand: query each brand only once.
        if (in_array($brand, $brandsDone, TRUE)) {
          continue;
        }
        $brandsDone[] = $brand;

        $allNidsOfBrand = \Drupal::entityQuery('BodebocaNode')
          ->condition('brand', $brand)
          ->execute();
        $nidsToCalculate = array_merge($nidsToCalculate, $allNidsOfBrand);
      }
    }

    // Solr can throw an Exception if the server is away.
    try {
      // Get all partial results.
      foreach ($partials as $partial) {
        /* @var \Drupal\bb_brain\BrainPartialInterface $pluginInstance */
        $pluginInstance = $brainManager->createInstance($partial['id']);
        $pluginInstance->setVerbose($verbose);

        // Sometimes we have disabled plugins with the ponderation value as 0
        if ($pluginInstance->getPonderation() > 0) {
          $partialResult = $pluginInstance->processPartial($nidsToCalculate);
          $partialResults[] = $partialResult;

          // Union by array key. Keeping $keys as a map (instead of a list of
          // keys) prevents the keys of earlier partials from being replaced
          // by list indexes when a second partial is merged.
          $keys += $partialResult;
        }
      }
    }
    // @todo improve this exception management.
    catch (SearchApiException $e) {
      drupal_set_message($e->getMessage(), 'error');
    }

    // Compact all results adding each value.
    // The value was previously pondered.
    $result = [];
    foreach (array_keys($keys) as $key) {
      $result[$key]['ponderation'] = 0;
      foreach ($partialResults as $partialResult) {

        // Only if the ponderation has value use the data.
        if (isset($partialResult[$key]['ponderation'])) {
          $result[$key]['ponderation'] += $partialResult[$key]['ponderation'];
          if (isset($partialResult[$key]['matches'])) {
            // Start the counter explicitly; "+=" on a missing index raises
            // an undefined index notice.
            if (!isset($result[$key]['matches'])) {
              $result[$key]['matches'] = 0;
            }
            $result[$key]['matches'] += $partialResult[$key]['matches'];
          }
          if (isset($partialResult[$key]['totalMatches'])) {
            $result[$key]['totalMatches'] = $partialResult[$key]['totalMatches'];
          }

        }
      }
    }

    // Order the array by value, highest first.
    // NOTE(review): the values are arrays, so PHP compares the number of
    // elements before comparing 'ponderation'. Confirm that this is the
    // intended ordering when entries have different sets of keys.
    arsort($result);
    return $result;
  }

Expose the data

api module

bb_api/
├── bb_api.info.yml
└── src
    └── Plugin
        └── rest
            └── resource
                ├── BodebocaNodeExtended.php
                ├── BodebocaScoredNodes.php
                ├── BodebocaTransactionExtended.php
                └── BodebocaUserRecommendations.php
/**
 * Provides a resource that returns the nodes scored by the brain module.
 *
 * @RestResource(
 *   id = "bodeboca_scored_nodes",
 *   label = @Translation("Bodeboca scored nodes"),
 *   uri_paths = {
 *     "canonical" = "/bb_api/bodeboca_scored_nodes",
 *   }
 * )
 */
class BodebocaScoredNodes extends ResourceBase {

  /**
   * GET request response.
   *
   * @return \Drupal\rest\ResourceResponse
   *   The value returned by calculateRecommended(), marked as uncacheable.
   *
   * @throws \Symfony\Component\HttpKernel\Exception\HttpException
   *   When the current user lacks the "access content" permission.
   */
  public function get() {

    // This class never declares or assigns a currentUser property, so the
    // current user is requested from the container instead.
    if (!\Drupal::currentUser()->hasPermission('access content')) {
      throw new AccessDeniedHttpException();
    }

    // @todo: Inject this service.
    $result = \Drupal::service('bb_brain.search_api_conect')->calculateRecommended();

    // The scores must never be served from cache.
    $cache_metadata = new CacheableMetadata();
    $cache_metadata->setCacheMaxAge(0);

    // Create Response object with the cache.
    $resourceResponse = new ResourceResponse($result);
    $resourceResponse->addCacheableDependency($cache_metadata);

    return $resourceResponse;
  }

}

Some results

Next Steps

This is a universe

  • Improve performance
  • Results performance
  • Use a Matrix Factorization strategy
  • Create clusters of users based on their purchases
  • ...
  • ...
  • ...
  • Really, a fucking big universe

Thanks!

name

Samuel Solís

nick

@estoyausente

company

Bodeboca

Building a search recommendation engine

By Samuel Solís

Building a search recommendation engine

  • 1,655