Building a search recommendation engine using D8 and Solr
Drupalcamp Alicante 2018
About me
name
Samuel Solís
nick
@estoyausente
company
Bodeboca
http://www.bodeboca.com
Recommendation engine
Index
- Build a data model
- Get the data
- Keep the data updated
- Implement a recommendation algorithm
- Expose the result
- Next steps
Warning!
100% production code
Build a data model
Raw data module
$/valentina/web/modules/custom/bb_raw_data$ tree .
├── bb_raw_data.drush.inc
├── bb_raw_data.info.yml
├── bb_raw_data.links.menu.yml
├── bb_raw_data.module
├── bb_raw_data.permissions.yml
├── bb_raw_data.routing.yml
├── bb_raw_data.services.yml
├── src
│ ├── Entity
│ │ ├── BodebocaNode.php
│ │ └── Transaction.php
│ ├── Form
│ │ └── UpdateDataForm.php
│ ├── Importer.php
│ ├── Plugin
│ │ └── QueueWorker
│ │ ├── ImportBodebocaNode.php
│ │ └── ImportTransaction.php
│ └── TransactionManager.php
└── tests
└── src
└── Unit
└── ImporterTest.php
Transaction
bbNode
User-product relationship
Product information
D8 Custom entities
/**
 * Defines the Transaction entity.
 *
 * A transaction stores one imported order row: the short order id, the
 * customer uid in the origin site, the referenced product (a BodebocaNode),
 * an optional user valoration and the order date.
 *
 * @ingroup raw_data
 *
 * @ContentEntityType(
 *   id = "transaction",
 *   label = @Translation("Transaction entity"),
 *   handlers = {
 *     "view_builder" = "Drupal\Core\Entity\EntityViewBuilder",
 *     "views_data" = "Drupal\views\EntityViewsData",
 *   },
 *   base_table = "bb_raw_data_transactions",
 *   admin_permission = "administer raw data",
 *   fieldable = FALSE,
 *   entity_keys = {
 *     "id" = "id",
 *     "order_id" = "order_id",
 *   },
 * )
 */
class Transaction extends ContentEntityBase {

  /**
   * Declares the base fields stored for every transaction.
   *
   * The parent implementation is intentionally not called: every field,
   * including the id, is declared explicitly here.
   *
   * @param \Drupal\Core\Entity\EntityTypeInterface $entity_type
   *   The entity type definition (unused).
   *
   * @return \Drupal\Core\Field\FieldDefinitionInterface[]
   *   The base field definitions, keyed by field name.
   */
  public static function baseFieldDefinitions(EntityTypeInterface $entity_type) {
    $fields = [];

    // Standard field, used as the unique primary identifier.
    $fields['id'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('ID'))
      ->setDescription(t('The ID of the Transaction entity.'))
      ->setReadOnly(TRUE);

    // Bodeboca order id.
    $fields['order_id'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('Short order id'))
      ->setDescription(t('Short order id'))
      ->setDisplayOptions('view', [
        'label' => 'above',
        // 'number_integer' is the core formatter id for integer fields;
        // 'int' is not a formatter plugin.
        'type' => 'number_integer',
      ])
      ->setDisplayConfigurable('view', TRUE)
      ->setReadOnly(TRUE);

    // Bodeboca user id.
    $fields['uid'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('Customer user id'))
      ->setDescription(t('Customer user id in origin (in bb.com)'))
      ->setDisplayOptions('view', [
        'label' => 'above',
        'type' => 'number_integer',
      ])
      ->setDisplayConfigurable('view', TRUE)
      ->setReadOnly(TRUE);

    // Bodeboca product nid, stored as a reference to a BodebocaNode entity.
    $fields['nid'] = BaseFieldDefinition::create('entity_reference')
      ->setLabel(t('Product nid'))
      ->setDescription(t('Product nid in backend.'))
      ->setSetting('target_type', 'BodebocaNode');

    // User rating of the product. Range 0 - 100.
    $fields['valoration'] = BaseFieldDefinition::create('integer')
      ->setLabel(t('User product valoration'))
      ->setDescription(t('Optional value, user can valorate the product from 0 to 100'))
      ->setReadOnly(TRUE);

    // Bodeboca order created date.
    $fields['order_date'] = BaseFieldDefinition::create('timestamp')
      ->setLabel(t('Order date'))
      ->setDescription(t('Date when the order has been created.'))
      ->setDisplayOptions('view', [
        'label' => 'above',
      ])
      ->setDisplayConfigurable('view', TRUE);

    // Meta fields. Fields about this entity.
    $fields['changed'] = BaseFieldDefinition::create('changed')
      ->setLabel(t('Changed'))
      ->setDescription(t('The time that the entity was last edited.'));

    return $fields;
  }

}
Get the data
+800k order items
+60k clients
+8k products
Raw data module
$/valentina/web/modules/custom/bb_raw_data$ tree .
├── bb_raw_data.drush.inc
├── bb_raw_data.info.yml
├── bb_raw_data.links.menu.yml
├── bb_raw_data.module
├── bb_raw_data.permissions.yml
├── bb_raw_data.routing.yml
├── bb_raw_data.services.yml
├── src
│ ├── Entity
│ │ ├── BodebocaNode.php
│ │ └── Transaction.php
│ ├── Form
│ │ └── UpdateDataForm.php
│ ├── Importer.php
│ ├── Plugin
│ │ └── QueueWorker
│ │ ├── ImportBodebocaNode.php
│ │ └── ImportTransaction.php
│ └── TransactionManager.php
└── tests
└── src
└── Unit
└── ImporterTest.php
$> drush import_all_transaction
*/15 * * * * /usr/local/bin/drush --root=/var/www/html/valentina/web queue-run import_transaction --time-limit=60 --uri=http://path.to.drupal.org >/dev/null 2>&1
Process element
/**
 * Queue worker that imports one batch of transactions per queue item.
 *
 * @QueueWorker(
 *   id = "import_transaction",
 *   title = @Translation("Import Transaction"),
 * )
 */
class ImportTransaction extends QueueWorkerBase implements ContainerFactoryPluginInterface {

  /**
   * The 'bb_raw_data.importer' service.
   *
   * @var object
   */
  protected $importer;

  /**
   * Constructs the worker and stores the importer service.
   *
   * @param array $configuration
   *   The plugin configuration.
   * @param string $plugin_id
   *   The plugin id.
   * @param mixed $plugin_definition
   *   The plugin definition.
   * @param object $importer
   *   The 'bb_raw_data.importer' service; must expose importTransactions().
   */
  public function __construct(array $configuration, $plugin_id, $plugin_definition, $importer) {
    parent::__construct($configuration, $plugin_id, $plugin_definition);
    $this->importer = $importer;
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // The plugin base constructor needs the three plugin arguments; the
    // importer is appended as the injected dependency.
    return new static(
      $configuration,
      $plugin_id,
      $plugin_definition,
      $container->get('bb_raw_data.importer')
    );
  }

  /**
   * Imports the transactions described by one queue item.
   *
   * @param mixed $data
   *   A two-element array; both elements are forwarded, in order, to the
   *   importer's importTransactions(). The drush command that fills the
   *   'import_transaction' queue enqueues [from, to] \DateTime pairs.
   *
   * @return bool
   *   Always TRUE.
   */
  public function processItem($data) {
    $this->importer->importTransactions($data[0], $data[1]);
    return true;
  }

}
Drush command
/**
 * Enqueues the order history for import, split into three-day windows.
 *
 * Starting at $from, one 'import_transaction' queue item is created for each
 * consecutive [from, from + 3 days] window until the window start reaches the
 * current time.
 *
 * @param string|null $from
 *   Any date string accepted by \DateTime. Falsy values fall back to the date
 *   of the first Bodeboca order.
 *
 * @throws \Exception
 *   When $from cannot be parsed as a date.
 */
function drush_bb_raw_data_import_all_transaction($from = NULL) {
  if (!$from) {
    // First Bodeboca order.
    $from = '2010-12-10 00:00:00';
  }

  // \DateTime's constructor throws on an unparsable string rather than
  // returning a falsy value, so the failure is translated here into the
  // user-facing message (the original exception is kept as "previous").
  try {
    $from = new \DateTime($from);
  }
  catch (\Exception $e) {
    throw new \Exception('From date cannot be initialized. Set the from date using a correct php date string.', 0, $e);
  }

  $now = new \DateTime();

  // The queue does not change between iterations; fetch it once.
  /** @var \Drupal\Core\Queue\QueueInterface $queue */
  $queue = \Drupal::service('queue')->get('import_transaction');

  while ($from < $now) {
    // Compose the params: a window of three days starting at $from.
    $to = clone $from;
    $to->modify('+3 days');
    $queue->createItem([$from, $to]);
    // Clone so the queued $to is never mutated by the next iteration.
    $from = clone $to;
  }
}
Keep the data updated
D6
D8
REST
api module
bb_api/
├── bb_api.info.yml
└── src
└── Plugin
└── rest
└── resource
├── BodebocaNodeExtended.php
├── BodebocaScoredNodes.php
├── BodebocaTransactionExtended.php
└── BodebocaUserRecommendations.php
/**
 * Provides a REST resource that receives transaction (order) updates.
 *
 * @RestResource(
 *   id = "bodeboca_transaction_extended:transaction",
 *   label = @Translation("Bodeboca transaction extended"),
 *   uri_paths = {
 *     "canonical" = "/bb_api/bodeboca_transaction_extended/{entity}",
 *     "https://www.drupal.org/link-relations/create" = "/bb_api/bodeboca_transaction_extended"
 *   }
 * )
 */
class BodebocaTransactionExtended extends ResourceBase {

  /**
   * Responds to POST requests by handing the payload to the transaction manager.
   *
   * @param mixed $data
   *   The request payload; passed untouched to updateOrder().
   *
   * @return \Drupal\rest\ResourceResponse
   *   An empty response with status code 200.
   */
  public function post($data) {
    $transactionManager = \Drupal::service('bb_raw_data.transaction_manager');
    $transactionManager->updateOrder($data);

    // Nothing to report back: empty body, HTTP 200.
    $emptyPayload = [];
    return new ResourceResponse($emptyPayload, 200);
  }

}
Implement a recommendation algorithm
50%?
50%?
20%?
80%?
5%?
95%?
Configurable
D8
REST (get)
Mysql
SearchAPI
+
Search API Solr
Solr
D6
Search API Backend
Backend
Solr Index (fields)
Solr index (status)
D6
D8
REST
SearchAPI
Solr
Update
bb_brain/
├── bb_brain.drush.inc
├── bb_brain.info.yml
├── bb_brain.links.menu.yml
├── bb_brain.module
├── bb_brain.permissions.yml
├── bb_brain.routing.yml
├── bb_brain.services.yml
└── src
├── Annotation
│ └── BrainPartial.php
├── BrainPartialBase.php
├── BrainPartialInterface.php
├── BrainPartialManager.php
├── Form
│ ├── BrainForm.php
│ └── ConfigPartialsForm.php
├── Plugin
│ ├── BrainPartial
│ │ ├── BrainPartialOrders.php
│ │ └── BrainPartialRating.php
│ └── QueueWorker
│ └── Calculate.php
├── SearchApiConnect.php
└── Utils.php
Brain module
/**
* Defines a Brain Partial item annotation object.
*
* Plugin classes declare themselves by placing
* @BrainPartial(id = "...", label = @Translation("...")) in their class
* docblock; the two public properties below are the values that annotation
* accepts.
*
* @see \Drupal\bb_brain\BrainPartialManager
* @see plugin_api
*
* @Annotation
*/
class BrainPartial extends Plugin {
/**
* The plugin ID.
*
* This is the value read from each plugin definition as 'id' when the
* plugin manager is asked to create an instance.
*
* @var string
*/
public $id;
/**
* The human-readable label of the plugin.
*
* @var \Drupal\Core\Annotation\Translation
*
* @ingroup plugin_translatable
*/
public $label;
}
bb_brain/
├── bb_brain.drush.inc
├── bb_brain.info.yml
├── bb_brain.links.menu.yml
├── bb_brain.module
├── bb_brain.permissions.yml
├── bb_brain.routing.yml
├── bb_brain.services.yml
└── src
├── Annotation
│ └── BrainPartial.php
├── BrainPartialBase.php
├── BrainPartialInterface.php
├── BrainPartialManager.php
├── Form
│ ├── BrainForm.php
│ └── ConfigPartialsForm.php
├── Plugin
│ ├── BrainPartial
│ │ ├── BrainPartialOrders.php
│ │ └── BrainPartialRating.php
│ └── QueueWorker
│ └── Calculate.php
├── SearchApiConnect.php
└── Utils.php
Brain module
/**
 * Calculate a rate using the orders.
 *
 * @BrainPartial(
 *   id = "bb_brain_brainpartial_orders",
 *   label = @Translation("Brain Partial Orders")
 * )
 */
class BrainPartialOrders extends BrainPartialBase implements BrainPartialInterface {

  /**
   * Scores brands bought by the users who also bought the given products.
   *
   * Queries the 'transaction' Search API index with a Solr join on the user
   * id, excludes the seed products themselves, groups the hits by brand and
   * lets Utils::getValuesFromGroup() turn the grouped response into the
   * pondered result.
   *
   * @param array $nids
   *   Product nids used as the seed of the recommendation.
   *
   * @return array
   *   The value returned by Utils::getValuesFromGroup(), or an empty array
   *   when no seed nids are given.
   */
  public function processPartial(array $nids) {
    // Without seed products the join below would be built around an empty
    // "its_nid:()" group, so there is nothing meaningful to ask Solr.
    if (empty($nids)) {
      return [];
    }

    // Get all wines bought by these users.
    $index = Index::load('transaction');
    $query = $index->query([
      'offset' => 0,
    ]);

    // Subquery join: products bought by the people who bought one of the
    // seed products.
    $nidsString = implode(' ', $nids);
    $query->setOption('solr_param_q', "{!join from=its_uid to=its_uid}its_nid:($nidsString)");

    // Remove the searched nodes from the results.
    // A node can't be its own recommendation.
    $query->addCondition('nid', $nids, 'NOT IN');

    // And group the results by brand. We will use the total values.
    $query->setOption('search_api_grouping', [
      'use_grouping' => TRUE,
      'fields' => [
        'brand',
      ],
    ]);
    $query->addTag('bb_brain_process_partial');

    $response = $query->execute();
    return Utils::getValuesFromGroup($response, 'ss_brand', $this->getPonderation(), $this->getVerbose());
  }

}
bb_brain/
├── bb_brain.drush.inc
├── bb_brain.info.yml
├── bb_brain.links.menu.yml
├── bb_brain.module
├── bb_brain.permissions.yml
├── bb_brain.routing.yml
├── bb_brain.services.yml
└── src
├── Annotation
│ └── BrainPartial.php
├── BrainPartialBase.php
├── BrainPartialInterface.php
├── BrainPartialManager.php
├── Form
│ ├── BrainForm.php
│ └── ConfigPartialsForm.php
├── Plugin
│ ├── BrainPartial
│ │ ├── BrainPartialOrders.php
│ │ └── BrainPartialRating.php
│ └── QueueWorker
│ └── Calculate.php
├── SearchApiConnect.php
└── Utils.php
Brain module
/**
 * Combines the results of every enabled BrainPartial plugin.
 *
 * The given nids are first expanded to all BodebocaNode ids sharing a brand
 * with them. Each plugin whose ponderation is greater than 0 then processes
 * that list, and the per-key values of all plugins are summed.
 *
 * @param array $nids
 *   Seed product nids. May be empty.
 * @param bool $verbose
 *   Forwarded to every plugin through setVerbose().
 *
 * @return array
 *   Entries keyed as the plugins key their results, each holding at least
 *   'ponderation' and, when a plugin provides them, 'matches' and
 *   'totalMatches'. Sorted by 'ponderation', highest first.
 */
public function calculateRecommended(array $nids = [], $verbose = FALSE) {
  // Search all plugins that implement BrainPartial.
  $brainManager = \Drupal::service('plugin.manager.bb_brain');
  $partials = $brainManager->getDefinitions();
  $partialResults = [];
  // Set of every key seen in any partial result (stored as array keys so
  // that merging never turns keys into values).
  $keys = [];

  /*
   * Calculate the nodes using the brand of each node.
   * We want to recommend based on brand instead of on single nids, so the
   * seed list is expanded to every nid of each seed node's brand.
   */
  // Always defined, so the plugins receive an array even without seeds.
  $nidsToCalculate = [];
  if ($nids) {
    $bbNodes = BodebocaNode::loadMultiple($nids);
    foreach ($bbNodes as $bbNode) {
      $brand = $bbNode->get('brand')->value;
      $allNidsOfVintage = \Drupal::entityQuery('BodebocaNode')
        ->condition('brand', $brand)
        ->execute();
      $nidsToCalculate = array_merge($nidsToCalculate, $allNidsOfVintage);
    }
    // Two seeds of the same brand would otherwise list every nid twice.
    $nidsToCalculate = array_values(array_unique($nidsToCalculate));
  }

  // Solr can throw an Exception if the server is away.
  try {
    // Get all partial results.
    foreach ($partials as $partial) {
      /* @var \Drupal\bb_brain\BrainPartialInterface $pluginInstance */
      $pluginInstance = $brainManager->createInstance($partial['id']);
      $pluginInstance->setVerbose($verbose);
      // Sometimes we have disabled plugins with the ponderation value as 0.
      if ($pluginInstance->getPonderation() > 0) {
        $partialResult = $pluginInstance->processPartial($nidsToCalculate);
        $partialResults[] = $partialResult;
        // All nodes are important in the result: remember every key.
        $keys += array_fill_keys(array_keys($partialResult), TRUE);
      }
    }
  }
  // @todo improve this exception management.
  catch (SearchApiException $e) {
    drupal_set_message($e->getMessage(), 'error');
  }

  // Compact all results adding each value.
  // The value was previously pondered.
  $result = [];
  foreach (array_keys($keys) as $key) {
    $result[$key]['ponderation'] = 0;
    foreach ($partialResults as $partialResult) {
      // Only if the ponderation has value use the data.
      if (!isset($partialResult[$key]['ponderation'])) {
        continue;
      }
      $result[$key]['ponderation'] += $partialResult[$key]['ponderation'];
      if (isset($partialResult[$key]['matches'])) {
        // Start the counter at 0 the first time a plugin reports matches.
        if (!isset($result[$key]['matches'])) {
          $result[$key]['matches'] = 0;
        }
        $result[$key]['matches'] += $partialResult[$key]['matches'];
      }
      if (isset($partialResult[$key]['totalMatches'])) {
        $result[$key]['totalMatches'] = $partialResult[$key]['totalMatches'];
      }
    }
  }

  // Order by ponderation, highest first, keeping the keys. Comparing the
  // ponderation explicitly avoids depending on how PHP compares whole
  // arrays of different shapes.
  uasort($result, function (array $a, array $b) {
    if ($a['ponderation'] == $b['ponderation']) {
      return 0;
    }
    return ($a['ponderation'] < $b['ponderation']) ? 1 : -1;
  });

  return $result;
}
Expose the data
api module
bb_api/
├── bb_api.info.yml
└── src
└── Plugin
└── rest
└── resource
├── BodebocaNodeExtended.php
├── BodebocaScoredNodes.php
├── BodebocaTransactionExtended.php
└── BodebocaUserRecommendations.php
/**
 * Provides a REST resource that returns the calculated recommendation scores.
 *
 * @RestResource(
 *   id = "bodeboca_scored_nodes",
 *   label = @Translation("Bodeboca scored nodes"),
 *   uri_paths = {
 *     "canonical" = "/bb_api/bodeboca_scored_nodes",
 *   }
 * )
 */
class BodebocaScoredNodes extends ResourceBase {

  /**
   * Responds to GET requests with the output of calculateRecommended().
   *
   * @return \Drupal\rest\ResourceResponse
   *   The recommendation result, marked as uncacheable (max-age 0).
   *
   * @throws \Symfony\Component\HttpKernel\Exception\HttpException
   *   An access denied exception when the current user lacks the
   *   'access content' permission.
   */
  public function get() {
    // No current-user property is defined or injected in this class, so the
    // account is fetched from the container like the service below.
    if (!\Drupal::currentUser()->hasPermission('access content')) {
      throw new AccessDeniedHttpException();
    }

    // @todo: Inject this service.
    $result = \Drupal::service('bb_brain.search_api_conect')->calculateRecommended();

    // Disable caching for this response: the scores must always be fresh.
    $build = [
      '#cache' => [
        'max-age' => 0,
      ],
    ];
    $cache_metadata = CacheableMetadata::createFromRenderArray($build);

    // Create Response object with the cache.
    $resourceResponse = new ResourceResponse($result);
    $resourceResponse->addCacheableDependency($cache_metadata);
    return $resourceResponse;
  }

}
Some results
Next Steps
This is a universe
- Improve performance
- Results performance
- Use a Matrix Factorization strategy
- Create clusters of users based on their purchases
- ...
- ...
- ...
- Really, a fucking big universe
Thanks!
name
Samuel Solís
nick
@estoyausente
company
Bodeboca
Building a search recommendation engine
By Samuel Solís
Building a search recommendation engine
- 1,655