The What and Why of Python and Elasticsearch
@honzakral
Elasticsearch
HTTP REST
import requests
# Base URL of the Elasticsearch HTTP endpoint used by the calls below.
ES_URL = 'http://localhost:9200'
# PUT a JSON document to the path /i/t/42.
requests.put(ES_URL + '/i/t/42', json={"title": "PyCon Belarus"})
# A match query on the "title" field for the text "pycon".
query = {"query": {"match": {"title": "pycon"}}}
# Send the query as the JSON body of a GET to /i/_search and decode the JSON reply.
data = requests.get(ES_URL + '/i/_search', json=query).json()
I know http!
well, actually...
Rich API
Distributed
Official Client(s)
- maintained
- full API support
- load balancing
- node failures
- different transports
- no opinions
No Opinions?!
"Nobody should have a reason not to use the official client."
1-to-1 mapping
# Bool query against "my-index": title must match "python",
# description must not match "beta".
response = client.search(
    index="my-index",
    body={
        "query": {
            "bool": {
                "must": {"match": {"title": "python"}},
                "must_not": {"match": {"description": "beta"}},
            },
        },
    },
)
# Raw HTTP form of the search: GET my-index/_search with the query JSON passed via -d.
curl -X GET localhost:9200/my-index/_search -d '{
"query": {
"bool": {
"must": {"match": {"title": "python"}},
"must_not": {"match": {"description": "beta"}}
}
}
}'
Component architecture
from elasticsearch import Elasticsearch
from elasticsearch_async import AsyncTransport
# Build the client with AsyncTransport supplied as its transport class.
client = Elasticsearch(transport_class=AsyncTransport)
Unified test suite
elasticsearch-py
# Raw-dict search through the low-level client: a bool query (must /
# must_not / filter) plus a terms aggregation on "tags" with a nested
# max aggregation on "lines".
response = client.search(
    index="my-index",
    body={
        "query": {
            "bool": {
                "must": [{"match": {"title": "python"}}],
                "must_not": [{"match": {"description": "beta"}}],
                "filter": [{"term": {"category": "search"}}],
            }
        },
        "aggs": {
            "per_tag": {
                "terms": {"field": "tags"},
                "aggs": {
                    "max_lines": {"max": {"field": "lines"}}
                }
            }
        }
    }
)
# Print the score and the title stored under _source for every returned hit.
for hit in response['hits']['hits']:
    print(hit['_score'], hit['_source']['title'])
elasticsearch-dsl
# Build a search against "my-index" one step at a time with the DSL.
s = Search(using=client, index="my-index")
# keep only documents whose category is "search"
s = s.filter("term", category="search")
# we want python in title
s = s.query("match", title="python")
# and no beta releases (~ negates the query)
s = s.query(~Q("match", description="beta"))
# aggregate on tags
s.aggs.bucket('per_tag', 'terms', field='tags')
# max lines per tag
s.aggs['per_tag'].metric('max_lines', 'max', field='lines')
Hide mechanics,
not meaning!
Mechanics
# Raw-dict search through the low-level client: a bool query (must /
# must_not / filter) plus a terms aggregation on "tags" with a nested
# max aggregation on "lines".
response = client.search(
    index="my-index",
    body={
        "query": {
            "bool": {
                "must": [{"match": {"title": "python"}}],
                "must_not": [{"match": {"description": "beta"}}],
                "filter": [{"term": {"category": "search"}}],
            }
        },
        "aggs": {
            "per_tag": {
                "terms": {"field": "tags"},
                "aggs": {
                    "max_lines": {"max": {"field": "lines"}}
                }
            }
        }
    }
)
# Print the score and the title stored under _source for every returned hit.
for hit in response['hits']['hits']:
    print(hit['_score'], hit['_source']['title'])
Meaning
# Search "my-index" with the DSL: a term filter, a match query, a negated
# match query, and a per-tag terms bucket carrying a max metric.
s = Search(using=client, index="my-index")
s = s.filter("term", category="search")
s = s.query("match", title="python")
s = s.query(~Q("match", description="beta"))
s.aggs.bucket('per_tag', 'terms', field='tags') \
    .metric('max_lines', 'max', field='lines')
# Iterating the Search object yields the hits; score is read from hit.meta.
for hit in s:
    print(hit.meta.score, hit.title)
Integrations
DocType
from datetime import date
from elasticsearch_dsl import DocType, Text, Keyword, Date
class BlogPost(DocType):
    """Blog post document: two analyzed text fields, a date and tags."""

    # Text fields declared with the "english" analyzer.
    title = Text(analyzer="english")
    body = Text(analyzer="english")
    published_date = Date()
    # multi=True marks the field as holding several keyword values.
    tags = Keyword(multi=True)
# search() returns a Search for this document type; the terms filter is
# applied on that Search object rather than passed to search() itself.
BlogPost.search().filter('terms', tags=['python', 'belarus'])
# Create a post, add a tag, and save it.
bp = BlogPost(title='PyCon Belarus', published_date=date.today())
bp.tags.append('python')
bp.save()
Django Integration?
from django.db import models
from django.db.models.signals import post_save, pre_delete

from .search import BlogPost as BlogPostDoc


class BlogPost(models.Model):
    """Django model that can be converted into a BlogPostDoc search document."""

    title = models.CharField(max_length=200)
    ...  # remaining model fields elided

    def to_search(self):
        """Return a BlogPostDoc built from this instance's id, title and tag names."""
        return BlogPostDoc(
            _id=self.id,
            title=self.title,
            tags=[t.name for t in self.tags.all()],
            # ... remaining fields elided
        )


def update_search(instance, **kwargs):
    """Signal receiver: save the search document built from ``instance``."""
    instance.to_search().save()


def remove_from_search(instance, **kwargs):
    """Signal receiver: delete the search document built from ``instance``."""
    instance.to_search().delete()


# Keep the search documents in step with BlogPost saves and deletes.
post_save.connect(update_search, sender=BlogPost)
pre_delete.connect(remove_from_search, sender=BlogPost)
Faceted Search
Faceted Search
from elasticsearch_dsl import FacetedSearch, TermsFacet, DateHistogramFacet
class BlogSearch(FacetedSearch):
    """Faceted search over BlogPost documents with tag and per-month facets."""

    doc_types = [BlogPost, ]
    fields = ['tags', 'title', 'body']
    facets = {
        # Terms facet over the "tags" field.
        'tags': TermsFacet(field='tags'),
        # Date histogram over "published_date" in monthly intervals.
        'months': DateHistogramFacet(field='published_date', interval='month'),
    }
# date() requires year, month and day; the first of the month is used as the
# value for the 'months' facet filter.
bs = BlogSearch('python web', {'months': date(2015, 6, 1)})
Future?
Thanks!
@honzakral
The What and Why of Python and Elasticsearch
By Honza Král
The What and Why of Python and Elasticsearch
- 2,353