What is this "search" that you speak of?
@honzakral
"unstructured"
Looking for content
grep -i -r 'web.*framework'
WHERE text ILIKE '%python%'
long, long time ago...
long, long time ago...
Bible concordance,
finished in 1230
1230
Demo Time!
{
'description': {
...
'programming': {1},
'python': {0, 1},
'quick': {0, 1},
'reinvent': {0},
...
},
'title': { ... }
}
def index_docs(docs, *fields):
    """Build an inverted index over the given fields of ``docs``.

    Each document is identified by its position in ``docs``.  The result
    maps field name -> token -> set of ids of the documents whose field
    produced that token when passed through ``analyze``.
    """
    def new_field_index():
        # One token -> document-id-set mapping per field.
        return defaultdict(set)

    inverted = defaultdict(new_field_index)
    for doc_id, doc in enumerate(docs):
        for field in fields:
            text = doc[field]
            for token in analyze(text):
                inverted[field][token].add(doc_id)
    return inverted
# Any single character that is not an ASCII letter or digit separates tokens.
SPLIT_RE = re.compile(r'[^a-zA-Z0-9]')


def tokenize(text):
    """Yield the non-empty runs of ASCII letters/digits found in ``text``.

    ``re.split`` emits an empty string between two adjacent separators
    (e.g. the ", " in "hello, world") and at either end of the text when it
    starts or ends with a separator.  Those empty strings are not words, so
    they are dropped here instead of being indexed and searched as a token.
    """
    yield from filter(None, SPLIT_RE.split(text))
def lowercase(tokens):
    """Token filter: lazily yield every token converted to lower case."""
    yield from (token.lower() for token in tokens)
# Maps a token to the canonical token it should be replaced with.
SYNONYMS = {'rapid': 'quick'}


def synonyms(tokens):
    """Token filter: replace tokens listed in ``SYNONYMS``, pass others through."""
    for token in tokens:
        if token in SYNONYMS:
            yield SYNONYMS[token]
        else:
            yield token
def analyze(text):
    """Yield the normalized tokens of ``text``.

    The analysis chain is: ``tokenize`` the text, then ``lowercase`` the
    tokens, then map them through ``synonyms``.
    """
    yield from synonyms(lowercase(tokenize(text)))
# Query operator name -> set operation used to merge per-token result sets.
COMBINE = dict(
    OR=set.union,
    AND=set.intersection,
)
def search_in_fields(index, query, fields):
    """Yield, for each token of ``query``, the ids matching it in any field.

    ``index`` maps field name -> token -> collection of document ids.
    ``query`` is run through ``analyze``; one set is yielded per resulting
    token, holding the ids found for that token in at least one of
    ``fields``.

    A field or token that is absent from ``index`` contributes no ids, and
    an empty ``fields`` yields an empty set per token.
    """
    # Snapshot so a one-shot iterable of fields can be reused for every token.
    fields = tuple(fields)
    for token in analyze(query):
        hits = set()
        for field in fields:
            # .get() instead of [] so that searching never inserts new
            # entries into an index built from defaultdicts.
            hits.update(index.get(field, {}).get(token, ()))
        yield hits
def search(index, query, operator='AND', fields=None):
    """Return the set of document ids in ``index`` that match ``query``.

    ``operator`` is a key of ``COMBINE`` ('AND' or 'OR') and decides how the
    per-token result sets are merged; an unknown operator raises KeyError.
    ``fields`` restricts the search to the given field names; when it is
    None or empty, every field present in ``index`` is searched.

    If the query produces no token result sets at all, an empty set is
    returned (calling the set operation with no arguments would raise
    TypeError).
    """
    # Materialize so a one-shot iterable of fields survives multiple tokens.
    fields = list(fields or index.keys())
    combine = COMBINE[operator]
    per_token = list(search_in_fields(index, query, fields))
    if not per_token:
        return set()
    return combine(*per_token)
Real world
Dictionary
dict -> list
Postings List
set -> list
Combine
set union/intersect -> merge lists
Complex Queries
Prefix
py*
Phrase
"monty python"
http://bit.ly/searchpy
Thank you!
@honzakral
http://bit.ly/searchpy
What is this "search" that you speak of?
By Honza Král
What is this "search" that you speak of?
PyCon ZA talk
- 927