What is this "search" that you speak of??

@honzakral

"unstructured"

Looking for content

grep -i -r 'web.*framework'
WHERE text ILIKE '%python%'

long, long time ago...

long, long time ago...

Bible concordance,

finished 1230

1230

Demo Time!

{
    'description': {
        ...
        'programming': {1},
        'python': {0, 1},
        'quick': {0, 1},
        'reinvent': {0},
        ...
    },
    'title': { ... }
}
                             
                    
def index_docs(docs, *fields):
    """Build an inverted index over the given fields of ``docs``.

    The result maps field name -> token -> set of document ids, where a
    document's id is its position in ``docs``.  Each field value is run
    through ``analyze`` to obtain its tokens.
    """
    def new_token_map():
        # Unknown tokens start out with an empty set of document ids.
        return defaultdict(set)

    inverted = defaultdict(new_token_map)

    for doc_id, document in enumerate(docs):
        for field_name in fields:
            for term in analyze(document[field_name]):
                inverted[field_name][term].add(doc_id)
    return inverted
# Any single character that is not an ASCII letter or digit separates tokens.
SPLIT_RE = re.compile(r'[^a-zA-Z0-9]')
def tokenize(text):
    """Yield the non-empty runs of ASCII letters and digits in ``text``.

    Text that contains no letters or digits (including the empty string)
    yields nothing.
    """
    # re.split() emits '' between adjacent separators and at either end of
    # the text (e.g. "python!" -> ['python', '']).  Those empty strings are
    # not real tokens: indexed, they pollute the index; in a query, they add
    # a bogus term that can make an AND search match nothing.
    yield from filter(None, SPLIT_RE.split(text))

def lowercase(tokens):
    """Token filter: yield every incoming token converted to lower case."""
    yield from (token.lower() for token in tokens)

# Maps a token to the canonical token that should be used in its place.
SYNONYMS = {'rapid': 'quick'}
def synonyms(tokens):
    """Token filter: swap each token for its SYNONYMS entry, if it has one.

    Tokens without an entry are passed through unchanged.
    """
    for token in tokens:
        yield SYNONYMS[token] if token in SYNONYMS else token

def analyze(text):
    """Turn ``text`` into the stream of tokens used for indexing and search.

    The text is tokenized, then each token filter wraps the stream produced
    by the previous stage: first ``lowercase``, then ``synonyms``.
    """
    stream = tokenize(text)
    stream = lowercase(stream)
    stream = synonyms(stream)
    yield from stream



# Maps a boolean operator name to the set operation that merges result sets.
COMBINE = dict(OR=set.union, AND=set.intersection)

def search_in_fields(index, query, fields):
    """Yield one set of matching document ids per token of ``query``.

    A document matches a token when the token occurs in at least one of
    ``fields``.  The query is run through ``analyze``; a fresh set is
    yielded for each resulting token (empty when nothing matches or when
    ``fields`` is empty).  The index itself is never modified.
    """
    # ``fields`` is walked once per token; materialise it so a one-shot
    # iterator is not silently exhausted after the first token.
    fields = tuple(fields)
    for token in analyze(query):
        matches = set()
        for field in fields:
            # .get() rather than index[field][token]: subscripting a
            # defaultdict-based index inserts an empty entry for every
            # unknown field/token, so each search would grow the index.
            matches.update(index.get(field, {}).get(token, ()))
        yield matches

def search(index, query, operator='AND', fields=None):
    """Return the set of document ids matching ``query``.

    ``operator`` is a key of ``COMBINE`` and selects how the per-token
    results are merged; an unknown operator raises ``KeyError``.
    ``fields`` limits the search to the given fields; when omitted (or
    empty) every field present in the index is searched.

    An empty set is returned when there is nothing to search (no fields)
    or the query produces no tokens.
    """
    fields = fields or index.keys()
    combine = COMBINE[operator]
    if not fields:
        # Empty index and no explicit fields: nothing can match.
        return set()
    per_token = list(search_in_fields(index, query, fields))
    if not per_token:
        # set.union / set.intersection called with no arguments raise
        # TypeError, so a query without tokens must be handled here.
        return set()
    return combine(*per_token)

Real world

Dictionary

dict -> list

Postings List

set -> list

Combine

set union/intersect -> merge lists

Complex Queries

Prefix

py*

Phrase

"monty python"

http://bit.ly/searchpy

Thank you!

@honzakral

http://bit.ly/searchpy