Elasticsearch
Basics
Cluster
Node 1
Node 2
Node 3
Index A
Shard 1
Index A
Shard 2
Index A
Shard 3
Index A
Replica 2
Index A
Replica 3
Index A
Replica 1
Cluster 1
Index
Guidelines for indices
Data input
What's a document
{
"name": "John Doe",
"age": 42,
"confirmed": true,
"created": "2018-01-01T12:00:00",
"adress": {
"street": "Gatan 1",
"zip": "12344",
"city": "Farsta"
},
"tags": [
{ "type": "Category", "value": "IT" },
{ "type": "Employment period", "value": "Deltid" }
]
}
Field names can NOT include a dot (.)
Document metadata
Indexing a document
PUT /{index}/{_doc}/{id}
{
"field": "value",
...
}PUT /website/_doc/123
{
"title": "My first blog entry",
"text": "Just trying this out...",
"date": "2014/01/01"
}{
"_index": "website",
"_type": "blog",
"_id": "123",
"_version": 1,
"created": true
}Request
Response
DocumentId
PUT /{index}/{_doc|_create}/{id}
POST /website/blog/
Concurrency control
Optimistic concurrency control
PUT /website/blog/1?version=1
External version numbers
Optimistic concurrency control
PUT /website/blog/2?version=5&version_type=external
Large data volumes
When adding large volumes of data, prefer the Bulk API
{ action: { metadata }}\n
{ request body }\n
{ action: { metadata }}\n
{ request body }\n
POST /_bulk
{ "delete": { "_index": "website", "_type": "blog", "_id": "123" }}
{ "create": { "_index": "website", "_type": "blog", "_id": "123" }}
{ "title": "My first blog post" }
{ "index": { "_index": "website", "_type": "blog" }}
{ "title": "My second blog post" }
{ "update": { "_index": "website", "_type": "blog", "_id": "123", "_retry_on_conflict" : 3} }
{ "doc" : {"title" : "My updated blog post"} }
Delete document
DELETE /website/blog/123
{
"found" : true,
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_version" : 3
}{
"found" : false,
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_version" : 4
}Request
OK Response (200)
Missing Response (404)
Data out
Get document by Id
GET /website/blog/123?pretty{
"_index" : "website",
"_type" : "blog",
"_id" : "123",
"_version" : 1,
"found" : true,
"_source" : {
"title": "My first blog entry",
"text": "Just trying this out...",
"date": "2014/01/01"
}
}Request
Response
Get multiple documents by Id
GET /website/blog/_mget
{
"ids" : [ "2", "1" ]
}{
"docs" : [
{
"_index" : "website",
"_type" : "blog",
"_id" : "2",
"_version" : 10,
"found" : true,
"_source" : { "title": "My first external blog entry", "text": "This is a piece of cake..." }
},
{
"_index" : "website",
"_type" : "blog",
"_id" : "1",
"found" : false
}
]
}Request
Response
Schemas
Different types of search
Boolean searches
Full text search
Combinations
Inverted index
How the data is stored in Elasticsearch explains how searches behave
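Given the following documents:
1: Den snabba bruna räven hoppar över den lata hunden
2: Snabba bruna rävar hoppar över lata hundar på sommaren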
Inverted index
| 1 | 2 |
----------------------
Den | x | |
---------|-----|-----|
snabba | x | |
---------|-----|-----|
bruna | x | x |
---------|-----|-----|
räven | x | |
---------|-----|-----|
hoppar | x | x |
---------|-----|-----|
över | x | x |
---------|-----|-----|
den | x | |
---------|-----|-----|
lata | x | x |
---------|-----|-----|
hunden | x | |
---------|-----|-----|
Snabba | | x |
---------|-----|-----|
rävar | | x |
---------|-----|-----|
hundar | | x |
---------|-----|-----|
på | | x |
---------|-----|-----|
sommaren | | x |
----------------------
Query: snabba bruna
Index
Terms | 1 | 2 |
--------------------
snabba | x | |
-------|-----|-----|
bruna | x | x |
--------------------
Total | 2 | 1 |
Normalization
| 1 | 2 |
----------------------
den | x | |
---------|-----|-----|
snabb | x | x |
---------|-----|-----|
brun | x | x |
---------|-----|-----|
räv | x | x |
---------|-----|-----|
hoppa | x | x |
---------|-----|-----|
över | x | x |
---------|-----|-----|
lata | x | x |
---------|-----|-----|
hund | x | x |
---------|-----|-----|
på | | x |
---------|-----|-----|
sommaren | | x |
----------------------
Query: snabba bruna
Index
Terms | 1 | 2 |
--------------------
snabb | x | x |
-------|-----|-----|
brun | x | x |
--------------------
Total | 2 | 2 |
Analysis
Standard analyzers
When are analyzers used?
On all full text fields
They are applied to the document text when indexing and to the search string when searching
Testing analyzers (Analyze API)
GET _analyze
{
"analyzer" : "standard",
"text" : "this is a test"
}GET _analyze
{
"analyzer" : "standard",
"text" : [
"this is a test",
"the second text"
]
}GET _analyze
{
"tokenizer" : "keyword",
"filter" : ["lowercase"],
"text" : "this is a test"
}GET _analyze
{
"tokenizer" : "keyword",
"filter" : ["lowercase"],
"char_filter" : ["html_strip"],
"text" : "this is a <b>test</b>"
}
Example
PUT my_index
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase"],
"char_filter": ["html_strip"]
}
}
}
}
}
Mapping
{
"name": "Maria Kihlgren",
"birth_date": "1960-01-31",
"adress": {
"street": "Drevgatan 60, 6tr",
"zipcode": 13500,
"city": "Karlstad"
},
"contacts": {
"home_phone": "015 – 15 15 15",
"modile_phone": "070 – 15 15 16 ",
"email": "maria@kihlgren.se"
},
"ambition": "Jag ser det som en utmaning att arbeta vidare
med sådant som jag tycker är kul samtidigt som jag motiveras av
viljan att lära mig nya saker, att utvecklas och genom det
bidra till att föra verksamheten vidare"
}Example
Mapping
{
"name": string,
"birth_date": date,
"adress": {
"street": text,
"zipcode": number,
"city": keyword
},
"contacts": {
"home_phone": keyword,
"modile_phone": keyword,
"email": email
},
"ambition": text
}Types
Mapping
{
"mappings": {
"candidate": {
"properties": {
"name": { "type": "text" },
"birth_date": { "type": "date" },
"adress": {
"properties": {
"street": { "type": "text" },
"zipcode": { "type": "long" },
"city": { "type": "keyword" }
}
},
"contacts": {
"properties": {
"home_phone": { "type": "keyword" },
"modile_phone": { "type": "keyword" },
"email": { "type": "keyword" }
}
},
"ambition": {
"type": "text"
}
}
}
}
}
Index mapping
Queries
Match_All
POST candidates/_search
POST candidates/_search {}
POST candidates/_search
{
"query": {
"match_all": {}
}
}Match
// match on full text
POST candidates/candidate/_search
{
"query": {
"match": {
"name": "Maria"
}
}
}
POST candidates/candidate/_search
{
"query": {
"match": {
"adress.city": "Karlstad"
}
}
}
Term/Terms
// Ok
POST candidates/candidate/_search
{
"query": { "term": { "name": "maria" } }
}
// Fail wrong casing
POST candidates/candidate/_search
{
"query": { "term": { "name": "Maria" } }
}
// Ok
POST candidates/candidate/_search
{
"query": { "term": { "adress.city": "Karlstad" } }
}
// Fail wrong casing
POST candidates/candidate/_search
{
"query": { "term": { "adress.city": "karlstad" } }
}
Range
POST candidates/candidate/_search
{
"query": {
"range": {
"adress.zipcode": {
"gte": 13501
}
}
}
}
POST candidates/candidate/_search
{
"query": {
"range": {
"adress.zipcode": {
"lte": 13500
}
}
}
}
Bool
// AND
POST candidates/candidate/_search
{
"query": { "bool": {
"must": [
{ "range": { "adress.zipcode": { "lt": 13501 } } },
{ "term": { "name": "maria" } }
]
}}
}
// OR
POST candidates/candidate/_search
{
"query": { "bool": {
"should": [
{ "range": { "adress.zipcode": { "lt": 13501 } } },
{ "term": { "name": "erik" } }
]
}}
}
// NOT AND
POST candidates/candidate/_search
{
"query": { "bool": {
"must_not": [
{ "term": { "name": "erik" } }
]
}}
}
Prefix
+ recall - precision
POST candidates/candidate/_search
{
"query": {
"prefix": {
"name": {
"value": "ma"
}
}
}
}
Potentially slow query: it has to loop through the inverted index to look for matches
Fuzzy
+ recall - precision
POST candidates/candidate/_search
{
"query": {
"fuzzy" : { "name" : "eric" }
}
}
POST candidates/candidate/_search
{
"query": {
"fuzzy" : {
"name" : {
"value": "marios",
"fuzziness": 2
}
}
}
}
Match phrase
- recall + precision
POST candidates/candidate/_search
{
"query": {
"match_phrase" : {
"ambition": {
"query": "jag ser det som en"
}
}
}
}Aggregations
Types
Aggregations together with search
Terms
GET /_search
{
"aggs" : {
"genres" : {
"terms" : { "field" : "genre" }
}
}
}Histogram
POST /sales/_search?size=0
{
"aggs" : {
"prices" : {
"histogram" : {
"field" : "price",
"interval" : 50
}
}
}
}Sorting/Pagination
Sorting
GET /my_index/my_type/_search
{
"sort" : [
{ "post_date" : {"order" : "asc"}},
{ "name" : "desc" }
],
"query" : {
"term" : { "user" : "kimchy" }
}
}
RELEVANCE IS LOST!
Missing values
GET /_search
{
"sort" : [
{ "price" : {"missing" : "_last"} }
],
"query" : {
"term" : { "product" : "chocolate" }
}
}Pagination
GET /_search
{
"from" : 0, "size" : 10,
"query" : {
"term" : { "user" : "kimchy" }
}
}
Only the first 10000 items can be reached with from + size