Elasticsearch provides many token filters. We are going to use:
PUT /synonym_test
{
"settings": {
"index": {
"max_ngram_diff": 99,
"analysis": {
"analyzer": {
"synonym_analyzer": {
"tokenizer": "whitespace",
"filter": [
"lowercase",
"synonym"
]
},
"autocomplete_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"unlimited, endlessness => infinity",
"chaos, conflict, bloodshed => war"
]
},
"autocomplete_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 20
}
}
}
}
}
}
"synonyms_path" : "synonym.txt"
PUT /synonym_test
{
"settings": {
"index": {
"max_ngram_diff": 99,
"analysis": {
"analyzer": {
"synonym_analyzer": {
"tokenizer": "whitespace",
"filter": [
"lowercase",
"synonym"
]
},
"autocomplete_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"unlimited, endlessness => infinity",
"chaos, conflict, bloodshed => war"
]
},
"autocomplete_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 20
}
}
}
}
}
}
PUT /synonym_test
{
"settings": {
"index": {
"max_ngram_diff": 99,
"analysis": {
"analyzer": {
"synonym_analyzer": {
"tokenizer": "whitespace",
"filter": [
"lowercase",
"synonym"
]
},
"autocomplete_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"unlimited, endlessness => infinity",
"chaos, conflict, bloodshed => war"
]
},
"autocomplete_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 20
}
}
}
}
}
}
We will use the Whitespace Tokenizer, which breaks text into terms whenever it encounters a whitespace character.
Then, we will implement a Custom Analyzer to combine several filters.
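We will build two separate analyzers: autocomplete_analyzer, used at index time, and synonym_analyzer, used at search time. This is because we want the ngram filter to be applied only at index time (in autocomplete_analyzer), while synonyms are resolved at search time (in synonym_analyzer).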
PUT /synonym_test
{
"settings": {
"index": {
"max_ngram_diff": 99,
"analysis": {
"analyzer": {
"synonym_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"synonym"
]
},
"autocomplete_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"unlimited, endlessness => infinity",
"chaos, conflict, bloodshed => war"
]
},
"autocomplete_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 20
}
}
}
}
}
}
PUT /synonym_test
{
"settings": {
"index": {
"max_ngram_diff": 99,
"analysis": {
"analyzer": {
"synonym_analyzer": {
"tokenizer": "whitespace",
"filter": [
"lowercase",
"synonym"
]
},
"autocomplete_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"unlimited, endlessness => infinity",
"chaos, conflict, bloodshed => war"
]
},
"autocomplete_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 20
}
}
}
}
}
}
PUT /synonym_test/_mapping/doc
{
"properties": {
"movie_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"year": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"subtitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"weight": {
"type": "integer",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
PUT /synonym_test/_mapping/doc
{
"properties": {
"movie_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"year": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"subtitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"weight": {
"type": "integer",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
POST /synonym_test/doc
{
"movie_name": "marvels INFINITY WAR",
"year" : "2018",
"subtitle" : "english",
"weight": 5
}
POST /synonym_test/doc
{
"movie_name": "marvels thor",
"year" : "2014",
"subtitle" : "malay",
"weight": 1
}
POST /synonym_test/doc
{
"movie_name": "marvels the avengers",
"year" : "2016",
"subtitle" : "chinese",
"weight": 3
}
GET /synonym_test/_search
{
"query": {
"multi_match" : {
"query": "marv",
"fields": [ "movie_name", "year", "subtitle" ]
}
}
}
GET /synonym_test/_search
{
"query": {
"multi_match" : {
"query": "bloodshed ENDLESSNESS",
"fields": [ "movie_name", "year", "subtitle" ]
}
}
}
GET /synonym_test/_search
{
"query": {
"function_score": {
"query": {
"multi_match": {
"query": "inf",
"fields": [ "movie_name", "year", "subtitle" ]
}
},
"field_value_factor": {
"field": "weight",
"modifier": "log1p"
}
}
}
}
GET /synonym_test/_search
{
"query": {
"multi_match" : {
"query": "marv",
"type": "phrase",
"fields": [ "movie_name", "year", "subtitle" ]
}
}
}
GET /synonym_test/_search
{
"query": {
"multi_match" : {
"query": "marv",
"fields": [ "movie_name", "year", "subtitle" ],
"fuzziness": "auto"
}
}
}
1. Index name: city_weight
2. Settings:
3. Mapping:
{
"template": "city_weight",
"order": 1,
"settings": {
"index": {
"max_ngram_diff": 99,
"analysis": {
"analyzer": {
"synonym_analyzer": {
"tokenizer": "whitespace",
"filter": [
"lowercase",
"synonym"
]
},
"autocomplete_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms" : [
"AYER => AIR",
"ALUR => ALOR",
"AMPG => AMPANG",
"EMPAT => AMPAT",
"HANTU => ANTU",
"ASHN => ASAHAN",
"ATS => ATAS",
"AIR => AYER",
"BGN => BAGAN",
"BARU, BAHARU, BHARU => BAHRU",
"BLK, BALEK => BALIK",
"BDR, B., BNDR => BANDAR"
]
},
"autocomplete_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 20
}
}
}
}
},
"mappings": {
"doc": {
"properties": {
"city_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"filter": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "autocomplete_analyzer",
"search_analyzer": "synonym_analyzer"
},
"weight": {
"type": "long",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
}
}
4. Query:
GET /city_weight/_search
{
"query": {
"function_score": {
"query": {
"multi_match": {
"query": "kuala",
"fields": [ "city_name", "state", "filter" ]
}
},
"field_value_factor": {
"field": "weight"
},
"boost_mode": "max"
}
}
}
1. Query:
GET /city_weight/_search
{
"query": {
"multi_match" : {
"query": "pahag",
"fields": [ "city_name", "state", "filter" ],
"fuzziness": "auto"
}
}
}
const elasticsearch = require('elasticsearch');
// Elasticsearch client used by the query code in this script.
// The node address can be overridden through the ELASTICSEARCH_HOST
// environment variable; when it is unset or empty, the original hard-coded
// address is used, so existing behavior is unchanged by default.
const client = new elasticsearch.Client({
  host: process.env.ELASTICSEARCH_HOST || '103.245.90.189:3002',
});
// Name of the index that the query code searches.
const index = 'city_weight';
/**
 * Runs a phrase-type `multi_match` query for "pahang" over the `city_name`,
 * `state` and `filter` fields of the index named by `index`, wrapped in a
 * `function_score` whose `field_value_factor` reads the `weight` field, and
 * prints the full response with `console.dir`.
 *
 * An error thrown or rejected by `client.search` is caught here and its
 * message is logged instead of propagating to the caller.
 *
 * @returns {Promise<void>} Resolves once the response (or the error message)
 *   has been printed.
 */
const simpleQuery = async () => {
  try {
    // The await must sit inside the try block: otherwise a failed search
    // bypasses the catch below and surfaces as an unhandled rejection.
    const response = await client.search({
      index,
      body: {
        query: {
          function_score: {
            query: {
              multi_match: {
                query: 'pahang',
                type: 'phrase',
                fields: ['city_name', 'state', 'filter'],
              },
            },
            field_value_factor: {
              field: 'weight',
            },
          },
        },
      },
    });
    // depth: null prints every nesting level of the response.
    console.dir(response, { depth: null, colors: true });
  } catch (error) {
    console.log(error.message);
  }
};

simpleQuery();