Tumblr
S3
Posts + Likes
Dictionary post_id to parrot_id
ElasticSearch Posts
SIT
data
Mappings
+-----------------------+-------------------+
|game_title |posts_cnt_daily_avg|
+-----------------------+-------------------+
|D |10000 |
|Baldur's Gate |10000 |
|Forced |10000 |
|Hearts |10000 |
|Sky |10000 |
|Air |10000 |
|LoveR |10000 |
|Spider |10000 |
|Journey |10000 |
|Blood |10000 |
|Snake |9723 |
|Stray |9499 |
|720° |8781 |
|SiN |8125 |
|Bless |7893 |
|Hatred |7745 |
|The Forest |7119 |
+-----------------------+-------------------+
{
"query": {
"multi_match": {
"query": "Resident Evil",
"fields": [
"g", // tags
"t", // title
"h", // caption
"b" // body
],
"type": "phrase"
}
}
}
{
"query": {
"term": {
"g.array": {
"value": "Blood game",
"case_insensitive": true
}
}
}
}
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Air",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "visual novel",
"fields": ["g","t","h","b"],
"type": "phrase"
}
}
]
}
}
}
Works as AND
{
"query": {
"bool": {
"should": [
{
"term": {
"g.array": {
"value": "blood game",
"case_insensitive": true
}
}
},
{
"term": {
"g.array": {
"value": "caleb blood",
"case_insensitive": true
}
}
}
],
"minimum_should_match": "1"
}
}
}
Works as OR
Phrase "Obscure game"
and (
Phrase "Josh Carter"
or Phrase "Stan Jones"
or Phrase "Kenny Matthew"
or Phrase "Shannon Matthews"
or Phrase "DreamCatcher Interactive"
or Phrase "Hydravision Entertainment"
or Phrase "MC2 Microids"
)
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Obscure game",
...
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "Josh Carter",
...
}
},
{
"multi_match": {
"query": "Stan Jones",
...
}
},
...
],
"minimum_should_match": "1"
}
}
]
}
}
}
{
"track_total_hits": true,
"size": 100000,
"_source": {
"includes": ["i","b","g","h","r","t"]
},
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Obscure game",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "Josh Carter",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Stan Jones",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Kenny Matthew",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Shannon Matthews",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "DreamCatcher Interactive",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Hydravision Entertainment",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "MC2 Microids",
"fields": ["g","t","h","b"],
"type": "phrase"
}
}
],
"minimum_should_match": "1"
}
}
]
}
}
}
Phrase "Obscure game"
and (
Phrase "Josh Carter"
or Phrase "Stan Jones"
or Phrase "Kenny Matthew"
or Phrase "Shannon Matthews"
or Phrase "DreamCatcher Interactive"
or Phrase "Hydravision Entertainment"
or Phrase "MC2 Microids"
)
1. Phrase
2. Tag
3. And
4. Or
5. ()
p"value"
t"tag"
t"foo" and p"bar"
t"foo" or p"bar"
t"foo" and (p"bar" or t"baz")
interface TumblrMappingExpr {
/** Leaf node: a phrase, written p"value" in the mapping DSL. */
@Data
class Phrase implements TumblrMappingExpr {
String value;
}
/** Leaf node: a tag, written t"tag" in the mapping DSL. */
@Data
class Tag implements TumblrMappingExpr {
String value;
}
/** Composite node: sub-expressions joined with "and". */
@Data
class And implements TumblrMappingExpr {
List<TumblrMappingExpr> expressions;
}
/** Composite node: sub-expressions joined with "or". */
@Data
class Or implements TumblrMappingExpr {
List<TumblrMappingExpr> expressions;
}
}
sealed interface TumblrMappingExpr {
/** Leaf node: a phrase, written p"value" in the mapping DSL. */
record Phrase(String value) implements TumblrMappingExpr {}
/** Leaf node: a tag, written t"tag" in the mapping DSL. */
record Tag(String value) implements TumblrMappingExpr {}
/** Composite node: sub-expressions joined with "and". */
record And(List<TumblrMappingExpr> expressions) implements TumblrMappingExpr {}
/** Composite node: sub-expressions joined with "or". */
record Or(List<TumblrMappingExpr> expressions) implements TumblrMappingExpr {}
}
sealed trait TumblrMappingExpr
object TumblrMappingExpr {
/** Leaf node: a phrase, written `p"value"` in the mapping DSL. */
final case class Phrase(value: String) extends TumblrMappingExpr
/** Leaf node: a tag, written `t"tag"` in the mapping DSL. */
final case class Tag(value: String) extends TumblrMappingExpr
/** Composite node: sub-expressions joined with `and`. */
final case class And(expressions: List[TumblrMappingExpr]) extends TumblrMappingExpr
/** Composite node: sub-expressions joined with `or`. */
final case class Or(expressions: List[TumblrMappingExpr]) extends TumblrMappingExpr
}
enum TumblrMappingExpr {
// Leaf node: a phrase, written p"value" in the mapping DSL.
case Phrase(value: String)
// Leaf node: a tag, written t"tag" in the mapping DSL.
case Tag(value: String)
// Composite node: sub-expressions joined with "and".
case And(expressions: List[TumblrMappingExpr])
// Composite node: sub-expressions joined with "or".
case Or(expressions: List[TumblrMappingExpr])
}
class Phrase {
value: string;
}
/** Leaf node: a tag, written t"tag" in the mapping DSL. */
class Tag {
value: string;
}
/** Composite node: sub-expressions joined with "and". */
class And {
expressions: TumblrMappingExpr[];
}
/** Composite node: sub-expressions joined with "or". */
class Or {
expressions: TumblrMappingExpr[];
}
/** Any node of a mapping expression tree. */
type TumblrMappingExpr = Phrase | Tag | And | Or;
Type 0: Unrestricted Grammar — the broadest and most complex class
Type 1: Context-Sensitive Grammar — complex grammars; the parser has to keep state
Type 2: Context-Free Grammar — relatively simple grammars
Type 3: Regular Grammar — the simplest case; can be parsed with regular expressions
1. Manually written parser
2. Regexps
TumblrMappingExprGrammar
TumblrMappingExprParserTest
p"bless online"
or p"bless unleashed"
or (p"bless" and (p"mmo" or p"rpg" or p"mmorpg"))
(p"Spider-Man" or p"Spider Man")
and (p"video game" or p"game")
and p"2018"
t"Dante's Inferno game" or (
p"Dante's Inferno" and (
p"video game"
or p"Visceral Games"
or p"EA"
or p"Electronic Arts"
or p"xbox"
or p"PlayStation"
or (p"game" and p"2010")
)
)
TumblrMappingExprToESquery
+--------------+------+
|game_title |posts |
+--------------+------+
|D |10000 |
|Baldur's Gate |10000 |
|Forced |10000 |
|Hearts |10000 |
|Sky |10000 |
|Air |10000 |
|LoveR |10000 |
|Spider |10000 |
|Journey |10000 |
|Blood |10000 |
|Snake |9723 |
|Stray |9499 |
|720° |8781 |
|SiN |8125 |
|Bless |7893 |
|Hatred |7745 |
|The Forest |7119 |
+--------------+------+
+-------------------+------+
|game_title |posts |
+-------------------+------+
|Baldur's Gate III |21429 |
|Minecraft |6729 |
|Pikmin |5168 |
|Genshin Impact |5036 |
|Undertale |4472 |
|Splatoon |3936 |
|Disco Elysium |3569 |
|The Sims |3520 |
|Deltarune |3241 |
|Animal Crossing |2990 |
|Elden Ring |2440 |
|Stardew Valley |1943 |
|Braid |1873 |
|The Last of Us |1798 |
|Fire Emblem |1349 |
|The Legend of Zelda|1347 |
|Honkai: Star Rail |1175 |
+-------------------+------+