Tumblr
S3
Posts + Likes
Dictionary post_id to parrot_id
ElasticSearch Posts
SIT
data
Mappings
+-----------------------+-------------------+
|game_title |posts_cnt_daily_avg|
+-----------------------+-------------------+
|D |10000 |
|Baldur's Gate |10000 |
|Forced |10000 |
|Hearts |10000 |
|Sky |10000 |
|Air |10000 |
|LoveR |10000 |
|Spider |10000 |
|Journey |10000 |
|Blood |10000 |
|Snake |9723 |
|Stray |9499 |
|720° |8781 |
|SiN |8125 |
|Bless |7893 |
|Hatred |7745 |
|The Forest |7119 |
+-----------------------+-------------------+
{
"query": {
"multi_match": {
"query": "Resident Evil",
"fields": [
"g", // tags
"t", // title
"h", // caption
"b" // body
],
"type": "phrase"
}
}
}
{
"query": {
"term": {
"g.array": {
"value": "Blood game",
"case_insensitive": true
}
}
}
}
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Air",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "visual novel",
"fields": ["g","t","h","b"],
"type": "phrase"
}
}
]
}
}
}
Works as AND
{
"query": {
"bool": {
"should": [
{
"term": {
"g.array": {
"value": "blood game",
"case_insensitive": true
}
}
},
{
"term": {
"g.array": {
"value": "caleb blood",
"case_insensitive": true
}
}
}
],
"minimum_should_match": "1"
}
}
}
Works as OR
Phrase "Obscure game"
and (
Phrase "Josh Carter"
or Phrase "Stan Jones"
or Phrase "Kenny Matthew"
or Phrase "Shannon Matthews"
or Phrase "DreamCatcher Interactive"
or Phrase "Hydravision Entertainment"
or Phrase "MC2 Microids"
)
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Obscure game",
...
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "Josh Carter",
...
}
},
{
"multi_match": {
"query": "Stan Jones",
...
}
},
...
],
"minimum_should_match": "1"
}
}
]
}
}
}
{
"track_total_hits": true,
"size": 100000,
"_source": {
"includes": ["i","b","g","h","r","t"]
},
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Obscure game",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "Josh Carter",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Stan Jones",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Kenny Matthew",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Shannon Matthews",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "DreamCatcher Interactive",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Hydravision Entertainment",
"fields": ["g","t","h","b"],
"type": "phrase"
}
},
{
"multi_match": {
"query": "MC2 Microids",
"fields": ["g","t","h","b"],
"type": "phrase"
}
}
],
"minimum_should_match": "1"
}
}
]
}
}
}
Phrase "Obscure game"
and (
Phrase "Josh Carter"
or Phrase "Stan Jones"
or Phrase "Kenny Matthew"
or Phrase "Shannon Matthews"
or Phrase "DreamCatcher Interactive"
or Phrase "Hydravision Entertainment"
or Phrase "MC2 Microids"
)
1. Phrase
2. Tag
3. And
4. Or
5. ()
p"value"
t"tag"
t"foo" and p"bar"
t"foo" or p"bar"
t"foo" and (p"bar" or t"baz")
/**
 * Expression tree for a Tumblr mapping rule: two leaf kinds ({@link Phrase},
 * {@link Tag}) and two combinators ({@link And}, {@link Or}) that hold nested
 * expressions.
 *
 * NOTE(review): {@code @Data} is presumably Lombok's annotation (accessors,
 * equals/hashCode, toString) - confirm against the file's imports.
 */
interface TumblrMappingExpr {
/** Leaf node carrying a single string; presumably the {@code p"value"} form of the textual notation - confirm. */
@Data
class Phrase implements TumblrMappingExpr {
// Declared without an access modifier and without final.
String value;
}
/** Leaf node carrying a single string; presumably the {@code t"tag"} form of the textual notation - confirm. */
@Data
class Tag implements TumblrMappingExpr {
String value;
}
/** Combinator node holding a list of child expressions (conjunction, judging by the name). */
@Data
class And implements TumblrMappingExpr {
// Children may themselves be And/Or nodes, so the tree can nest arbitrarily deep.
List<TumblrMappingExpr> expressions;
}
/** Combinator node holding a list of child expressions (disjunction, judging by the name). */
@Data
class Or implements TumblrMappingExpr {
List<TumblrMappingExpr> expressions;
}
}
sealed interface TumblrMappingExpr {
record Phrase(String value) implements TumblrMappingExpr {}
record Tag(String value) implements TumblrMappingExpr {}
record And(List<TumblrMappingExpr> expressions) implements TumblrMappingExpr {}
record Or(List<TumblrMappingExpr> expressions) implements TumblrMappingExpr {}
}
/** Expression tree for a Tumblr mapping rule.
  *
  * Sealed so that pattern matches over it are checked for exhaustiveness.
  * Extending `Product with Serializable` keeps those two traits out of inferred
  * types such as `List(Phrase("a"), Tag("b"))`.
  */
sealed trait TumblrMappingExpr extends Product with Serializable

object TumblrMappingExpr {

  // All cases are `final`: subclassing a case class breaks its generated
  // equals/hashCode/copy and defeats exhaustiveness reasoning.

  /** Leaf carrying a phrase string. */
  final case class Phrase(value: String) extends TumblrMappingExpr

  /** Leaf carrying a tag string. */
  final case class Tag(value: String) extends TumblrMappingExpr

  /** Combines child expressions (conjunction, judging by the name). */
  final case class And(expressions: List[TumblrMappingExpr]) extends TumblrMappingExpr

  /** Combines child expressions (disjunction, judging by the name). */
  final case class Or(expressions: List[TumblrMappingExpr]) extends TumblrMappingExpr
}
/** Expression tree for a Tumblr mapping rule, as a Scala 3 enum.
  *
  * `Phrase` and `Tag` are leaves holding a string; `And` and `Or` hold a list
  * of nested expressions, so trees can nest to any depth.
  */
enum TumblrMappingExpr:
  /** Leaf carrying a phrase string. */
  case Phrase(value: String)

  /** Leaf carrying a tag string. */
  case Tag(value: String)

  /** Combines child expressions (conjunction, judging by the name). */
  case And(expressions: List[TumblrMappingExpr])

  /** Combines child expressions (disjunction, judging by the name). */
  case Or(expressions: List[TumblrMappingExpr])
// Expression tree for a Tumblr mapping rule.
//
// Each class carries a literal `kind` discriminant. Without it, Phrase/Tag and
// And/Or are structurally identical, so TypeScript cannot reliably narrow the
// TumblrMappingExpr union (an `instanceof Phrase` check also matches the Tag
// type). Constructors initialise every field so the classes compile under
// `strictPropertyInitialization`.

/** Leaf carrying a phrase string. */
class Phrase {
  readonly kind = "phrase";
  value: string;

  constructor(value: string) {
    this.value = value;
  }
}

/** Leaf carrying a tag string. */
class Tag {
  readonly kind = "tag";
  value: string;

  constructor(value: string) {
    this.value = value;
  }
}

/** Combines child expressions (conjunction, judging by the name). */
class And {
  readonly kind = "and";
  expressions: TumblrMappingExpr[];

  constructor(expressions: TumblrMappingExpr[]) {
    this.expressions = expressions;
  }
}

/** Combines child expressions (disjunction, judging by the name). */
class Or {
  readonly kind = "or";
  expressions: TumblrMappingExpr[];

  constructor(expressions: TumblrMappingExpr[]) {
    this.expressions = expressions;
  }
}

/** Discriminated union of all expression nodes; switch on `kind` to narrow. */
type TumblrMappingExpr = Phrase | Tag | And | Or;
Type 0: Unrestricted Grammar — the broadest and most complex class
Type 1: Context-Sensitive Grammar — complex grammars; the parser must keep track of context (state)
Type 2: Context-Free Grammar — relatively simple grammars
Type 3: Regular Grammar — the simplest case, can be parsed with regexps
1. Manually written parser
2. Regexps
TumblrMappingExprGrammar
TumblrMappingExprParserTest
p"bless online"
or p"bless unleashed"
or (p"bless" and (p"mmo" or p"rpg" or p"mmorpg"))
(p"Spider-Man" or p"Spider Man")
and (p"video game" or p"game")
and p"2018"
t"Dante's Inferno game" or (
p"Dante's Inferno" and (
p"video game"
or p"Visceral Games"
or p"EA"
or p"Electronic Arts"
or p"xbox"
or p"PlayStation"
or (p"game" and p"2010")
)
)
TumblrMappingExprToESquery
+--------------+------+
|game_title |posts |
+--------------+------+
|D |10000 |
|Baldur's Gate |10000 |
|Forced |10000 |
|Hearts |10000 |
|Sky |10000 |
|Air |10000 |
|LoveR |10000 |
|Spider |10000 |
|Journey |10000 |
|Blood |10000 |
|Snake |9723 |
|Stray |9499 |
|720° |8781 |
|SiN |8125 |
|Bless |7893 |
|Hatred |7745 |
|The Forest |7119 |
+--------------+------+
+-------------------+------+
|game_title |posts |
+-------------------+------+
|Baldur's Gate III |21429 |
|Minecraft |6729 |
|Pikmin |5168 |
|Genshin Impact |5036 |
|Undertale |4472 |
|Splatoon |3936 |
|Disco Elysium |3569 |
|The Sims |3520 |
|Deltarune |3241 |
|Animal Crossing |2990 |
|Elden Ring |2440 |
|Stardew Valley |1943 |
|Braid |1873 |
|The Last of Us |1798 |
|Fire Emblem |1349 |
|The Legend of Zelda|1347 |
|Honkai: Star Rail |1175 |
+-------------------+------+