‎Index optimization‎

from B-tree to ICP

YAPC::EU 2016

Follow this talk

Who I am

Pavel Scherbinin (Dzirtik)

  • Leader of Moscow.PM
  • Organisator YAPC::Russia
  • Perl programmer (8+ years)
  • Team Lead
  • DBA
  • Lecturer

Technopark @ Mail.Ru

Bauman State University + Mail.Ru

  • 2 years
  • 14 courses
  • 60 students every half-year
  • additional courses:

         Perl!!!

         ...

 

https://park.mail.ru/

Indexes

Phonebook

B-Tree

Multiple-Column Indexes

CREATE TABLE People (​
    id int(11) autoincrement,
    last_name varchar(50) not null,​
    first_name varchar(50) not null,​
    dob date not null,​
    gender enum(m, f) not null,​
    primary key (id),
    key(last_name, first_name, dob)​
);​

Multiple-Column Indexes

Multiple-Column Indexes

Text

Multiple-Column Indexes

  • Akroyd
    • Christian
      • 1958-12-07
    • Debbie
      • 1978-11-02
    • ...
  • Allen
    • Cuba
      • 1960-01-01
    • Kim
      • 1930-07-12
    • ...
  • ...
  • Basinger
    • Viven
      • 1976-12-08
      • 1979-01-24

What we can do

  • Search by full equal
  • Search by left prefix
  • Search by column prefix
  • Search by range
  • Search by full equal plus by range
  • Queries only by index (covered indexes)

Search by full equal

SELECT 
    COUNT(*)
FROM 
    topics
WHERE 
    forum_id = 2;

-- index(forum_id)

Search by full equal

SELECT 
    COUNT(*)
FROM 
    topics
WHERE 
    forum_id = 2 
    AND author = "ex@ex.com";

-- index(forum_id, author) 
-- || index(author, forum_id)

Search by left prefix

-- index(forum_id, author)

SELECT COUNT(*) FROM topics
WHERE forum_id = 2;
-- using index

SELECT COUNT(*) FROM topics
WHERE  author = "ex@ex.com"; 
-- not using index

Search by column prefix

-- index(title)

SELECT COUNT(*) FROM topics
WHERE title LIKE 'help%';
-- using index

SELECT COUNT(*) FROM topics
WHERE  title LIKE '%help%'; 
-- not using index

Search by range

-- index(forum_id)

SELECT COUNT(*) FROM topics
WHERE forum_id > 500;

SELECT COUNT(*) FROM topics
WHERE  forum_id BETWEEN 500 AND 750; 

Search by full equal plus by range

-- index(forum_id, replies_cnt)

SELECT COUNT(*) FROM topics
WHERE forum_id = 500 
      AND replies_cnt > 1000;
-- using index
 
SELECT COUNT(*) FROM topics
WHERE replies_cnt > 100; 
-- not using index

What we can't do

  • Search without left part of index
  • Skip colums
  • Search after range seek

Search without left part of index

-- index(
--   forum_id, replies_cnt, author)

SELECT COUNT(*) FROM topics
WHERE replies_cnt > 1000;
-- not using index
 
SELECT COUNT(*) FROM topics
WHERE author = "ex@ex.com"; 
-- not using index

Skip colums

-- index(
--   forum_id, replies_cnt, author)

SELECT COUNT(*) FROM topics
WHERE forum_id = 500 
      AND author = "ex@ex.com";
-- using only forum_id part

Search after range seek

-- index(
--   forum_id, replies_cnt, author)

SELECT COUNT(*) FROM topics
WHERE forum_id > 500 
      AND replies_cnt > 1000;
-- using only forum_id part

Index Condition Pushdown

Index Condition Pushdown

-- index(
--   forum_id, replies_cnt, author)

SELECT COUNT(*) FROM topics
WHERE forum_id > 500 
      AND replies_cnt > 1000;
-- since MySQL 5.6
-- take forum_id rows and after then 
-- scan info about replies_cnt
-- in index

Index Condition Pushdown

-- index(forum_id, title)

SELECT COUNT(*) FROM topics
WHERE forum_id > 500 
      AND title LIKE '%help%';

When we use indexes

  • Search by WHERE​

  • JOIN tables

  • Sorting and grouping tables (ORDER BY & GROUP BY)​

  • MAX and MIN for key fields

  • To retrieve data not from the table data, and from only the index file (covering indexes)

Search by WHERE​

Search by WHERE

-- index(???)

SELECT COUNT(*) FROM topics
WHERE title LIKE '%help%' 
      AND replies_cnt > 100;

Search by WHERE

-- index(replies_cnt, title)

SELECT COUNT(*) FROM topics
WHERE title LIKE '%help%' 
      AND replies_cnt > 100;

Search by WHERE

-- index(???)

SELECT COUNT(*) FROM topics
WHERE forum_id > 500 
      AND replies_cnt > 100;

Search by WHERE

-- index(forum_id, replies_cnt)
-- index(replies_cnt, forum_id) too

SELECT COUNT(*) FROM topics
WHERE forum_id > 500 
      AND replies_cnt > 100;

JOIN tables

JOIN tables

JOIN tables

SELECT 
    tbl1.col1, tbl2.col2
FROM tbl1 
    INNER JOIN tbl2 USING(col3)
WHERE 
    tbl1.col1 IN (5, 6);

JOIN tables

outer_iter = iterator over tbl1 where col1 IN(5,6)
outer_row = outer_iter.next
while outer_row
	inner_iter = iterator over tbl2 where col3 = outer_row.col3
	inner_row = inner_iter.next
	while inner_row
		output [ outer_row.col1, inner_row.col2 ]
		inner_row = inner_iter.next
	end
	outer_row = outer_iter.next
end

JOIN tables

outer_iter = iterator over tbl1 where col1 IN(5,6)
outer_row = outer_iter.next
while outer_row
	inner_iter = iterator over tbl2 where col3 = outer_row.col3
	inner_row = inner_iter.next
	if inner_row
		while inner_row
			output [ outer_row.col1, inner_row.col2 ]
			inner_row = inner_iter.next
		end
	else
		output [ outer_row.col1, NULL ]
	end
	outer_row = outer_iter.next
end

JOIN tables

JOIN tables

JOIN tables

SELECT COUNT(*)
FROM forums f
INNER JOIN topics t ON (f.id = t.fid)
WHERE f.id = 5 AND t.replies_cnt > 500;

-- indexes:
-- forums (???)
-- topics (???)

JOIN tables

SELECT COUNT(*)
FROM forums f
INNER JOIN topics t ON (f.id = t.fid)
WHERE f.id = 5 AND t.replies_cnt > 500;

-- indexes:
-- forums (id)
-- topics (fid, replies_cnt)

JOIN tables

SELECT COUNT(*)
FROM forums f
LEFT JOIN topics t ON (f.id = t.fid)
WHERE f.id = 5;

-- indexes:
-- forums (???)
-- topics (???)

JOIN tables

SELECT COUNT(*)
FROM forums f
LEFT JOIN topics t 
    ON (f.id = t.fid) 
    AND t.replies_cnt > 500
WHERE f.id = 5;

-- indexes:
-- forums (???)
-- topics (???)

JOIN tables

SELECT COUNT(*)
FROM forums f
LEFT JOIN topics t 
    ON (f.id = t.fid) 
WHERE f.id = 5 AND t.replies_cnt > 500;

-- indexes:
-- forums (???)
-- topics (???)

Sorting and grouping tables

SELECT COUNT(*)
FROM topics
WHERE forum_id = 42
ORDER BY created_at DESC;

-- index (forum_id, created_st)
-- the same for ASC

Sorting and grouping tables

SELECT COUNT(*)
FROM topics
WHERE forum_id = 42
GROUP BY created_at;

-- index (forum_id, created_st)

Sorting and grouping tables

SELECT COUNT(*)
FROM topics
WHERE forum_id = 42
GROUP BY created_at;

-- index (forum_id, created_st)

MAX and MIN for key fields

SELECT MAX(replies_cnt)
FROM topics
WHERE forum_id = 42

-- index (forum_id, replies_cnt)

Covering Indexes

  • Compact
  • Sorted
  • Cached

Covering Indexes

SELECT author
FROM topics
WHERE forum_id = 42
   AND replies_cnt > 300;

-- index (???)

Clustered index

Clustered index

Clustered index

Clustered index

Clustered index

Clustered index

Clustered index

The Story about News

id time body ...
id time title ...

news

story

The Story about News

id time body ...
id time title ...

news

story

news_id story_id

sn

The Story about News

id time body ...
id time title ...

news

story

news_id story_id

sn

SELECT * 
FROM story 
     INNER JOIN sn ON story.id = sn.story_id 
     INNER JOIN news ON sn.news_id = news.id
WHERE story.id = ? 
ORDER BY news.time;

The Story about News

id time body ...
id time title ...

news

story

news_id story_id

sn

SELECT * 
FROM sn  
     INNER JOIN news ON sn.news_id = news.id
WHERE sn.story_id = ? 
ORDER BY news.time;

The Story about News

id time body ...
id time title ...

news

story

news_id story_id

sn

SELECT news_id FROM sn
WHERE sn.story_id = ?;
FOR EACH ROW:
    SELECT * FROM news
    WHERE news.id = ?;
SORT BY time

The Story about News

id time body ...
id time title ...

news (id)

story

news_id story_id

sn(story_id, news_id)

SELECT news_id FROM sn
WHERE sn.story_id = ?;
FOR EACH ROW:
    SELECT * FROM news
    WHERE news.id = ?;
SORT BY time

The Story about News

id time body ...
id time title ...

news (id)

story

news_id story_id time

sn(story_id, time, news_id)

SELECT news_id FROM sn
WHERE sn.story_id = ?
ORDER BY time;
FOR EACH ROW:
    SELECT * FROM news
    WHERE news.id = ?;

The Story about News

id time body ...
id time title ...

news (id)

story

news_id story_id time

sn(story_id, time, news_id) + FK ON news(id, time)

SELECT news_id FROM sn
WHERE sn.story_id = ?
ORDER BY time;
FOR EACH ROW:
    SELECT * FROM news
    WHERE news.id = ?;

Follow me:

dzirtik

dzirtik

dzirtik

QUESTIONS?

Made with Slides.com