Abhishek Yadav
ரூபீ ப்ரோக்ராமர்
Co-organizer: Chennai.rb
The goal is the avoid full table scans
Example (Rails and mysql) -
-- Create an index
CREATE INDEX users_on_email ON users (email);
-- It is used for this query
select * from users where email="abhishek@example.com";
ActiveRecord::Migration.add_index :users, :email
User.where(email: "abhishek@example.com")
A query requesting less data will also do a full table scan if the ORDER BY clause has no index
-- Full table scan if there is no index on created_at
EXPLAIN SELECT * from users ORDER BY created_at DESC LIMIT 10;
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
| 1 | SIMPLE | users | NULL | ALL | NULL | NULL | NULL | NULL | 50019 | 100.00 | Using filesort |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
A query with multiple columns in ORDER BY needs a composite index
-- Full table scan if there is no composite index
explain SELECT * from users ORDER BY created_at, first_name LIMIT 10;
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
| 1 | SIMPLE | users | NULL | ALL | NULL | NULL | NULL | NULL | 50019 | 100.00 | Using filesort |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
-- create the compound index
CREATE INDEX users_on_created_at_first_name ON users (created_at, first_name);
-- Check explain
explain SELECT * from users ORDER BY created_at, first_name LIMIT 10;
+----+-------------+-------+------------+-------+---------------+--------------------------------+---------+------+------+----------+-------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+--------------------------------+---------+------+------+----------+-------+
| 1 | SIMPLE | users | NULL | index | NULL | users_on_created_at_first_name | 773 | NULL | 10 | 100.00 | NULL |
+----+-------------+-------+------------+-------+---------------+--------------------------------+---------+------+------+----------+-------+
-- Does not use the index on email (lower is not needed in mysql)
select * from users where lower(email)="abhishek@example.com";
# Leads to N+1
@users = User.featured.order(:rating).limit(20)
<% @users.each do |user| %>
<%= user.name %> has <%= user.posts.size %> posts
<% end %>
# Eager loading
@users = User.featured.order(:rating).limit(20).includes(:posts)
@users = User.featured.order(:rating).limit(20).eager_load(:posts)
# Leads to N+1
@users = User.featured.order(:rating).limit(20)
<% @users.each do |user| %>
<%= user.name %> has <%= user.posts.size %> posts
<% end %>
# Eager loading
@users = User.featured.order(:rating).limit(20).includes(:posts)
@users = User.featured.order(:rating).limit(20).eager_load(:posts)
# This method will always fire a query. Eager loading or not
def ratings_count
self.ratings.count
end
# counting in Ruby is better than using SQL count
def ratings_count
self.ratings.to_a.count
end
class User
# Leads N+1s, despite the eager-loading,
# because Rating.active is a scope that fires another query
def active_ratings
self.ratings.active
end
# A simple alternative, doesn't cause N+1
def active_ratings2
self.ratings{ |r| r.active? }
end
end
# Pluck will always fire a query
def ratings_sum
ratings.pluck(:stars).sum
end
# Here if ratings is eager-loaded, we avoid the extra query
def ratings_sum2
ratings.select(&:stars).sum
end
# Restaurants in the city may be few
# but some of them may have too many comments or check-ins
Restaurant.in_city(city)
.includes(:ratings => [:rater])
.includes(:comments => [:commenter, :comment_ratings])
.includes(:check_ins => [:customer])
# Get top twenty restaurants by ratings
# Doest show those without ratings
Restaurant.joins(:ratings)
.select("restaurants.id, count(ratings.restaurant_id) as ratings_count")
.group("ratings.restaurant_id")
.order("ratings_count desc")
.limit(20)
# Get top twenty Restaurants by recent ratings
# Doest show restaurants without ratings
Restaurant.joins(:ratings)
.where("ratings.created_at > ?", 3.month_ago)
.select("restaurants.id, count(ratings.restaurant_id) as ratings_count")
.group("ratings.restaurant_id")
.order("ratings_count desc")
.limit(20)
# Get restaurants that were rated by the user'r friends
rating_ids = Rating.where(user_id: users.friend_ids)
.select(:restaurant_id)
Restaurant.where(id: rating_ids)
# The above goes as one query
# Add a column ratings_count that keeps the count
# Can use Rails's counter-cache
User.order("ratings_count desc")
.limit(20)
SELECT
restaurants.*,
ROUND(
3956 * 2 * ASIN ( SQRT (
POWER( SIN((13.14 - restaurants.latitude)*pi()/180 / 2),2)
+ COS(orig_lat * pi()/180)
* COS(restaurants.latitude *pi()/180)
* POWER(SIN((80.12 - restaurants.longitude) *pi()/180 / 2), 2)
) ),2) as distance
FROM
restaurants;
SELECT
restaurants.*,
ROUND(
3956 * 2 * ASIN ( SQRT (
POWER( SIN((13.14 - restaurants.latitude)*pi()/180 / 2),2)
+ COS(orig_lat * pi()/180)
* COS(restaurants.latitude *pi()/180)
* POWER(SIN((80.12 - restaurants.longitude) *pi()/180 / 2), 2)
) ),2) as distance
FROM
restaurants;
SELECT
restaurants.*,
ROUND(
3956 * 2 * ASIN ( SQRT (
POWER( SIN((13.14 - restaurants.latitude)*pi()/180 / 2),2)
+ COS(orig_lat * pi()/180)
* COS(restaurants.latitude *pi()/180)
* POWER(SIN((80.12 - restaurants.longitude) *pi()/180 / 2), 2)
) ),2) as distance
FROM
restaurants;
WHERE
distance < 100