Abhishek Yadav
ரூபீ ப்ரோக்ராமர்
Co-organizer: Chennai.rb
The goal is the avoid full table scans
Example (Rails and mysql) -
-- Create an index
CREATE INDEX users_on_email ON users (email);
-- It is used for this query
select * from users where email="abhishek@example.com";
    
ActiveRecord::Migration.add_index :users, :email
User.where(email: "abhishek@example.com")
    A query requesting less data will also do a full table scan if the ORDER BY clause has no index
-- Full table scan if there is no index on created_at
EXPLAIN SELECT * from users ORDER BY created_at DESC LIMIT 10;
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows  | filtered | Extra          |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
|  1 | SIMPLE      | users | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 50019 |   100.00 | Using filesort |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
    A query with multiple columns in ORDER BY needs a composite index
-- Full table scan if there is no composite index
explain SELECT * from users ORDER BY created_at, first_name LIMIT 10;
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows  | filtered | Extra          |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
|  1 | SIMPLE      | users | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 50019 |   100.00 | Using filesort |
+----+-------------+-------+------------+------+---------------+------+---------+------+-------+----------+----------------+
    
-- create the compound index
CREATE INDEX users_on_created_at_first_name ON users (created_at, first_name);
-- Check explain
explain SELECT * from users ORDER BY created_at, first_name LIMIT 10;
+----+-------------+-------+------------+-------+---------------+--------------------------------+---------+------+------+----------+-------+
| id | select_type | table | partitions | type  | possible_keys | key                            | key_len | ref  | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+--------------------------------+---------+------+------+----------+-------+
|  1 | SIMPLE      | users | NULL       | index | NULL          | users_on_created_at_first_name | 773     | NULL |   10 |   100.00 | NULL  |
+----+-------------+-------+------------+-------+---------------+--------------------------------+---------+------+------+----------+-------+
    
-- Does not use the index on email (lower is not needed in mysql)
select * from users where lower(email)="abhishek@example.com";
    
# Leads to N+1
@users = User.featured.order(:rating).limit(20)
<% @users.each do |user| %>
  <%= user.name %> has <%= user.posts.size %> posts
<% end %>
# Eager loading
@users = User.featured.order(:rating).limit(20).includes(:posts)
@users = User.featured.order(:rating).limit(20).eager_load(:posts)
    
# Leads to N+1
@users = User.featured.order(:rating).limit(20)
<% @users.each do |user| %>
  <%= user.name %> has <%= user.posts.size %> posts
<% end %>
# Eager loading
@users = User.featured.order(:rating).limit(20).includes(:posts)
@users = User.featured.order(:rating).limit(20).eager_load(:posts)
    # This method will always fire a query. Eager loading or not
def ratings_count
  self.ratings.count
end
    # counting in Ruby is better than using SQL count
def ratings_count
  self.ratings.to_a.count
end
    
class User
  # Leads N+1s, despite the eager-loading,
  # because Rating.active is a scope that fires another query
  def active_ratings
    self.ratings.active
  end
  # A simple alternative, doesn't cause N+1
  def active_ratings2
    self.ratings{ |r| r.active? }
  end
end
    
# Pluck will always fire a query
def ratings_sum
  ratings.pluck(:stars).sum
end
# Here if ratings is eager-loaded, we avoid the extra query
def ratings_sum2
  ratings.select(&:stars).sum
end
    
# Restaurants in the city may be few
# but some of them may have too many comments or check-ins
Restaurant.in_city(city)
  .includes(:ratings => [:rater])
  .includes(:comments => [:commenter, :comment_ratings])
  .includes(:check_ins => [:customer])
    
# Get top twenty restaurants by ratings
# Doest show those without ratings
Restaurant.joins(:ratings)
    .select("restaurants.id, count(ratings.restaurant_id) as ratings_count")
    .group("ratings.restaurant_id")
    .order("ratings_count desc")
    .limit(20)
    
# Get top twenty Restaurants by recent ratings
# Doest show restaurants without ratings
Restaurant.joins(:ratings)
    .where("ratings.created_at > ?", 3.month_ago)
    .select("restaurants.id, count(ratings.restaurant_id) as ratings_count")
    .group("ratings.restaurant_id")
    .order("ratings_count desc")
    .limit(20)
    
# Get restaurants that were rated by the user'r friends
rating_ids = Rating.where(user_id: users.friend_ids)
                   .select(:restaurant_id)
Restaurant.where(id: rating_ids)
# The above goes as one query
    
# Add a column ratings_count that keeps the count
# Can use Rails's counter-cache
User.order("ratings_count desc")
    .limit(20)
    SELECT
restaurants.*,
  ROUND(
    3956 * 2 * ASIN ( SQRT (
      POWER( SIN((13.14 - restaurants.latitude)*pi()/180 / 2),2)
      + COS(orig_lat * pi()/180) 
      * COS(restaurants.latitude *pi()/180) 
      * POWER(SIN((80.12 - restaurants.longitude) *pi()/180 / 2), 2) 
    ) ),2) as distance
FROM
 restaurants;
    SELECT
restaurants.*,
  ROUND(
    3956 * 2 * ASIN ( SQRT (
      POWER( SIN((13.14 - restaurants.latitude)*pi()/180 / 2),2)
      + COS(orig_lat * pi()/180) 
      * COS(restaurants.latitude *pi()/180) 
      * POWER(SIN((80.12 - restaurants.longitude) *pi()/180 / 2), 2) 
    ) ),2) as distance
FROM
 restaurants;
    SELECT
restaurants.*,
  ROUND(
    3956 * 2 * ASIN ( SQRT (
      POWER( SIN((13.14 - restaurants.latitude)*pi()/180 / 2),2)
      + COS(orig_lat * pi()/180) 
      * COS(restaurants.latitude *pi()/180) 
      * POWER(SIN((80.12 - restaurants.longitude) *pi()/180 / 2), 2) 
    ) ),2) as distance
FROM
 restaurants;
WHERE 
 distance < 100