class AddPublishedToPlayerMove < ActiveRecord::Migration
  def up
    add_column :player_moves, :published, :boolean
    PlayerMove.reviews.each(&:update_published).each(&:save)
  end

  def down
    remove_column :player_moves, :published
  end
end
Pro: easy to write and understand
Con: *very* slow; it loads every record into memory at once and issues a separate UPDATE for each one
class AddPublishedToPlayerMove < ActiveRecord::Migration
  def up
    add_column :player_moves, :published, :boolean
    PlayerMove.reviews.find_each do |p|
      p.update_published
      p.save
    end
  end

  def down
    remove_column :player_moves, :published
  end
end
Pro: won't consume all your memory
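find_each fetches rows in batches of 1,000 by default; the batch_size option tunes the trade-off between memory use and round-trips. A minimal sketch (10_000 is only an illustration, not a recommendation):

# larger batches mean fewer SELECTs, at the cost of more rows held in memory at once
PlayerMove.reviews.find_each(batch_size: 10_000) do |p|
  p.update_published
  p.save
end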
class AddPublishedToPlayerMove < ActiveRecord::Migration
  def up
    add_column :player_moves, :published, :boolean
    PlayerMove.reviews.find_in_batches do |batch|
      batch.each do |p|
        p.update_published
        p.save
      end
      GC.start # ✨
    end
  end

  def down
    remove_column :player_moves, :published
  end
end
Pro: lets you manually run garbage collection between batches, keeping peak memory lower
If you have over 100k records in the table, you need to worry about performance. Even with optimistic per-record timings, the math adds up fast:

  2 ms for database read
+ 3 ms for Ruby
+ 2 ms for database write
= 7 ms per record
× 100k records
= 700 seconds, roughly 12 minutes of migration time
class AddPublishedToPlayerMove < ActiveRecord::Migration
  def up
    add_column :player_moves, :published, :boolean
    PlayerMove.reviews.update_all published: true
  end

  def down
    remove_column :player_moves, :published
  end
end
Pros: runs a single SQL UPDATE, can accept custom conditions
Cons: doesn't run callbacks or validations
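Because update_all runs against whatever relation you give it, custom conditions are just scope chaining before the call. A sketch; the played_at column here is hypothetical:

# still a single UPDATE ... WHERE statement, no records loaded into Ruby
PlayerMove.reviews.where('played_at < ?', 1.year.ago).update_all published: false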
require 'csv'

class CreateReviews < ActiveRecord::Migration
  def up
    create_table :reviews # ...

    file = File.expand_path('reviews.csv', __dir__)
    reviews = CSV.read(file, headers: true).map do |row|
      [row['content'], row['rating'], row['user_id']]
    end
    Review.import [:content, :rating, :user_id], reviews
  end

  def down
    drop_table :reviews
  end
end
Pro: easy to quickly import a ton of data without writing SQL yourself
Con: still builds and validates a Ruby object for each record (validate: true is activerecord-import's default), even though the rows themselves go in as batched multi-row INSERTs
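If the CSV has already been validated upstream, activerecord-import can skip that per-record validation pass. A sketch, assuming you genuinely trust the input:

# skips model validations entirely, trading safety for speed
Review.import [:content, :rating, :user_id], reviews, validate: false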
def up
  create_table :incentives do |t|
    t.integer :amount, default: 0, null: false
    t.string :status
    t.timestamp :sent_at
    t.references :resident, null: false, index: true
    t.references :property, null: false, index: true
    t.timestamps
  end

  insert <<-SQL.squish
    insert into incentives (resident_id, property_id, amount, status, sent_at)
    select id, property_id, incentive_amount, incentive_status, incentive_sent_at
    from residents
    where incentive_amount > 0
  SQL

  remove_columns :residents, :incentive_amount, :incentive_status, :incentive_sent_at
end
Pros: blazingly fast, timing & total records changed logged to console
Cons: hard to understand, easy to mess up
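The timing and row counts come for free because the migration wraps delegated connection methods like insert, update, and delete in say_with_time, and those methods return the number of rows they touched. You can also call say_with_time yourself to label a custom step; a minimal sketch (the message is illustrative):

say_with_time 'backfilling published flags' do
  # update_all returns an Integer, so the row count gets printed as well
  PlayerMove.reviews.update_all published: true
end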
# Removing duplicate records from a table with millions of rows
def up
  delete <<-SQL.squish
    delete from residents
    where id in (
      select id from (
        select id, row_number() over (partition by email, property_id order by id)
        from residents
      ) t
      where t.row_number > 1
    )
  SQL

  delete <<-SQL.squish
    delete from residents where email = 'undefined'
  SQL

  add_index :residents, [:property_id, :email], unique: true
end
Pro: can handle hard problems in seconds that would take a Ruby script hours or days to complete
saver = ->(action) {
  # ...
}

Action.order(:id).limit(100_000).each(&saver)
Action.order(:id).limit(100_000).offset(100_000).each(&saver)
Action.order(:id).limit(100_000).offset(200_000).each(&saver)
Action.order(:id).limit(100_000).offset(300_000).each(&saver)
Action.order(:id).limit(100_000).offset(400_000).each(&saver)
Action.order(:id).limit(100_000).offset(500_000).each(&saver)
Action.order(:id).limit(100_000).offset(600_000).each(&saver)
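Deep OFFSETs get progressively slower, because the database still has to scan and discard every skipped row. A keyset-pagination variant keeps each chunk equally cheap; a sketch, assuming id is a monotonically increasing primary key:

last_id = 0
loop do
  # WHERE id > ? uses the primary key index, so no rows are scanned and thrown away
  batch = Action.where('id > ?', last_id).order(:id).limit(100_000).to_a
  break if batch.empty?
  batch.each(&saver)
  last_id = batch.last.id
end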
When modifying tens of millions of rows
select count(*) from users where email ~* '^f|t$';
update player_moves set content = to_json(feedbacks)
from feedbacks where player_move_id = player_moves.id;
with counts as (
  select player_id,
         count(*) filter (where site = 'facebook') as facebook,
         count(*) filter (where site = 'google') as google
  from friends
  group by player_id
)
update players
set facebook_friends = facebook, google_friends = google
from counts
where player_id = players.id;
# select count(*) from versions where item_type = 'Authentication'
# and object_changes->>'s3_image' is not null;
# count
# --------
# 317816
#
# select count(*) from versions where item_type = 'Authentication'
# and object_changes->>'s3_image' is not null
# and object_changes - 's3_image' - 'updated_at' = '{}'::jsonb;
# count
# --------
# 114834
delete <<-SQL.squish
  delete from versions
  where item_type = 'Authentication'
  and object_changes->>'s3_image' is not null
  and object_changes - 's3_image' - 'updated_at' = '{}'::jsonb
SQL

update <<-SQL.squish
  update versions
  set object_changes = object_changes - 's3_image'
  where item_type = 'Authentication'
  and object_changes->>'s3_image' is not null
SQL

update <<-SQL.squish
  update versions
  set object = object - 's3_image'
  where item_type = 'Authentication'
  and object->>'s3_image' is not null
SQL
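A sanity check worth running once the migration finishes, assuming nothing else writes s3_image in the meantime, is to re-run the first count query from the comments above; it should now return 0:

# select count(*) from versions where item_type = 'Authentication'
# and object_changes->>'s3_image' is not null;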