@acrogenesis
# Baseline benchmark: time how long it takes to turn a 100,000 x 10 table of
# 1000-character strings into one CSV string.
require "benchmark"
num_rows = 100_000
num_cols = 10
# The inner block runs once per cell, so every cell is a separate string object.
data = Array.new(num_rows) { Array.new(num_cols) { "x"*1000 } }
time = Benchmark.realtime do
# Join each row with commas, then join the resulting row strings with newlines.
csv = data.map { |row| row.join(",") }.join("\n")
end
# Print the elapsed time rounded to two decimal places.
puts time.round(2)
Execution time: 2.09s
Concatenating 100,000 rows, 10 columns
# Same CSV-building benchmark, but with the garbage collector switched off
# before the timed section.
require "benchmark"
num_rows = 100000
num_cols = 10
# The inner block runs once per cell, so every cell is a separate string object.
data = Array.new(num_rows) { Array.new(num_cols) { "x"*1000 } }
# Disable GC after the data set is built and before timing starts.
GC.disable
time = Benchmark.realtime do
# Join each row with commas, then join the resulting row strings with newlines.
csv = data.map { |row| row.join(",") }.join("\n")
end
# Print the elapsed time rounded to two decimal places.
puts time.round(2)
Execution time: 1.16s
Our program spends 44.5% of the time in the garbage collector
Ruby < 2.1: mark-and-sweep, stop-the-world GC
Ruby ≥ 2.1: restricted generational GC
# CSV-building benchmark with GC disabled that also prints the process's
# memory footprint before and after the timed section.
require "benchmark"
num_rows = 100000
num_cols = 10
# The inner block runs once per cell, so every cell is a separate string object.
data = Array.new(num_rows) { Array.new(num_cols) { "x"*1000 } }
# Ask `ps` for this process's RSS, divide by 1024 and print it labelled as MB
# (memory after building the data set, before the CSV work).
puts "%d MB" % (`ps -o rss= -p #{Process.pid}`.to_i/1024)
# With GC off, nothing allocated in the timed block is reclaimed before the
# second measurement below.
GC.disable
time = Benchmark.realtime do
# Join each row with commas, then join the resulting row strings with newlines.
csv = data.map { |row| row.join(",") }.join("\n")
end
# Same RSS probe again, now after the CSV string has been built.
puts "%d MB" % (`ps -o rss= -p #{Process.pid}`.to_i/1024)
# Print the elapsed time rounded to two decimal places.
puts time.round(2)
> 1060 MB
> 2999 MB
Our initial dataset takes ~1GB, so why did processing the data take ~2GB extra instead of just ~1GB?
# The same CSV-building benchmark, with the map block written across several
# lines so the per-row join stands out.
require "benchmark"
num_rows = 100000
num_cols = 10
# The inner block runs once per cell, so every cell is a separate string object.
data = Array.new(num_rows) { Array.new(num_cols) { "x"*1000 } }
time = Benchmark.realtime do
csv = data.map do |row|
# Produces one comma-joined string per row; map collects all of them into a
# new array before the final join runs.
row.join(",")
end.join("\n")
end
# Print the elapsed time rounded to two decimal places.
puts time.round(2)
The CSV rows that we generate inside that block are actually intermediate results stored in memory until we can finally join them by the newline character.
This is exactly where we use that extra 1GB of memory
# CSV-building benchmark that appends every cell straight onto a single
# string instead of first building one joined string per row.
require "benchmark"
num_rows = 100000
num_cols = 10
# The inner block runs once per cell, so every cell is a separate string object.
data = Array.new(num_rows) { Array.new(num_cols) { "x"*1000 } }
time = Benchmark.realtime do
# The single output string that everything is appended to.
csv = ''
num_rows.times do |i|
num_cols.times do |j|
csv << data[i][j]
# Comma between columns, but not after the last column of a row.
csv << "," unless j == num_cols - 1
end
# Newline between rows, but not after the last row.
csv << "\n" unless i == num_rows - 1
end
end
# Print the elapsed time rounded to two decimal places.
puts time.round(2)
GC enabled | 2.09 |
GC disabled | 1.16 |
Optimized | 0.94 |
Memory optimization is easy
# Copying variant: `downcase` returns a new string, which is then assigned
# back to `str`.
str = "X" * 1024 * 1024 * 10
str = str.downcase
# In-place variant: `downcase!` modifies the existing string object.
str = "X" * 1024 * 1024 * 10
str.downcase!
Modifying Strings in Place
10MB
0MB
# `+=` builds a new string with `+` and rebinds x to it.
x = "foo"
x += "bar"
# Equivalent to the following: the concatenation result is a separate
# object (y) that x is then pointed at.
x = "foo"
y = x + "bar"
x = y
# Use String#<< to append to the existing string object instead.
x = "foo"
x << "bar"
Modifying Arrays and Hashes in Place
# Copying variant: 100 strings of 1024 * 1024 characters each.
data = Array.new(100) { "x" * 1024 * 1024 }
# NOTE(review): `measure` is not defined in this excerpt; presumably a helper
# that reports the memory allocated by the block. Confirm against its definition.
measure do
# `map` returns a new array, and `upcase` returns a new string for each element.
data.map { |str| str.upcase }
end
# In-place variant: 100 strings of 1024 * 1024 characters each.
data = Array.new(100) { "x" * 1024 * 1024 }
# NOTE(review): `measure` is not defined in this excerpt; presumably a helper
# that reports the memory allocated by the block. Confirm against its definition.
measure do
# Upcase every string in place. `upcase!` returns nil when the string is
# already upper-case, so storing its result with `map!` would overwrite such
# elements with nil. The strings themselves are mutated, so the array does
# not need to be rewritten at all and `each` is enough.
data.each(&:upcase!)
end
100MB
0MB
Ruby is especially bad in two areas