File uploading with Shrine gem
RRUG #10 - 9.11.2017
Przemek Sienkowski
Ruby Developer
What is Shrine?
- Alternative to CarrierWave, Refile, Paperclip, ...
- Combines their advantages and functionalities
- Shrine implements a plugin system
- Flexibility - configurable differently depending on what types of files are being uploaded
- Simplicity
Shrine advantages
- built-in support for AWS S3
- direct upload
- backgrounding
- validation helpers
- determining MIME type
- ActiveRecord and Sequel support
- automatically back up stored files to an additional storage
- copying attachment between two records
- ...
Component parts
- Storage - object responsible for managing files on a specific storage service
- Plugin - load the code only for features that you need
- Uploader
- Attacher
My experiences with Shrine
The goal was to achieve the following behaviours:
- Files should be uploaded to AWS S3 for staging and production.
- Direct upload from the FE application to S3 should be possible.
- Upload from the URL should be possible with handling validation errors about incorrect/unreachable URL.
- Upload should be done in the background.
Gem installation & configuration
# Gemfile
gem 'shrine'
gem 'aws-sdk-s3'
# config/initializers/shrine.rb
require 'shrine'
require "shrine/storage/s3"
# Bucket credentials are kept out of the repository in Rails' secrets.
s3_options = {
access_key_id: Rails.application.secrets.s3_access_key_id,
secret_access_key: Rails.application.secrets.s3_secret_access_key,
region: Rails.application.secrets.s3_region,
bucket: Rails.application.secrets.s3_bucket
}
# Shrine requires two storages: :cache holds freshly uploaded files,
# :store holds promoted (permanent) files. Both live in the same bucket
# under different key prefixes.
Shrine.storages = {
cache: Shrine::Storage::S3.new(prefix: "cache", **s3_options),
store: Shrine::Storage::S3.new(prefix: "store", **s3_options)
}
# Persist attachment data through ActiveRecord models.
Shrine.plugin :activerecord
What about test and development?
group :test do
gem 'shrine-memory'
end
# config/initializers/shrine.rb
require 'shrine'
# Pick storages per environment: local filesystem in development,
# in-memory (fast, throwaway) in test, S3 everywhere else.
if Rails.env.development?
require "shrine/storage/file_system"
Shrine.storages = {
cache: Shrine::Storage::FileSystem.new("public", prefix: "uploads/cache"),
store: Shrine::Storage::FileSystem.new("public", prefix: "uploads/store")
}
elsif Rails.env.test?
require 'shrine/storage/memory'
Shrine.storages = {
cache: Shrine::Storage::Memory.new,
store: Shrine::Storage::Memory.new
}
else
require "shrine/storage/s3"
s3_options = {
access_key_id: Rails.application.secrets.s3_access_key_id,
secret_access_key: Rails.application.secrets.s3_secret_access_key,
region: Rails.application.secrets.s3_region,
bucket: Rails.application.secrets.s3_bucket
}
Shrine.storages = {
cache: Shrine::Storage::S3.new(prefix: "cache", **s3_options),
store: Shrine::Storage::S3.new(prefix: "store", **s3_options)
}
end
Shrine.plugin :activerecord
Linking the uploader
# Adds the serialized attachment column that Shrine reads and writes
# (uploaded file id, storage key and metadata as one JSON string).
# NOTE: migrations must declare their Rails version since Rails 5;
# a bare ActiveRecord::Migration superclass raises at migration time.
class AddFileDataToAttachments < ActiveRecord::Migration[5.1]
  def change
    add_column :attachments, :file_data, :text
  end
end
# app/uploaders/attachment.rb
require 'shrine/plugins/determine_mime_type'
# Uploader for Attachment#file; determines the MIME type from the file
# content instead of trusting the client-supplied Content-Type header.
class AttachmentUploader < Shrine
plugin :determine_mime_type
end
# app/models/attachment.rb
class Attachment < ApplicationRecord
# Adds #file, #file=, #file_url and the attacher for the file_data column.
include AttachmentUploader.attachment(:file)
end
Flow of file uploading
- FE application sends request to presign URL, which will be used to upload file directly from the FE application to the S3.
- BE application responds with JSON, which contains the data required for authenticating with S3 and uploading a file.
- FE uploads the file to temporary location by using data from the previous step.
- FE generates JSON with cached on the S3 file and sends it to the BE.
- BE updates a record in the database and moves the file from the "cache" storage to the "store" storage.
Pre-signing endpoint
# config/initializers/shrine.rb
# ...
# Provides a Rack endpoint that returns S3 presign data as JSON.
Shrine.plugin :presign_endpoint
# config/routes.rb
Rails.application.routes.draw do
# GET /presign returns the URL and fields for a direct upload to :cache.
mount Shrine.presign_endpoint(:cache) => "/presign"
end
Pre-signing endpoint
{
"url": "https://bucket-name.s3.eu-central-1.amazonaws.com",
"fields": {
"key": "cache/2084783353bf6e5fdfe3420b8bff3a8c.pdf",
"policy": "eyJleHBpcmF0aW9uIjoiMjAxNy0xMC0xOVQwODoxMDowkaXRpb25...BaIn1dfQ==",
"x-amz-credential": "AKIAJZNAKLVM1ADO124A/20171019/eu-central-1/s3/aws4_request",
"x-amz-algorithm": "AWS4-HMAC-SHA256",
"x-amz-date": "20171019T071000Z",
"x-amz-signature": "19c31571fb1031d64bed4c31eecc2bf5fe5737855z4378a640326d6061114714"
},
"headers": {}
}
GET /presign?filename=file.pdf
It's enough to upload file directly on the S3.
What's with development?
# lib/attachments/storage/presigned_file_system.rb
require 'shrine/storage/file_system'

# FileSystem storage that mimics S3's presign API in development:
# instead of a signed S3 POST it returns the URL of a local upload
# endpoint, so the FE upload flow stays identical across environments.
class Attachments::Storage::PresignedFileSystem < Shrine::Storage::FileSystem
  include Rails.application.routes.url_helpers

  # Matches the interface of Aws::S3::PresignedPost (url + form fields).
  PresignedPost = Struct.new(:url, :fields)

  # Returns a PresignedPost pointing at the local upload endpoint.
  # No auth fields are needed locally, hence the empty fields hash.
  def presign(*_args)
    # Build the server only once: the original instantiated
    # Rails::Server twice just to read :Host and :Port.
    server_options = Rails::Server.new.options
    url = URI::HTTP.build(
      host: server_options[:Host],
      port: server_options[:Port],
      path: api_v1_attachment_uploader_upload_endpoint_path + '/cache/upload'
    ).to_s
    PresignedPost.new(url, {})
  end
end
# config/initializers/shrine.rb
require 'shrine'
if Rails.env.development?
# Development mimics S3 presigning with a local upload endpoint
# (Attachments::Storage::PresignedFileSystem) while still writing
# files under public/uploads.
Shrine.storages = {
cache: Attachments::Storage::PresignedFileSystem.new("public", prefix: "uploads/cache"),
store: Attachments::Storage::PresignedFileSystem.new("public", prefix: "uploads/store")
}
elsif Rails.env.test?
# ...
else
# ...
end
# ...
What's with development?
{
"url": "http://localhost:3000/attachments/cache/upload",
"fields": {},
"headers": {}
}
GET /presign?filename=file.pdf
What's with development?
# app/uploaders/attachment.rb
require 'shrine/plugins/upload_endpoint'
class AttachmentUploader < Shrine
# ...
# Accepts multipart uploads straight into a storage (no S3 involved).
plugin :upload_endpoint
end
# config/routes.rb
Rails.application.routes.draw do
mount Shrine.presign_endpoint(:cache) => "/presign"
# Only non-production environments upload to the app itself;
# production uploads go directly to S3 via the presigned POST.
if %w(development test).include?(Rails.env)
mount AttachmentUploader.upload_endpoint(:cache) => "/attachments"
end
end
File data
{
"id": "2084783353bf6e5fdfe3420b8bff3a8c.pdf",
"storage": "cache",
"metadata": {
"filename": "example.pdf",
"size": 1024,
"mime_type": "application/pdf"
}
}
a = Attachment.new(file: file_data)
a.file
# => #<AttachmentUploader::UploadedFile:0x007f80db3f9590
# @data={"id"=>"2084783353bf6e5fdfe3420b8bff3a8c.pdf", "storage"=>"cache",
# "metadata"=>{"filename"=>"file.pdf", "size"=>1024, "mime_type"=>"application/pdf"}}>
a.save
a.reload.file
# => #<AttachmentUploader::UploadedFile:0x007f8f0d74dcd8
# @data={"id"=>"504e412892b9e869136abe645e15e3c8.pdf", "storage"=>"store",
# "metadata"=>{"filename"=>"file.pdf", "size"=>1024, "mime_type"=>"application/pdf"}}>
Cache cleaning
Backgrounding
# config/application.rb
# ...
class Application < Rails::Application
# Run ActiveJob jobs (attachment promotion/deletion) through Sidekiq.
config.active_job.queue_adapter = :sidekiq
# ...
end
# config/initializers/shrine.rb
# ...
# Allows promoting/deleting attachments outside the request cycle.
Shrine.plugin :backgrounding
Backgrounding
# app/jobs/promote_job.rb
# Moves a cached upload from the :cache storage to :store in the background.
class PromoteJob < ActiveJob::Base
def perform(data)
Shrine::Attacher.promote(data)
end
end
# app/jobs/delete_job.rb
# Deletes a replaced or destroyed attachment's file in the background.
class DeleteJob < ActiveJob::Base
def perform(data)
Shrine::Attacher.delete(data)
end
end
# app/uploaders/attachment.rb
class AttachmentUploader < Shrine
# ...
# Hand promotion and deletion off to ActiveJob instead of running inline.
Attacher.promote { |data| PromoteJob.perform_later(data) }
Attacher.delete { |data| DeleteJob.perform_later(data) }
end
Uploading from URL
# Gemfile
gem 'shrine-url'
{
id: 'http://example.com/example.pdf',
storage: 'cache',
metadata: { ... }
}
The `remote_url` plugin isn't the proper choice
It provides an additional storage class. The main idea of this gem is storing file data like below:
How to choose a storage dynamically?
# The registry has a single :cache key, so only one cache storage
# implementation can be active at a time with this configuration.
Shrine.storages = {
cache: Shrine::Storage::S3.new(prefix: "cache", **s3_options),
store: Shrine::Storage::S3.new(prefix: "store", **s3_options)
}
We want to have the possibility to keep 2 objects as the cache storage implementation.
current storage configuration:
default_storage & dynamic_storage plugins
# app/uploaders/attachment.rb
class AttachmentUploader < Shrine
  # ...
  # Resolves storage keys matching /cache_(\w+)/ at runtime, so
  # :cache_url maps to the URL storage and anything else falls back
  # to the regular :cache storage.
  plugin :dynamic_storage
  storage /cache_(\w+)/ do |match|
    match[1] == 'url' ? Attachments::Storage::Url.new : Shrine.storages[:cache]
  end
  # Picks the default cache per record: when the stored file id is an
  # http(s) URL, cache through :cache_url instead of :cache.
  plugin :default_storage, cache: lambda { |record, _name|
    # file_data may be nil for new records; the '{}' literal replaces
    # the original's pointless `{}.to_json` round-trip.
    id = JSON.parse(record.file_data || '{}')['id']
    id =~ URI::regexp(['http', 'https']) ? :cache_url : :cache
  }
end
default_storage & dynamic_storage plugins
# app/uploaders/attachment.rb
# (Same configuration repeated on this slide for discussion.)
class AttachmentUploader < Shrine
# ...
# Resolve :cache_url to the URL storage, anything else to :cache.
plugin :dynamic_storage
storage /cache_(\w+)/ do |match|
match[1] == 'url' ? Attachments::Storage::Url.new : Shrine.storages[:cache]
end
# Default cache per record: URL ids go through :cache_url.
plugin :default_storage, cache: lambda { |record, _name|
id = JSON.parse(record.file_data || {}.to_json)['id']
id =~ URI::regexp(['http', 'https']) ? :cache_url : :cache
}
end
but it will not work :(
default_storage & dynamic_storage plugins
1.
{
id: 'http://example.com/example.pdf',
storage: 'cache_url',
metadata: { ... }
}
2.
# app/uploaders/attachment.rb
class AttachmentUploader < Shrine
# ...
# Workaround: treat a file as "already uploaded" when either storage key
# is a substring match of the other, so :cache_url files are accepted by
# the :cache uploader. NOTE(review): substring matching is loose — any
# key containing the other also matches; confirm this is intended.
def uploaded?(uploaded_file)
uploaded_file.storage_key.match(storage_key.to_s) ||
storage_key.to_s.match(uploaded_file.storage_key)
end
end
After that, file uploading to the local system storage worked properly, but an issue for Shrine has been reported on GH.
A new way of selecting storages dynamically
{
id: 'http://example.com/example.pdf',
storage: 'cache_url',
metadata: { ... }
}
# Read the storage key from the incoming file data and point the
# attacher's cache at it before assignment.
# (Removed the stray trailing ')' that made this line a syntax error.)
cache_type = JSON.parse(file_data)['storage']
attachment.file_attacher(cache: cache_type)
attachment.file = file_data
# config/initializers/shrine.rb
# ...
Shrine.storages[:cache_url] = Shrine::Storage::Url.new
Both of these ways don't work for the S3 storage
# lib/attachments/storage/s3_with_remoteable.rb
require "shrine/storage/s3"

# S3 storage that can "copy" a file cached on the URL storage by
# streaming the remote body into the bucket — the stock S3 storage
# class has no support for copying from a URL.
class Attachments::Storage::S3WithRemoteable < Shrine::Storage::S3
  def put(io, id, **options)
    if url_backed_upload?(io)
      # Stream the remote file's body straight into the S3 object.
      io.open { |body| object(id).put(body: body, **options) }
    else
      super
    end
  end

  private

  # True only for Shrine uploaded files whose backing storage is the
  # (optionally loaded) URL storage.
  def url_backed_upload?(io)
    io.is_a?(AttachmentUploader::UploadedFile) &&
      defined?(Shrine::Storage::Url) &&
      io.storage.is_a?(Shrine::Storage::Url)
  end
end
# config/initializers/shrine.rb
if Rails.env.development?
# ...
elsif Rails.env.test?
# ...
else
s3_options = { ... }
# Production/staging use the subclass that also accepts remote URLs.
Shrine.storages = {
cache: Attachments::Storage::S3WithRemoteable.new(prefix: "cache", **s3_options),
store: Attachments::Storage::S3WithRemoteable.new(prefix: "store", **s3_options)
}
end
Unfortunately the storage class for S3 does not support copying from the URL
Assigning remote file
# app/models/attachment.rb
# ...
# Virtual attribute: assigning a URL builds remote file data (metadata
# fetched via a HEAD request) and assigns it as the attachment's file.
def file_remote_url=(value)
return if value.nil?
remote_file = Attachments::RemoteFile.new(value, file_attacher)
self.file = remote_file.file_data.to_json
end
# Route every assignment through the dynamic cache selection first.
def file=(value)
set_file_cache_dynamically(value)
super
end
# Points the attacher's cache at the storage named inside +file_data+
# (e.g. "cache_url" for remote files), falling back to the default
# "cache" storage when the value is not parseable JSON.
def set_file_cache_dynamically(file_data)
  file_attacher(cache: JSON.parse(file_data)['storage'])
rescue JSON::ParserError, TypeError => _e
  # TypeError covers non-string input (nil, Hash, uploaded file, ...),
  # which JSON.parse raises and the original rescue missed.
  file_attacher(cache: 'cache')
end
Validation of URL correctness and reachability
# lib/attachments/storage/url.rb
require "shrine/storage/url"

# URL storage that validates a URL before fetching its metadata, so a
# malformed or unreachable URL surfaces as a single, catchable error.
class Attachments::Storage::Url < Shrine::Storage::Url
  class IncorrectUrlError < StandardError; end

  # Issues a HEAD request for +id+ (the remote URL string).
  # Raises IncorrectUrlError for non-http(s) or unreachable URLs.
  def fetch_metadata(id)
    # Anchor the whole string: URI.regexp is unanchored, so text merely
    # *containing* "http://..." would otherwise pass validation.
    # (make_regexp is also the non-deprecated replacement for URI.regexp.)
    raise IncorrectUrlError unless id =~ /\A#{URI::DEFAULT_PARSER.make_regexp(%w[http https])}\z/
    request(:head, id)
  rescue SocketError, URI::InvalidURIError => e
    raise IncorrectUrlError.new(e)
  end
end
# config/initializers/shrine.rb
# ...
Shrine.storages[:cache_url] = Attachments::Storage::Url.new
Fetching metadata
# lib/attachments/remote_file.rb
#
# Wraps a remote URL so it can be assigned to a Shrine attacher as cached
# file data, fetching its metadata via a HEAD request to the URL storage.
class Attachments::RemoteFile
  STORAGE_NAME = :cache_url

  # Value object with safe defaults for when the URL cannot be reached.
  Metadata = Struct.new(:filename, :size, :mime_type) do
    def initialize(filename = nil, size = 0, mime_type = nil)
      super
    end
  end

  attr_reader :url, :attacher, :incorrect_url

  def initialize(url, attacher)
    @url = url
    @attacher = attacher
    @incorrect_url = false
  end

  # Basename of the URL's path, e.g. "example.pdf".
  def file_name
    File.basename(URI.parse(url).path)
  end

  # Shrine-compatible file data hash pointing at the URL storage.
  def file_data
    { id: url, storage: STORAGE_NAME, metadata: metadata }
  end

  def correct_url?
    !incorrect_url
  end

  private

  def metadata
    @metadata ||= fetch_metadata
  end

  # HEADs the URL and builds the metadata hash. A non-success response
  # flags the URL as incorrect but still reports whatever the response
  # carried; a raised IncorrectUrlError yields empty default metadata.
  def fetch_metadata
    report_incorrect_url! unless head_response.is_a?(Net::HTTPSuccess)
    Metadata.new(
      file_name,
      head_response.content_length,
      head_response.content_type
    ).to_h
  rescue Attachments::Storage::Url::IncorrectUrlError => _e
    report_incorrect_url!
    Metadata.new.to_h
  end

  def uploader_class
    @attacher.store.class
  end

  def storage
    @storage ||= uploader_class.find_storage(STORAGE_NAME)
  end

  def head_response
    @head_response ||= storage.fetch_metadata(url)
  end

  def report_incorrect_url!
    @incorrect_url = true
  end
end
Thanks
File uploading with Shrine gem
By Przemek Sienkowski
File uploading with Shrine gem
- 1,969