# Image formats
'png': ('img', ['png']),
'tif': ('img', ['tif', 'tiff']),
'jpg': ('img', ['jpg', 'jpeg']),
'psd': ('img', ['psd', 'psb']),
'jp2': ('img', ['jp2', 'j2k', 'j2c', ...]),

# Camera RAW formats
'crw': ('img', ['crw']),
'dng': ('img', ['dng']),
'cr2': ('img', ['cr2']),
'zvi': ('img', ['zvi']),

# Document formats
'pdf': ('doc', ['pdf', 'ai']),
'doc': ('doc', ['doc']),
'docx': ('doc', ['docx']),
'xls': ('doc', ['xls']),
'xlsx': ('doc', ['xlsx']),
'ppt': ('doc', ['ppt']),
'pptx': ('doc', ['pptx']),

# Video formats
'avi': ('vid', ['avi']),
'swf': ('vid', ['swf']),
'flv': ('vid', ['flv', 'f4v']),
'wmv': ('vid', ['wmv']),
'mpg': ('vid', ['mpg', 'mpeg']),
'asf': ('vid', ['asf', 'wmv', 'wma']),
'3gp': ('vid', ['3gp']),
'3g2': ('vid', ['3g2']),
'mkv': ('vid', ['mkv']),
'f4v': ('vid', ['f4v']),
'webm': ('vid', ['webm']),

# Audio formats
'mp3': ('aud', ['mp3']),
'flac': ('aud', ['flac']),
'aif': ('aud', ['aif']),
'wma': ('aud', ['wma']),
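Each entry maps a canonical format to its broad category ('img', 'doc', 'vid', 'aud') and the extensions it covers. A minimal lookup sketch, assuming the mapping above is bound to a dict named FORMATS (the real logic presumably lives in filetype.py):

def classify(extension):
    """Return (canonical_format, category) for an extension, else (None, None)."""
    ext = extension.lower().lstrip('.')
    for fmt, (category, extensions) in FORMATS.items():
        if ext in extensions:
            return fmt, category
    return None, None

classify('.JPEG')  # -> ('jpg', 'img')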
A non-profit operating in cataloging & media management systems for the artwork industry, with more than 1,500 institutional partners (museums, colleges, universities, libraries, etc.)
Erlang Runtime spanning across machines and across data centers
Message Passing is at the core of the system
The code itself stays the same on a cluster
That's the output
We observe that the time taken to create an Erlang process is a constant 1µs up to 2,500 processes; thereafter it increases to about 3µs for up to 30,000 processes. The performance of Java and C# is shown at the top of the figure. For a small number of processes it takes about 300µs to create a process. Creating more than two thousand processes is impossible.
We see that for up to 30,000 processes the time to send a message between two Erlang processes is about 0.8µs. For C# it takes about 50µs per message, up to the maximum number of processes (which was about 1,800). Java was even worse: for up to 100 processes it took about 50µs per message; thereafter the time increased rapidly, reaching 10ms per message with about 1,000 Java processes.
%% Parallel map: spawn one worker process per list element; each worker
%% runs execute(S, Function, El) and sends its result back to S (the
%% spawning process), where gather/1 collects the replies.
pmap(Function, List) ->
    S = self(),
    Pids = lists:map(fun(El) ->
                         spawn(fun() -> execute(S, Function, El) end)
                     end,
                     List),
    gather(Pids).
Najam Ahmed
https://pk.linkedin.com/in/nansari89
Hashim Muqtadir
https://github.com/hashimmm
def register_blueprints(app):
app.register_blueprint(dastor_web, url_prefix='')
app.register_blueprint(dastor_task_api, url_prefix='/tasks')
app.register_blueprint(iiif.dastor_iiif, url_prefix='')
app.register_blueprint(editor, url_prefix='')
Organized using Flask blueprints
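Each blueprint is declared once in its own module and then registered above. A minimal sketch of such a declaration (the template folder argument is an assumption for illustration):

from flask import Blueprint

dastor_web = Blueprint('dastor_web', __name__, template_folder='templates')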
@dastor_web.route('/stor/', methods=['POST'])
@dastor_web.route('/stor', methods=['POST'])
@decorators.require_token()
def ingest_asset():
"""Defines the POST request for ingesting assets into STOR.
:return: str -- JSON string containing the metadata of the asset
"""
ingest_tag = '/stor'
log(logger, 'info', "Initiating ingest request", tags=[ingest_tag,
'INGEST-START'])
asset_instance = _ingest_asset(ingest_tag)
return serve_metadata(asset_instance.uuid)
def _ingest_asset(ingest_tag):
asset_info = _extract_asset_info_from_request(ingest_tag=ingest_tag)
    # Check form data first, then query args; default to '0' (false).
    allow_any = request.form.get("ignore_unsupported") or \
        request.args.get("ignore_unsupported", '0')
    allow_any = distutils.util.strtobool(allow_any)
asset_instance = asset.create(from_path=asset_info['file_path'],
filename=asset_info['file_name'],
filesize=asset_info['file_size'],
md5=asset_info['file_md5'],
project_id=asset_info['project_id'],
raise_on_unknown=not allow_any)
asset_instance.execute_tasks(transaction_id=g.get('transaction_id', ''))
return asset_instance
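For illustration, a client call against the ingest endpoint might look like the following sketch (the token header and the 'file' upload field are assumptions; only ignore_unsupported appears in the code above):

import requests

with open('test_jpg_img.jpg', 'rb') as f:
    resp = requests.post(
        'https://stor.example.org/stor',
        headers={'Authorization': 'Token <token>'},  # hypothetical auth header
        files={'file': f},
        data={'ignore_unsupported': '1'},
    )
print(resp.json())  # asset metadata for the new uuid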
def serve_path(path, through_frontend=True, buffering=False,
               expiry_time=False, throttling=False, download=False,
               download_filename=None, temp=False, original_filename=None):
    response = make_response((path, 200, []))
    path = path.replace(root, "")
    path = path.lstrip("/")
    # Hand the file off to nginx, which serves it from redirect_uri/path.
    response.headers['X-Accel-Redirect'] = os.path.join(redirect_uri, path)
    if not original_filename:
        extension = os.path.splitext(path)[-1]
    else:
        extension = os.path.splitext(original_filename.lower())[-1]
    if buffering:
        response.headers['X-Accel-Buffering'] = "yes"
    if expiry_time:
        response.headers['X-Accel-Expires'] = str(expiry_time)
    if throttling:
        response.headers['X-Accel-Limit-Rate'] = str(throttling)
    response.headers["Content-Type"] = quick_mime(extension)
    if download:
        disposition = "attachment" if not download_filename else \
            'attachment; filename="%s"' % download_filename
        response.headers["Content-Disposition"] = disposition
    if temp:
        response.headers['Cache-Control'] = 'no-cache'
        response.headers['Pragma'] = 'no-cache'
        response.headers["Refresh"] = "30"
    return response
Feature: Asset Ingestion

  Scenario: Ingest a simple JPEG asset
    Given we have a sample JPEG from our test asset location called test_jpg_img.jpg
    When we ingest it into stor
    Then we get a valid json response containing a valid uuid
    And we get a valid json response containing a valid filesize
    And we get a valid json response containing a valid filetype
@given(
u'we have a sample JPEG from our test asset location called {image_name}')
def set_asset_in_world(context, image_name):
path = os.path.join(BASEPATH, image_name)
context.inputfile = {"path": path, "size": file_size(path)}
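Only the @given step is shown; the remaining steps might be sketched like this (the test client on the context and the exact upload field are assumptions):

import json
import uuid

@when(u'we ingest it into stor')
def ingest_asset_step(context):
    with open(context.inputfile["path"], "rb") as f:
        context.response = context.client.post('/stor', data={"file": f})

@then(u'we get a valid json response containing a valid uuid')
def check_uuid(context):
    data = json.loads(context.response.data)
    uuid.UUID(data["uuid"])  # raises ValueError for an invalid uuid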
class TaskInterface(object):
    """Interface for Stor's tasks."""

    guid = NotImplemented
    """A unique identifier for the task (mainly used for logging)."""

    friendly_name = NotImplemented
    """A user-friendly name for the task, to identify it in APIs."""
def __init__(self, *args, **kwargs):
"""Initializer method
:param args: the arguments to be passed to the task methods
:param kwargs: the arguments to be passed to the task methods
:return: None
"""
raise NotImplementedError()
    def execute(self):
        """Do the actual work of the task.

        :return: some value meaningful for the next task (if applicable).
        """
        raise NotImplementedError()
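For illustration, a concrete task implementing this interface might look like the sketch below (hypothetical; the real tasks live under controller/tasks/ in the tree later in this section):

import hashlib

class ChecksumTask(TaskInterface):
    guid = 'checksum-task'
    friendly_name = 'Checksum'

    def __init__(self, file_path):
        self.file_path = file_path

    def execute(self):
        # Return the file's md5 hex digest for the next task (if any).
        with open(self.file_path, 'rb') as f:
            return hashlib.md5(f.read()).hexdigest()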
class StorCeleryTask(PausableTask):
abstract = True
autoregister = True
serializer = 'json'
    def __init__(self):
        # When deferred ingestion is disabled, the task can never pause.
        deferred = settings.get("stor", "deferred_ingestion", boolean=True)
        if not deferred:
            self.is_paused = lambda: False
@staticmethod
def to_wrap():
"""Override this to return the Stor Task to celery-fy."""
raise NotImplementedError()
    def __call__(self, *args, **kwargs):
        # Route execution through runner(), which wraps the Stor task.
        self.run = self.runner
        return super(StorCeleryTask, self).__call__(*args, **kwargs)

    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        # Drop the scoped DB session once the task has finished.
        db.Session.remove()
def runner(self, transaction_id='', *args, **kwargs):
_to_wrap = self.to_wrap()
log(logger, "debug",
"Inside wrapper object for class: {}".format(_to_wrap))
        # Patch the log context so every record carries the transaction id.
        patcher = patch('stor.logs.LogContext',
                        CustomLogContext(transaction_id))
        patcher.start()
start_time = time.time()
task = _to_wrap(*args, **kwargs)
task_guid = task.guid
try:
log(logger, "debug", "Running tasks for : {}".format(task))
task_return_value = task.execute()
except exception.TaskFailedException as e:
log(logger, 'exception', e.message, tags=[task_guid])
task_return_value = None
time_taken = time.time() - start_time
msg = "Task %s took time %s" % (task_guid, time_taken)
log(logger, 'info', msg, task=task_guid,
time_taken=time_taken, tags=[task_guid])
patcher.stop()
return task_return_value
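With that wrapper in place, celery-fying a Stor task is just a matter of overriding to_wrap(). A hypothetical sketch, reusing the ChecksumTask example from earlier (the import path is an assumption):

class ChecksumCeleryTask(StorCeleryTask):
    @staticmethod
    def to_wrap():
        from stor.controller.tasks.fixity import ChecksumTask  # assumed path
        return ChecksumTask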
MAX_MEMORY = 1073741824L  # require at least 1 GiB of available RAM
MAX_CPU = 90.0            # and CPU utilisation below 90%
logger = logging.getLogger('stor.smartscaler')
class Smartscaler(Autoscaler):
...
    def _maybe_scale(self, req=None):
        procs = self.processes  # current pool size
        cur = min(self.qty, self.max_concurrency)  # demand, capped
        cpu_util = psutil.cpu_percent()  # system-wide CPU utilisation, %
        available_mem = psutil.virtual_memory()[1]  # available RAM, bytes
        allow_workers = (cpu_util < MAX_CPU and available_mem > MAX_MEMORY)
if cur > procs and allow_workers:
worker_delta = cur - procs
msg = """Current workers: {cur}, current CPU: {cpu},
current RAM: {ram}. Spawning additional workers"""
log(logger, "INFO", msg.format(cur=cur, cpu=cpu_util,
ram=available_mem),
worker_delta=worker_delta, tags=['WORKER-BEAT'])
self.scale_up(worker_delta)
return True
elif cur < procs and not allow_workers:
worker_delta = (procs - cur) - self.min_concurrency
msg = """Current workers: {cur}, current CPU: {cpu},
current RAM: {ram}. Killing some workers"""
log(logger, "INFO", msg.format(cur=cur, cpu=cpu_util,
ram=available_mem),
worker_delta=-worker_delta, tags=['WORKER-BEAT'])
self.scale_down(worker_delta)
return True
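Celery then has to be told to use this autoscaler; on Celery 3.x that is the CELERYD_AUTOSCALER setting. The module path below matches the logger name above, but treat the wiring as a sketch:

# in celeryconfig.py
CELERYD_AUTOSCALER = 'stor.smartscaler:Smartscaler'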
├── __init__.py
├── celery.py
├── celery_scheduler.py
├── celerybackends
│ ├── __init__.py
│ └── database
│ ├── __init__.py
│ └── session.py
├── celeryconfig.py
├── corruption_detector.py
├── dastor_web
│ ├── __init__.py
│ ├── decorators.py
│ ├── forms.py
│ ├── templates
│ │ ├── ...
│ ├── tests
│ │ └── features
│ │ ├── ingest_asset.feature
│ │ └── steps
│ │ └── asset_steps.py
│ └── views.py
├── dastor_web_task_api
│ ├── __init__.py
│ ├── task_api_views.py
│ └── templates
├── database
│ ├── __init__.py
│ ├── access_rules.py
│ ├── asset.py
│ ├── celery_models.py
│ ├── document_page.py
│ ├── exifdata.py
│ ├── fixityrecord.py
│ ├── scanrecord.py
│ ├── tags.py
│ ├── task_groups.py
│ ├── tokens.py
│ └── user.py
├── exception.py
├── exiftool.py
├── filetype.py
├── logs
│ ├── __init__.py
│ └── contexts.py
├── scanner
│ ├── __init__.py
│ ├── bluprnt.py
│ └── scanutils.py
├── settings.py
├── smartscaler.py
├── stor-test.cfg
├── stor.cfg
├── thumbnail.py
└── util.py
├── controller
│ ├── __init__.py
│ ├── asset
│ │ ├── __init__.py
│ │ ├── asset.py
│ │ ├── assetgroup.py
│ │ ├── audio
│ │ │ ├── __init__.py
│ │ │ └── audio.py
│ │ ├── document
│ │ │ ├── __init__.py
│ │ │ ├── document.py
│ │ │ └── document_office.py
│ │ ├── image
│ │ │ ├── __init__.py
│ │ │ ├── image.py
│ │ │ ├── image_jpeg.py
│ │ │ └── image_tiff.py
│ │ ├── video
│ │ │ ├── __init__.py
│ │ │ └── video.py
│ │ └── virtual
│ │ ├── __init__.py
│ │ └── virtual.py
│ ├── ingestors.py
│ ├── interface
│ │ ├── __init__.py
│ │ ├── externalasset.py
│ │ ├── externalkalturaasset.py
│ │ ├── task.py
│ │ └── thumbnail.py
│ ├── kaltura_asset.py
│ ├── tags.py
│ ├── tasks
│ │ ├── __init__.py
│ │ ├── bulk
│ │ │ └── __init__.py
│ │ ├── bulkpyrimidal.py
│ │ ├── extractor.py
│ │ ├── fixity.py
│ │ ├── kaltura.py
│ │ ├── panorama.py
│ │ ├── pyrimidal.py
│ │ ├── reingest.py
│ │ ├── scanner.py
│ │ ├── taskutils.py
│ │ └── thumbnail.py
│ └── thumbnail
│ ├── __init__.py
│ ├── thumbnail.py
│ ├── thumbnail_canonraw.py
│ ├── thumbnail_generic.py
│ ├── thumbnail_imagemagick.py
│ ├── thumbnail_jpeg2000.py
│ ├── thumbnail_office.py
│ ├── thumbnail_pdf.py
│ ├── thumbnail_png.py
│ ├── thumbnail_tiff_multipage.py
│ ├── thumbnail_tiff_ycbcr.py
│ └── thumbnail_vr.py
├── Makefile
├── airmail.erl
├── archive.erl
├── archive.hrl
├── archivist.erl
├── asset.erl
├── asset.erl.erlydb
├── asset.erl.psycop
├── copies.erl
├── copies.erl.erlydb
├── copies.erl.psycop
├── dastor.app
├── dastor.erl
├── dastor.hrl
├── dastor_app.erl
├── dastor_deps.erl
├── dastor_sup.erl
├── dastor_web.E
├── dastor_web.erl
├── db_common.erl
├── db_coord.erl
├── errors.erl
├── errors.erl.erlydb
├── errors.erl.psycop
├── exif.erl
├── extern.erl
├── filetype.erl
├── logging.hrl
├── md5sum.erl
├── media.erl
├── mochiweb_mime.erl
├── morph.erl
├── plists.erl
├── procrastinator.erl
├── psql.app
├── pyrimidal.erl
├── qtvr.erl
├── rotating_logger.erl
├── stor.erl
├── thumbnail.erl
├── util.erl
├── uuid.erl
└── vips.erl
https://github.com/hashimmm/KTS
Structured log files, threaded through a transaction ID
Everything can now be indexed by ELK, and we can retrieve the full set of operations behind any activity in the system
And with a few clicks we can run aggregations that give us valuable data
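As an illustration (values invented, keys mirroring the log(...) kwargs above), a single structured record for the task-timing message might land in Elasticsearch shaped roughly like this:

{
    "level": "info",
    "message": "Task thumbnail-task took time 4.2",
    "transaction_id": "2f1fb0c4-...",
    "task": "thumbnail-task",
    "time_taken": 4.2,
    "tags": ["thumbnail-task"],
}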
CTO & Cofounder Patari[Pakistans largest Music Streaming Portal]
iqbal@patari.pk
CTO, Active Capital IT [software consultancy working with the cataloging, artwork & telecom sectors]
italaat@acit.com
https://twitter.com/iqqi84
https://au.linkedin.com/pub/iqbal-bhatti/14/63/493