CWL Research Objects
Stian Soiland-Reyes
eScience lab, The University of Manchester
ELIXIR Workflow Interoperability implementation study
Hinxton, 2018-01-26
This work is licensed under a
Creative Commons Attribution 4.0 International License.
# Two-step CWL workflow: extract one source file from a tar archive, then compile it.
cwlVersion: v1.0
class: Workflow
inputs:
  inp: File
  ex: string

outputs:
  classout:
    type: File
    # the workflow output is wired to the output of the "compile" step
    outputSource: compile/classfile

steps:
  untar:
    run: tar-param.cwl
    in:
      tarfile: inp
      extractfile: ex
    out: [example_out]

  compile:
    run: arguments.cwl
    in:
      # consumes the file produced by the "untar" step
      src: untar/example_out
    out: [classfile]
{
"@context" : [ "https://w3id.org/bundle/context" ],
"id" : "/",
"manifest" : [ "manifest.json" ],
"createdOn" : "2017-08-24T10:57:46.325Z",
"createdBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"authoredBy" : [ {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
}, {
"uri" : "mailto:luka.stojanovic@sbgenomics.com",
"name" : "Luka Stojanovic"
}, {
"uri" : "mailto:crusoe@ucdavis.edu",
"name" : "Michael R. Crusoe"
}, {
"uri" : "mailto:porter@porter.st",
"name" : "Andrey Kartashov"
}, {
"uri" : "mailto:janko.simonovic@sbgenomics.com",
"name" : "Janko Simonovic"
} ],
"retrievedFrom" : "https://github.com/common-workflow-language/workflows/blob/lobstr-v1/workflows/lobSTR/",
"retrievedOn" : "2017-08-24T10:57:46.325Z",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"history" : [ "http:/git2prov.org/git2prov?giturl=https:/github.com/common-workflow-language/workflows.git&serialization=PROV-JSON" ],
"aggregates" : [ {
"uri" : "/workflow/tmp_2.fq",
"mediatype" : "application/octet-stream",
"createdOn" : "2017-08-24T10:57:46.923Z",
"authoredBy" : [ {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/tmp_2.fq",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"bundledAs" : {
"uri" : "urn:uuid:61579f3e-63e6-49c2-b780-f67b2df461b7",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/lobSTR-demo.json",
"mediatype" : "application/json",
"createdOn" : "2017-08-24T10:57:47.216Z",
"authoredBy" : [ {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-demo.json",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"bundledAs" : {
"uri" : "urn:uuid:973caa0e-f3bd-45e8-8d29-70123bc8715a",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/models/illumina_v3.pcrfree.stuttermodel",
"mediatype" : "application/octet-stream",
"createdOn" : "2017-08-24T10:57:47.239Z",
"authoredBy" : [ {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/models/illumina_v3.pcrfree.stuttermodel",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"bundledAs" : {
"uri" : "urn:uuid:62bbcbea-f34f-463f-990d-6148f8ed5e5c",
"folder" : "/workflow/models/"
}
}, {
"uri" : "/workflow/models/illumina_v3.pcrfree.stepmodel",
"mediatype" : "application/octet-stream",
"createdOn" : "2017-08-24T10:57:47.266Z",
"authoredBy" : [ {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/models/illumina_v3.pcrfree.stepmodel",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"bundledAs" : {
"uri" : "urn:uuid:03439ae7-cd94-42a3-b5fe-40bfff6882d8",
"folder" : "/workflow/models/"
}
}, {
"uri" : "/workflow/samtools-sort.cwl",
"mediatype" : "text/x-yaml",
"createdOn" : "2017-08-24T10:57:47.269Z",
"authoredBy" : [ {
"uri" : "mailto:luka.stojanovic@sbgenomics.com",
"name" : "Luka Stojanovic"
}, {
"uri" : "mailto:crusoe@ucdavis.edu",
"name" : "Michael R. Crusoe"
}, {
"uri" : "mailto:porter@porter.st",
"name" : "Andrey Kartashov"
}, {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/samtools-sort.cwl",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"conformsTo" : "https://w3id.org/cwl/v1.0",
"bundledAs" : {
"uri" : "urn:uuid:2dc07859-efc2-4945-a95f-ba7815b68d07",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/lobSTR-workflow.cwl",
"mediatype" : "text/x-yaml",
"createdOn" : "2017-08-24T10:57:47.42Z",
"authoredBy" : [ {
"uri" : "mailto:luka.stojanovic@sbgenomics.com",
"name" : "Luka Stojanovic"
}, {
"uri" : "mailto:crusoe@ucdavis.edu",
"name" : "Michael R. Crusoe"
}, {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-workflow.cwl",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"conformsTo" : "https://w3id.org/cwl/v1.0",
"bundledAs" : {
"uri" : "urn:uuid:58bc1895-3460-46d6-91d7-fa1718d09631",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/lobSTR-arvados-demo.json",
"mediatype" : "application/json",
"createdOn" : "2017-08-24T10:57:47.453Z",
"authoredBy" : [ {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-arvados-demo.json",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"bundledAs" : {
"uri" : "urn:uuid:30c683bc-69fb-4d93-8dad-65b663783af5",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/samtools-index.cwl",
"mediatype" : "text/x-yaml",
"createdOn" : "2017-08-24T10:57:47.458Z",
"authoredBy" : [ {
"uri" : "mailto:luka.stojanovic@sbgenomics.com",
"name" : "Luka Stojanovic"
}, {
"uri" : "mailto:crusoe@ucdavis.edu",
"name" : "Michael R. Crusoe"
}, {
"uri" : "mailto:porter@porter.st",
"name" : "Andrey Kartashov"
}, {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/samtools-index.cwl",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"conformsTo" : "https://w3id.org/cwl/v1.0",
"bundledAs" : {
"uri" : "urn:uuid:8235d3f8-6927-4f73-b160-8521838a1cbb",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/lobSTR-tool.cwl",
"mediatype" : "text/x-yaml",
"createdOn" : "2017-08-24T10:57:47.476Z",
"authoredBy" : [ {
"uri" : "mailto:luka.stojanovic@sbgenomics.com",
"name" : "Luka Stojanovic"
}, {
"uri" : "mailto:crusoe@ucdavis.edu",
"name" : "Michael R. Crusoe"
}, {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-tool.cwl",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"conformsTo" : "https://w3id.org/cwl/v1.0",
"bundledAs" : {
"uri" : "urn:uuid:7fa6fbe4-1fc5-4cb5-9c1a-56b96c5f7aaf",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/allelotype.cwl",
"mediatype" : "text/x-yaml",
"createdOn" : "2017-08-24T10:57:47.537Z",
"authoredBy" : [ {
"uri" : "mailto:luka.stojanovic@sbgenomics.com",
"name" : "Luka Stojanovic"
}, {
"uri" : "mailto:janko.simonovic@sbgenomics.com",
"name" : "Janko Simonovic"
}, {
"uri" : "mailto:crusoe@ucdavis.edu",
"name" : "Michael R. Crusoe"
}, {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/allelotype.cwl",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"conformsTo" : "https://w3id.org/cwl/v1.0",
"bundledAs" : {
"uri" : "urn:uuid:3706bd2f-e53f-431d-b32a-deb661d9b292",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/README",
"mediatype" : "application/octet-stream",
"createdOn" : "2017-08-24T10:57:47.555Z",
"authoredBy" : [ {
"uri" : "mailto:crusoe@ucdavis.edu",
"name" : "Michael R. Crusoe"
}, {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/README",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"bundledAs" : {
"uri" : "urn:uuid:ed54c4d6-c585-4dc9-b7bc-0cf299e20b91",
"folder" : "/workflow/"
}
}, {
"uri" : "/workflow/tmp_1.fq",
"mediatype" : "application/octet-stream",
"createdOn" : "2017-08-24T10:57:47.738Z",
"authoredBy" : [ {
"uri" : "mailto:peter.amstutz@curoverse.com",
"name" : "Peter Amstutz"
} ],
"retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/tmp_1.fq",
"retrievedBy" : {
"uri" : "https://view.commonwl.org",
"name" : "Common Workflow Language Viewer"
},
"bundledAs" : {
"uri" : "urn:uuid:5d431f81-ad0b-4acf-903a-9d5aa03b04df",
"folder" : "/workflow/"
}
}, {
"uri" : "/visualisation.png",
"mediatype" : "image/png",
"createdOn" : "2017-08-24T10:57:47.801Z",
"retrievedFrom" : "https://view.commonwl.org/graph/png/github.com/common-workflow-language/workflows/blob/lobstr-v1/workflows/lobSTR/lobSTR-workflow.cwl",
"bundledAs" : {
"uri" : "urn:uuid:ff9ace37-e76c-49f8-8d36-60f11ff6d257",
"folder" : "/"
}
}, {
"uri" : "/visualisation.svg",
"mediatype" : "image/svg+xml",
"createdOn" : "2017-08-24T10:57:47.821Z",
"retrievedFrom" : "https://view.commonwl.org/graph/svg/github.com/common-workflow-language/workflows/blob/lobstr-v1/workflows/lobSTR/lobSTR-workflow.cwl",
"bundledAs" : {
"uri" : "urn:uuid:a6cfb437-8818-4ab2-9081-efc74c5109e8",
"folder" : "/"
}
} ],
"annotations" : [ {
"uri" : "urn:uuid:9f602fff-b280-41c5-9590-ab95a49c85ad",
"about" : "/",
"content" : "annotations/merged.cwl"
}, {
"uri" : "urn:uuid:0ce4b727-ff61-4534-9afb-e3d676d2782d",
"about" : "/",
"content" : "annotations/workflow.ttl"
} ]
}
#!/usr/bin/env cwl-runner
# Single-step workflow whose step embeds an inline CommandLineTool running `echo`.
cwlVersion: v1.0
class: Workflow
label: "Hello World"
doc: "Outputs a message using echo"
inputs: []
outputs:
  response:
    outputSource: step0/response
    type: File
steps:
  step0:
    run:
      class: CommandLineTool
      inputs:
        message:
          type: string
          doc: "The message to print"
          default: "Hello World"
          inputBinding:
            position: 1
      baseCommand: echo
      # stdout of echo is captured to this file and exposed as the "response" output
      stdout: response.txt
      outputs:
        response:
          type: stdout
    in: []
    out: [response]
https://doi.org/10.7490/f1000research.1114781.1
Farah Z Khan
BOSC hackathon 2017
Prototype PROV+RO export
CWL reference implementation
Copyright © 2013 W3C® (MIT, ERCIM, Keio, Beihang), All Rights Reserved. W3C liability, trademark and document use rules apply.
PROV Model Primer
W3C Working Group Note 30 April 2013
Which PROV format?
<prov:wasGeneratedBy>
<prov:entity prov:ref="ex:ent1"/>
<prov:activity prov:ref="ex:act1"/>
<prov:time>2017-10-26T21:32:52Z</prov:time>
<ex:port>p1</ex:port>
</prov:wasGeneratedBy>
wasGeneratedBy(ent1, act1,
2017-10-26T21:32:52Z, ex:port="p1")
:ent1
a prov:Entity;
prov:wasGeneratedBy :act1;
prov:generatedAtTime "2017-10-26T21:32:52Z"^^xsd:dateTime ;
ex:port "p1" .
"wasGeneratedBy": {
"ex:gen1": {
"prov:entity": "ent1",
"prov:activity": "act1",
"prov:time": "2017-10-26T21:32:52Z",
"ex:port": "p1"
},
},
{ "@context": { .. },
"@id": "ent1",
"@type": "prov:Entity",
"ex:port": "p1",
"prov:generatedAtTime": "2017-10-26T21:32:52Z",
"prov:wasGeneratedBy": {
"@id": "act1",
"@type": "prov:Activity"
}
}
PROV-N
PROV-XML
PROV-JSON
PROV-O Turtle
PROV-O JSON-LD
Nested workflows
A single activity unrolled to multiple steps
activity(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -,
[prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main"])
// main workflow run was started externally (we don't know how)
wasStartedBy(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -,
2017-10-27T15:00:00Z)
// ...
// step is a nested workflow, so also a WorkflowRun
activity(run:4305467e-6dfb-11e7-885d-0242ac110002, -, -,
[prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main/nested1"])
// started by the mother activity
wasStartedBy(run:4305467e-6dfb-11e7-885d-0242ac110002, -,
run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:30Z)
// inner step of nested workflow, ProcessRun as this is a command line execution
activity(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, -,
[prov:type='wfprov:ProcessRun', prov:label="Run of workflow/packed.cwl#nested/innerStep1"])
wasStartedBy(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -,
run:4305467e-6dfb-11e7-885d-0242ac110002, 2017-10-27T15:01:00Z)
// ...
Identifying intermediate data
Output 1B file is also Input 2C and Input 3D downstream
Simple filenames -> duplications
./data/step1/outputB.txt
./data/step2/inputC.txt
./data/step3/inputD.txt
Content-addressable
SHA-256 hash of bytes as filename:
./data/51/51fb8af0c4ae0422fbe88340d91880ecb9d7537cf57339c1cf1256b7ca58f32d
RFC6920 URI as global identifier:
nih:sha-256;51fb8af0c4ae0422fbe88340d91880ecb9d7537cf57339c1cf1256b7ca58f32d
prov:alternateOf
Relating global identifier to local paths
// the workflow run used an input identified globally by its sha-256 hash
used(run:2e1287e0-6dfb-11e7-8acf-0242ac110002,
data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03,
2017-10-27T14:29:00+01:00, [prov:role='wf:main/input1'])
entity(data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03)
// which we have stored a copy of within the research object
specializationOf(./data/58/5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03,
data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03)
Large data would not have a ./data/ file,
but might be referenced externally
Workflow provenance profiles
How to tie it all together?
# Simplified Research Object manifest: what is aggregated, and annotations about it.
id: doi:10.15490/seek.1.investigation.56
createdOn: 2015-07-10T16:46:00Z
createdBy: http://orcid.org/0000-0001-9842-9718
aggregates:
  - id: data/sequence/specimen5.bam
    conformsTo: http://gemrb.org/iesdp/file_formats/ie_formats/bam_v1.htm
  - id: http://example.com/blog/about-specimen5
    authoredBy: http://orcid.org/0000-0001-7066-3350
  - id: http://www.myexperiment.org/workflows/3355
    # NOTE(review): assumed to belong to the workflow aggregate — confirm against the original slide
    history: provenance/workflow-evolution.ttl
annotations:
  - about: data/sequence/specimen5.bam
    content: annotations/specimen5-properties.jsonld
    createdBy: http://orcid.org/0000-0001-7066-3350
  - about: data/sequence/specimen5.bam
    content: http://example.com/blog/about-specimen5
    motivatedBy: oa:questioning
Research Object manifest
(simplified)
Reuse standards:
OAI-ORE, BagIt, W3C JSON-LD, PROV, Web Annotation Model
metadata/manifest.json
data/sequence/specimen5.bam
provenance/workflow-evolution.ttl
http://example.com/blog/about-specimen5
http://www.myexperiment.org/workflows/3355
http://orcid.org/0000-0001-7066-3350
http://gemrb.org/iesdb/
file_formats_ie_formats_bam_v1.html
Who is using Research Objects?
Structure of CWL run Research Object:
- data: content-addressable by sha256 hash
- workflow: input object (json file) with relativised paths, and a packed.cwl executable workflow containing the workflow specification and tool specifications with relativised paths to re-run inside an RO.
- snapshot: This directory contains copies of the original workflow and tool specification files as-is (warning: might contain absolute paths or be host-specific).
- metadata: provenance about the workflow run, its data products and manifest for this Research Object.
document
// PROV-N requires the default namespace declaration to precede the prefix declarations
default <app://579c1b74-b328-4da6-80a8-a2ffef2ac9b5/>
prefix wfprov <http://purl.org/wf4ever/wfprov#>
prefix prov <http://www.w3.org/ns/prov#>
prefix wfdesc <http://purl.org/wf4ever/wfdesc#>
prefix wf <https://w3id.org/cwl/view/git/933bf2a1a1cce32d88f88f136275535da9df0954/workflows/hello/hello.cwl#>
prefix input <app://579c1b74-b328-4da6-80a8-a2ffef2ac9b5/workflow/input.json#>
prefix run <urn:uuid:>
prefix engine <urn:uuid:>
prefix data <nih:sha-256;>
// NOTE(review): some usage/generation timestamps below fall outside their activity's start/end interval; values left as in the original - confirm intended times
// Level 1 provenance of workflow run
activity(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -, [prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main"])
// started externally: no trigger entity and no starter activity are known
wasStartedBy(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -, 2017-10-27T14:24:00+01:00)
// The engine is the SoftwareAgent that is executing our Workflow plan
wasAssociatedWith(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, wf:main)
agent(engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, [prov:type='prov:SoftwareAgent', prov:type='wfprov:WorkflowEngine', prov:label="cwltool v1.2.5"])
// prov has no term to relate sub-plans - we'll use wfdesc:hasSubProcess
entity(wf:main,[prov:type='wfdesc:Workflow', prov:type='prov:Plan', wfdesc:hasSubProcess='wf:main/step1', wfdesc:hasSubProcess='wf:main/step2'])
alternateOf(wf:main, workflow/packed.cwl)
entity(wf:main/step1,[prov:type='wfdesc:Process', prov:type='prov:Plan'])
entity(wf:main/step2,[prov:type='wfdesc:Process', prov:type='prov:Plan'])
// First the workflow uses some data; here with a sha256 identifier
used(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, 2017-10-27T14:29:00+01:00, [prov:role='wf:main/input1'])
entity(data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, [prov:type='wfprov:Artifact'])
// which we have stored a copy of within the research object
specializationOf(data/58/5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03)
// Then there was another activity - wfprov:ProcessRun indicating a command line tool
activity(run:4305467e-6dfb-11e7-885d-0242ac110002, -, -, [prov:type='wfprov:ProcessRun', prov:label="Run of workflow/packed.cwl#main/step1"])
// started by the mother activity
wasStartedBy(run:4305467e-6dfb-11e7-885d-0242ac110002, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:00+01:00)
// same engine using step1 as plan. In a distributed scenario there might be a different engine
wasAssociatedWith(run:4305467e-6dfb-11e7-885d-0242ac110002, engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, wf:main/step1)
// This activity also uses the same data, but in a different role (e.g. input parameter)
used(run:4305467e-6dfb-11e7-885d-0242ac110002, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, 2017-10-27T14:00:00+01:00, [prov:role='wf:main/step1/in1'])
// And we generate some new data
wasGeneratedBy(data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, run:4305467e-6dfb-11e7-885d-0242ac110002, 2017-10-27T16:00:00+01:00, [prov:role='wf:main/step1/out1'])
entity(data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, [prov:type='wfprov:Artifact'])
// again stored in the RO
specializationOf(data/00/00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c)
// step1 finished
wasEndedBy(run:4305467e-6dfb-11e7-885d-0242ac110002, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:30:00+01:00)
// the master workflow then "generates" that same value, but now at a different time and role (the resultA master workflow output)
wasGeneratedBy(data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/resultA'])
// next step activity
activity(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, -, [prov:type='wfprov:ProcessRun', prov:label="Run of workflow/packed.cwl#main/step2"])
wasStartedBy(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T16:00:00+01:00)
// associated with step2
wasAssociatedWith(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, wf:main/step2)
// step2 uses two data artifacts; one which came from previous step, other as workflow input
used(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/step2/valueA'])
used(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/step2/valueB'])
// and generate two new data artifacts
wasGeneratedBy(data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, run:c42dc36e-6dfd-11e7-bc24-0242ac110002, 2017-10-27T16:34:20+01:00, [prov:role='wf:main/step2/out1'])
entity(data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, [prov:type='wfprov:Artifact'])
// local copies live under data/<first two hex chars>/<full hash>, as for the other artifacts
specializationOf(data/95/952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d)
wasGeneratedBy(data:3deb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0, run:c42dc36e-6dfd-11e7-bc24-0242ac110002, 2017-10-27T16:34:20+01:00, [prov:role='wf:main/step2/out2'])
entity(data:3deb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0, [prov:type='wfprov:Artifact'])
specializationOf(data/3d/3deb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0, data:3deb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0)
// step2 ends
wasEndedBy(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T16:30:00+01:00)
// only step output out1 captured by mother workflow, sent to resultB workflow output
wasGeneratedBy(data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/resultB'])
// mother workflow ends
wasEndedBy(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T16:34:40+01:00)
endDocument
New for this implementation study?
Portal-level provenance
Which portal?
Who submitted the job?
Where did the data come from?
Which samples?
Job-level provenance
(Workflow) job submission
Nested jobs?
Cluster/grid/cloud execution
Workflow references
Indirect data references
Performance statistics
Usage Records
"Previous runs" annotations
Estimates for memory/cores/disk/time
Correlate with dataset stats
Links to research objects / provenance
Multiple estimation annotations or dynamically modify Requirements?
Enriches visualization
Extend CWL viewer to show Requirements
Show previous runs annotations
Expose deep annotations (e.g. EDAM typing)
Links to previous runs Research Objects
(discovered by Linked Data Notifications?)
Visualize run RO?
References to large data?
How to describe
Resource Usage?
<?xml version="1.0" encoding="UTF-8"?>
<ur:UsageRecord xmlns="http://schema.ogf.org/urf/2013/04/urf"
xmlns:ur="http://schema.ogf.org/urf/2013/04/urf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://schema.ogf.org/urf/2013/04/urf">
<ur:RecordIdentityBlock>
<ur:RecordId>urn:uuid:4350d583-61a5-45e8-a229-957aa81e8014</ur:RecordId>
<ur:CreateTime>2018-05-09T09:06:52Z</ur:CreateTime>
<ur:Site>EMBL-EBI</ur:Site>
<ur:Infrastructure>Embassy</ur:Infrastructure>
</ur:RecordIdentityBlock>
<ur:SubjectIdentityBlock>
<ur:LocalUserId>stain</ur:LocalUserId>
<ur:LocalGroupId>ELIXIRCWLImplStudy</ur:LocalGroupId>
<ur:GlobalUserId>https://orcid.org/0000-0001-9842-9718</ur:GlobalUserId>
</ur:SubjectIdentityBlock>
<ur:ComputeUsageBlock>
<ur:CpuDuration>PT3600S</ur:CpuDuration>
<ur:WallDuration>PT3600S</ur:WallDuration>
<ur:StartTime>2018-05-31T11:00:00</ur:StartTime>
<ur:EndTime>2018-05-31T12:00:00</ur:EndTime>
<ur:ExecutionHost>
<ur:Hostname>compute-0-1.example.com</ur:Hostname>
<ur:ProcessId>1042</ur:ProcessId>
<ur:Benchmark ur:type="si2k">3.14</ur:Benchmark>
</ur:ExecutionHost>
<ur:Processors>4</ur:Processors>
<ur:NodeCount>1</ur:NodeCount>
</ur:ComputeUsageBlock>
<ur:JobUsageBlock>
<ur:GlobalJobId>host.example.org/ab1234</ur:GlobalJobId>
<ur:LocalJobId>ab1234</ur:LocalJobId>
<ur:JobName>MetaGenomics1337</ur:JobName>
<ur:Queue ur:description="execution">"Bigmem"</ur:Queue>
<ur:TimeInstant ur:type="Ctime">2018-05-31T10:30:00</ur:TimeInstant>
<ur:TimeInstant ur:type="Qtime">2018-05-31T10:31:00</ur:TimeInstant>
<ur:TimeInstant ur:type="Etime">2018-05-31T10:59:42</ur:TimeInstant>
</ur:JobUsageBlock>
<ur:MemoryUsageBlock>
<ur:MemoryClass>"RAM"</ur:MemoryClass>
<ur:MemoryResourceCapacityUsed>14728</ur:MemoryResourceCapacityUsed>
<ur:MemoryResourceCapacityAllocated>56437</ur:MemoryResourceCapacityAllocated>
<ur:MemoryResourceCapacityRequested>42000</ur:MemoryResourceCapacityRequested>
<ur:StartTime>2018-05-31T11:00:00</ur:StartTime>
<ur:EndTime>2018-05-31T12:00:00</ur:EndTime>
</ur:MemoryUsageBlock>
<ur:StorageUsageBlock>
<ur:StorageShare>pool-003</ur:StorageShare>
<ur:StorageMedia>disk</ur:StorageMedia>
<ur:StorageClass>replicated</ur:StorageClass>
<ur:DirectoryPath>/projectA</ur:DirectoryPath>
<ur:FileCount>42</ur:FileCount>
<ur:StorageResourceCapacityUsed>14728</ur:StorageResourceCapacityUsed>
<ur:StorageLogicalCapacityUsed>13617</ur:StorageLogicalCapacityUsed>
<ur:StorageResourceCapacityAllocated>14624
</ur:StorageResourceCapacityAllocated>
<ur:StartTime>2018-05-07T09:31:40Z</ur:StartTime>
<ur:EndTime>2018-05-08T09:29:42Z</ur:EndTime>
<ur:Host>host.example.org</ur:Host>
</ur:StorageUsageBlock>
<ur:CloudUsageBlock>
<ur:LocalVirtualMachineId>ab1234</ur:LocalVirtualMachineId>
<ur:GlobalVirtualMachineId>
host.example.org/ab1234/2018-05-09T09:06:52Z
</ur:GlobalVirtualMachineId>
<ur:Status>started</ur:Status>
<ur:SuspendDuration>PT3600S</ur:SuspendDuration>
<ur:ImageId>UbuntuImage2013</ur:ImageId>
<ur:MachineName>cloud.example.org</ur:MachineName>
<ur:SubmitHost>
cloud-name=cloud.example.org,Mds-Vo-name=local,o=cloud
</ur:SubmitHost>
<ur:TimeInstant ur:type="Ctime">2018-05-31T10:30:00</ur:TimeInstant>
<ur:TimeInstant ur:type="Qtime">2018-05-31T10:31:00</ur:TimeInstant>
<ur:TimeInstant ur:type="Etime">2018-05-31T10:59:42</ur:TimeInstant>
<ur:ServiceLevel>Premium</ur:ServiceLevel>
</ur:CloudUsageBlock>
<!-- Network usage; attribute values must be quoted for the record to be well-formed XML -->
<ur:NetworkUsageBlock>
<ur:NetworkClass ur:NetworkResourceBandwidth="100000000">"Ethernet"</ur:NetworkClass>
<ur:NetworkInboundUsed ur:SourceAddress="192.168.1.12">14728</ur:NetworkInboundUsed>
<ur:NetworkOutboundUsed ur:DestinationAddress="192.168.1.21">14728</ur:NetworkOutboundUsed>
</ur:NetworkUsageBlock>
</ur:UsageRecord>
Copy of 2018-01-26 Research Objects and CWL
By Farah Z Khan
Copy of 2018-01-26 Research Objects and CWL
Presented at ELIXIR CWL Implementation Study meeting, Hinxton 2018-01-26
- 1,001