CWL Research Objects

Stian Soiland-Reyes

eScience lab, The University of Manchester

Meeting w/ BioSimSpace
Manchester, 2018-05-15

cwlVersion: v1.0
class: Workflow
inputs:
  inp: File
  ex: string

outputs:
  classout:
    type: File
    outputSource: compile/classfile

steps:
  untar:
    run: tar-param.cwl
    in:
      tarfile: inp
      extractfile: ex
    out: [example_out]

  compile:
    run: arguments.cwl
    in:
      src: untar/example_out
    out: [classfile]
{
  "@context" : [ "https://w3id.org/bundle/context" ],
  "id" : "/",
  "manifest" : [ "manifest.json" ],
  "createdOn" : "2017-08-24T10:57:46.325Z",
  "createdBy" : {
    "uri" : "https://view.commonwl.org",
    "name" : "Common Workflow Language Viewer"
  },
  "authoredBy" : [ {
    "uri" : "mailto:peter.amstutz@curoverse.com",
    "name" : "Peter Amstutz"
  }, {
    "uri" : "mailto:luka.stojanovic@sbgenomics.com",
    "name" : "Luka Stojanovic"
  }, {
    "uri" : "mailto:crusoe@ucdavis.edu",
    "name" : "Michael R. Crusoe"
  }, {
    "uri" : "mailto:porter@porter.st",
    "name" : "Andrey Kartashov"
  }, {
    "uri" : "mailto:janko.simonovic@sbgenomics.com",
    "name" : "Janko Simonovic"
  } ],
  "retrievedFrom" : "https://github.com/common-workflow-language/workflows/blob/lobstr-v1/workflows/lobSTR/",
  "retrievedOn" : "2017-08-24T10:57:46.325Z",
  "retrievedBy" : {
    "uri" : "https://view.commonwl.org",
    "name" : "Common Workflow Language Viewer"
  },
  "history" : [ "http:/git2prov.org/git2prov?giturl=https:/github.com/common-workflow-language/workflows.git&serialization=PROV-JSON" ],
  "aggregates" : [ {
    "uri" : "/workflow/tmp_2.fq",
    "mediatype" : "application/octet-stream",
    "createdOn" : "2017-08-24T10:57:46.923Z",
    "authoredBy" : [ {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/tmp_2.fq",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "bundledAs" : {
      "uri" : "urn:uuid:61579f3e-63e6-49c2-b780-f67b2df461b7",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/lobSTR-demo.json",
    "mediatype" : "application/json",
    "createdOn" : "2017-08-24T10:57:47.216Z",
    "authoredBy" : [ {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-demo.json",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "bundledAs" : {
      "uri" : "urn:uuid:973caa0e-f3bd-45e8-8d29-70123bc8715a",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/models/illumina_v3.pcrfree.stuttermodel",
    "mediatype" : "application/octet-stream",
    "createdOn" : "2017-08-24T10:57:47.239Z",
    "authoredBy" : [ {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/models/illumina_v3.pcrfree.stuttermodel",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "bundledAs" : {
      "uri" : "urn:uuid:62bbcbea-f34f-463f-990d-6148f8ed5e5c",
      "folder" : "/workflow/models/"
    }
  }, {
    "uri" : "/workflow/models/illumina_v3.pcrfree.stepmodel",
    "mediatype" : "application/octet-stream",
    "createdOn" : "2017-08-24T10:57:47.266Z",
    "authoredBy" : [ {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/models/illumina_v3.pcrfree.stepmodel",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "bundledAs" : {
      "uri" : "urn:uuid:03439ae7-cd94-42a3-b5fe-40bfff6882d8",
      "folder" : "/workflow/models/"
    }
  }, {
    "uri" : "/workflow/samtools-sort.cwl",
    "mediatype" : "text/x-yaml",
    "createdOn" : "2017-08-24T10:57:47.269Z",
    "authoredBy" : [ {
      "uri" : "mailto:luka.stojanovic@sbgenomics.com",
      "name" : "Luka Stojanovic"
    }, {
      "uri" : "mailto:crusoe@ucdavis.edu",
      "name" : "Michael R. Crusoe"
    }, {
      "uri" : "mailto:porter@porter.st",
      "name" : "Andrey Kartashov"
    }, {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/samtools-sort.cwl",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "conformsTo" : "https://w3id.org/cwl/v1.0",
    "bundledAs" : {
      "uri" : "urn:uuid:2dc07859-efc2-4945-a95f-ba7815b68d07",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/lobSTR-workflow.cwl",
    "mediatype" : "text/x-yaml",
    "createdOn" : "2017-08-24T10:57:47.42Z",
    "authoredBy" : [ {
      "uri" : "mailto:luka.stojanovic@sbgenomics.com",
      "name" : "Luka Stojanovic"
    }, {
      "uri" : "mailto:crusoe@ucdavis.edu",
      "name" : "Michael R. Crusoe"
    }, {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-workflow.cwl",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "conformsTo" : "https://w3id.org/cwl/v1.0",
    "bundledAs" : {
      "uri" : "urn:uuid:58bc1895-3460-46d6-91d7-fa1718d09631",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/lobSTR-arvados-demo.json",
    "mediatype" : "application/json",
    "createdOn" : "2017-08-24T10:57:47.453Z",
    "authoredBy" : [ {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-arvados-demo.json",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "bundledAs" : {
      "uri" : "urn:uuid:30c683bc-69fb-4d93-8dad-65b663783af5",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/samtools-index.cwl",
    "mediatype" : "text/x-yaml",
    "createdOn" : "2017-08-24T10:57:47.458Z",
    "authoredBy" : [ {
      "uri" : "mailto:luka.stojanovic@sbgenomics.com",
      "name" : "Luka Stojanovic"
    }, {
      "uri" : "mailto:crusoe@ucdavis.edu",
      "name" : "Michael R. Crusoe"
    }, {
      "uri" : "mailto:porter@porter.st",
      "name" : "Andrey Kartashov"
    }, {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/samtools-index.cwl",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "conformsTo" : "https://w3id.org/cwl/v1.0",
    "bundledAs" : {
      "uri" : "urn:uuid:8235d3f8-6927-4f73-b160-8521838a1cbb",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/lobSTR-tool.cwl",
    "mediatype" : "text/x-yaml",
    "createdOn" : "2017-08-24T10:57:47.476Z",
    "authoredBy" : [ {
      "uri" : "mailto:luka.stojanovic@sbgenomics.com",
      "name" : "Luka Stojanovic"
    }, {
      "uri" : "mailto:crusoe@ucdavis.edu",
      "name" : "Michael R. Crusoe"
    }, {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/lobSTR-tool.cwl",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "conformsTo" : "https://w3id.org/cwl/v1.0",
    "bundledAs" : {
      "uri" : "urn:uuid:7fa6fbe4-1fc5-4cb5-9c1a-56b96c5f7aaf",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/allelotype.cwl",
    "mediatype" : "text/x-yaml",
    "createdOn" : "2017-08-24T10:57:47.537Z",
    "authoredBy" : [ {
      "uri" : "mailto:luka.stojanovic@sbgenomics.com",
      "name" : "Luka Stojanovic"
    }, {
      "uri" : "mailto:janko.simonovic@sbgenomics.com",
      "name" : "Janko Simonovic"
    }, {
      "uri" : "mailto:crusoe@ucdavis.edu",
      "name" : "Michael R. Crusoe"
    }, {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/allelotype.cwl",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "conformsTo" : "https://w3id.org/cwl/v1.0",
    "bundledAs" : {
      "uri" : "urn:uuid:3706bd2f-e53f-431d-b32a-deb661d9b292",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/README",
    "mediatype" : "application/octet-stream",
    "createdOn" : "2017-08-24T10:57:47.555Z",
    "authoredBy" : [ {
      "uri" : "mailto:crusoe@ucdavis.edu",
      "name" : "Michael R. Crusoe"
    }, {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/README",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "bundledAs" : {
      "uri" : "urn:uuid:ed54c4d6-c585-4dc9-b7bc-0cf299e20b91",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/workflow/tmp_1.fq",
    "mediatype" : "application/octet-stream",
    "createdOn" : "2017-08-24T10:57:47.738Z",
    "authoredBy" : [ {
      "uri" : "mailto:peter.amstutz@curoverse.com",
      "name" : "Peter Amstutz"
    } ],
    "retrievedFrom" : "https://raw.githubusercontent.com/common-workflow-language/workflows/lobstr-v1/workflows/lobSTR/tmp_1.fq",
    "retrievedBy" : {
      "uri" : "https://view.commonwl.org",
      "name" : "Common Workflow Language Viewer"
    },
    "bundledAs" : {
      "uri" : "urn:uuid:5d431f81-ad0b-4acf-903a-9d5aa03b04df",
      "folder" : "/workflow/"
    }
  }, {
    "uri" : "/visualisation.png",
    "mediatype" : "image/png",
    "createdOn" : "2017-08-24T10:57:47.801Z",
    "retrievedFrom" : "https://view.commonwl.org/graph/png/github.com/common-workflow-language/workflows/blob/lobstr-v1/workflows/lobSTR/lobSTR-workflow.cwl",
    "bundledAs" : {
      "uri" : "urn:uuid:ff9ace37-e76c-49f8-8d36-60f11ff6d257",
      "folder" : "/"
    }
  }, {
    "uri" : "/visualisation.svg",
    "mediatype" : "image/svg+xml",
    "createdOn" : "2017-08-24T10:57:47.821Z",
    "retrievedFrom" : "https://view.commonwl.org/graph/svg/github.com/common-workflow-language/workflows/blob/lobstr-v1/workflows/lobSTR/lobSTR-workflow.cwl",
    "bundledAs" : {
      "uri" : "urn:uuid:a6cfb437-8818-4ab2-9081-efc74c5109e8",
      "folder" : "/"
    }
  } ],
  "annotations" : [ {
    "uri" : "urn:uuid:9f602fff-b280-41c5-9590-ab95a49c85ad",
    "about" : "/",
    "content" : "annotations/merged.cwl"
  }, {
    "uri" : "urn:uuid:0ce4b727-ff61-4534-9afb-e3d676d2782d",
    "about" : "/",
    "content" : "annotations/workflow.ttl"
  } ]
}
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: Workflow

label: "Hello World"
doc: "Outputs a message using echo"

inputs: []

outputs:
  response:
    outputSource: step0/response
    type: File

steps:
  step0:
    run:
      class: CommandLineTool
      inputs:
        message:
          type: string
          doc: "The message to print"
          default: "Hello World"
          inputBinding:
            position: 1
      baseCommand: echo
      stdout: response.txt
      outputs:
        response:
          type: stdout
    in: []
    out: [response]

Prototype PROV+RO export
CWL reference implementation

 

Copyright © 2013 W3C® (MIT, ERCIM, Keio, Beihang), All Rights Reserved. W3C liability, trademark and document use rules apply.

PROV Model Primer

W3C Working Group Note 30 April 2013

Which PROV format?

<prov:wasGeneratedBy>
  <prov:entity prov:ref="ex:ent1"/>
  <prov:activity prov:ref="ex:act1"/>
  <prov:time>2017-10-26T21:32:52Z</prov:time>
  <ex:port>p1</ex:port>
</prov:wasGeneratedBy>
wasGeneratedBy(ent1, act1, 
  2017-10-26T21:32:52Z, ex:port="p1")
:ent1
  a prov:Entity;
  prov:wasGeneratedBy :act1;
  prov:generatedAtTime "2017-10-26T21:32:52Z"^^xsd:dateTime ;
  ex:port "p1" .
    "wasGeneratedBy": {
        "ex:gen1": {
            "prov:entity": "ent1",
            "prov:activity": "act1",
            "prov:time": "2017-10-26T21:32:52Z",
            "ex:port": "p1"
        },
    },
{ "@context": { .. }, 
  "@id": "ent1",
  "@type": "prov:Entity",
  "ex:port": "p1",
  "prov:generatedAtTime":  "2017-10-26T21:32:52Z",
  "prov:wasGeneratedBy": {
    "@id": "act1",
    "@type": "prov:Activity"
  } 
}

PROV-N

PROV-XML

PROV-JSON

PROV-O Turtle

PROV-O JSON-LD

How to tie it all together?

A Research Object bundles and relates digital resources of a scientific experiment or investigation:

 

Data used and results produced in an experimental study

Methods employed to produce and analyse that data

Provenance and settings for the experiments

People involved in the investigation

Annotations about these resources, to improve understanding and interpretation

id:        doi:10.15490/seek.1.investigation.56
createdOn: 2015-07-10T16:46:00Z
createdBy: http://orcid.org/0000-0001-9842-9718

aggregates:
 - id:         data/sequence/specimen5.bam
   conformsTo: http://gemrb.org/iesdp/file_formats/ie_formats/bam_v1.htm

 - id:         http://example.com/blog/about-specimen5
   authoredBy: http://orcid.org/0000-0001-7066-3350

 - id:         http://www.myexperiment.org/workflows/3355
   history:    provenance/workflow-evolution.ttl

annotations:
 - about:       data/sequence/specimen5.bam
   content:     annotations/specimen5-properties.jsonld
   createdBy:   http://orcid.org/0000-0001-7066-3350

 - about:       data/sequence/specimen5.bam
   content:     http://example.com/blog/about-specimen5
   motivatedBy: oa:questioning

Research Object manifest

(simplified)

Reuse standards:
OAI-ORE, BagIt, W3C JSON-LD, PROV, Web Annotation Model

metadata/manifest.json
data/sequence/specimen5.bam
provenance/workflow-evolution.ttl
http://example.com/blog/about-specimen5
http://www.myexperiment.org/workflows/3355

http://orcid.org/0000-0001-7066-3350
http://gemrb.org/iesdp/
   file_formats/ie_formats/bam_v1.htm

Who is using Research Objects?

Workflow provenance

One activity per step execution

 

 

activity(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -, 
   [prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main"])    
    // main workflow run started outside somehow (we don't know how)
    wasStartedBy(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -, 
                 -, 2017-10-27T15:00:00Z)
    // ...
    // step is a nested workflow, so also a WorkflowRun
    activity(run:4305467e-6dfb-11e7-885d-0242ac110002, -, -, 
      [prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main/nested1"])
        // started by the mother activity
        wasStartedBy(run:4305467e-6dfb-11e7-885d-0242ac110002, -, -, 
                     run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:30Z)
    
        // inner step of nested workflow, ProcessRun as this is a command line execution
        activity(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, -, 
          [prov:type='wfprov:ProcessRun', prov:label="Run of workflow/packed.cwl#nested/innerStep1"])
            
        wasStartedBy(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, -, 
                     run:4305467e-6dfb-11e7-885d-0242ac110002, 2017-10-27T15:01:00Z)
        // ...

Identifying intermediate data

Output 1B file is also Input 2C and Input 3D downstream

Simple filenames -> duplications

  ./data/step1/outputB.txt 
./data/step2/inputC.txt
./data/step3/inputD.txt

 

Content-addressable

SHA-256 hash of bytes as filename:

./data/51/51fb8af0c4ae0422fbe88340d91880ecb9d7537cf57339c1cf1256b7ca58f32d

RFC6920 URI as global identifier:

nih:sha-256;51fb8af0c4ae0422fbe88340d91880ecb9d7537cf57339c1cf1256b7ca58f32d
arcp://uuid,32a423d6-52ab-47e3-a9cd-54f418a48571/css/base.css
arcp://ni,sha-256;f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk/src/luhn.c

UUID

Hash (RFC6920)

Workflow provenance profiles

2018-05-15 Research Objects and CWL

By Stian Soiland-Reyes

2018-05-15 Research Objects and CWL

Presented in meeting with BioSimSpace

  • 2,277