Stian Soiland-Reyes

eScience lab, The University of Manchester

@soilandreyes

http://orcid.org/0000-0001-9842-9718
http://slides.com/soilandreyes/2017-11-16-cwl/

2017-11-16

This work has been done as part of the BioExcel CoE (www.bioexcel.eu), a project funded by the European Union contract H2020-EINFRA-2015-1-675728.

cwlVersion: v1.0
class: Workflow
inputs:
  inp: File
  ex: string

outputs:
  classout:
    type: File
    outputSource: compile/classfile

steps:
  untar:
    run: tar-param.cwl
    in:
      tarfile: inp
      extractfile: ex
    out: [example_out]

  compile:
    run: arguments.cwl
    in:
      src: untar/example_out
    out: [classfile]
document
prefix wfprov <http://purl.org/wf4ever/wfprov#>
prefix prov <http://www.w3.org/ns/prov#>
prefix wfdesc <http://purl.org/wf4ever/wfdesc#>
prefix wf <https://w3id.org/cwl/view/git/933bf2a1a1cce32d88f88f136275535da9df0954/workflows/hello/hello.cwl#>
prefix input <app://579c1b74-b328-4da6-80a8-a2ffef2ac9b5/workflow/input.json#>
prefix run <urn:uuid:>
prefix engine <urn:uuid:>
prefix data <urn:hash:sha256:>

default <app://579c1b74-b328-4da6-80a8-a2ffef2ac9b5/>

// Level 1 provenance of workflow run

activity(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, , , [prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main"])    
    wasStartedBy(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -, -, 2017-10-27T14:24:00+01:00)  

    // The engine is the SoftwareAgent that is executing our Workflow plan
    wasAssociatedWith(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, wf:main)
        agent(engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, prov:type='prov:SoftwareAgent', prov:type='wfprov:WorkflowEngine', prov:label="cwltool v1.2.5")
        // prov has no term to relate sub-plans - we'll use wfdesc:hasSubProcess
        entity(wf:main,[prov:type='wfdesc:Workflow', prov:type='prov:Plan', wfdesc:hasSubProcess='wf:main/step1', wfdesc:hasSubProcess='wf:main/step2'])
            alternateOf(wf:main, workflow/packed.cwl)
            entity(wf:main/step1,[prov:type='wfdesc:Process', prov:type='prov:Plan'])
            entity(wf:main/step2,[prov:type='wfdesc:Process', prov:type='prov:Plan'])            

    // First the workflow uses some data; here with a urn:sha:sha256 identifier
    used(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, 2017-10-27T14:29:00+01:00, [prov:role='wf:main/input1']))
        entity(data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, [prov:type='wfprov:Artifact'])
            // which we have stored a copy of within the research object
            specializationOf(data/58/5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03)

    // Then there was another activity - wfprov:ProcessRun indicating a command line tool
    activity(run:4305467e-6dfb-11e7-885d-0242ac110002, -, -, [prov:type='wfprov:ProcessRun', prov:label="Run of workflow/packed.cwl#main/step1"])
        // started by the mother activity
        wasStartedBy(run:4305467e-6dfb-11e7-885d-0242ac110002, -, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:00+01:00)
        // same engine using step1 as plan. In a distributed scenario there might be a different engine
        wasAssociatedWith(run:4305467e-6dfb-11e7-885d-0242ac110002, engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, wf:main/step1)
        // This activity also use the same data, but in a different role (e.g. input parameter)
        used(run:4305467e-6dfb-11e7-885d-0242ac110002, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, 2017-10-27T14:00:00+01:00, [prov:role='wf:main/step1/in1'])

        // And we generate some new data
        wasGeneratedBy(data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, run:4305467e-6dfb-11e7-885d-0242ac110002, 2017-10-27T16:00:00+01:00, [prov:role='wf:main/step1/out1']))
            entity(data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, [prov:type='wfprov:Artifact'])
                // again stored in the RO
                specializationOf(data/00/00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c)

        // step1 finished
        wasEndedBy(run:4305467e-6dfb-11e7-885d-0242ac110002, -, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:30:00+01:00)

    // the master workflow then "generate" that same value, but now at a different time and role (the resultA master workflow output)
    wasGeneratedBy(data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/resultA'])

    // next step activity
    activity(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, - [prov:type='wfprov:ProcessRun', prov:label="Run of workflow/packed.cwl#main/step2"])
        wasStartedBy(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T16:00:00+01:00)
        // associated with step2
        wasAssociatedWith(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, engine:b2210211-8acb-4d58-bd28-2a36b18d3b4f, wf:main/step2)
        
        // Uses two data artifacts; one which came from previous step, other as workflow input
        used(run:4305467e-6dfb-11e7-885d-0242ac110002, data:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/step2/valueA'])
        used(run:4305467e-6dfb-11e7-885d-0242ac110002, data:00688350913f2f292943a274b57019d58889eda272370af261c84e78e204743c, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/step2/valueB'])
        
        // and generate two new data artifacts
        wasGeneratedBy(data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, run:c42dc36e-6dfd-11e7-bc24-0242ac110002,  2017-10-27T16:34:20+01:00, [prov:role='wf:main/step2/out1'])))
            entity(data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, [prov:type='wfprov:Artifact'])
                specializationOf(data/95/2f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d)

        wasGeneratedBy(data:3deb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0, run:c42dc36e-6dfd-11e7-bc24-0242ac110002,  2017-10-27T16:34:20+01:00, [prov:role='wf:main/step2/out2'])))
            entity(data:3deb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0, [prov:type='wfprov:Artifact'])
                specializationOf(data/3d/eb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0, data:3deb00bd0decd1f21d015a178c4f23a5eb537588c08eeee9d55059ec29637be0)
        // step2 ends
        wasEndedBy(run:c42dc36e-6dfd-11e7-bc24-0242ac110002, -, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T16:30:00+01:00)

    // only step output out1 captured by mother workflow, sent to resultB workflow output
    wasGeneratedBy(data:952f537d1f3116db56703787ace248fe00ae46fa77ea3803aa3d8dc01d221a9d, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T15:00:00+01:00, [prov:role='wf:main/resultB'])

    // mother workflow ends
    wasEndedBy(run:2e1287e0-6dfb-11e7-8acf-0242ac110002, -, -, run:2e1287e0-6dfb-11e7-8acf-0242ac110002, 2017-10-27T16:34:40+01:00)

endDocument

PROV

2017-11-16 Common Workflow Language

By Stian Soiland-Reyes

2017-11-16 Common Workflow Language

  • 2,230