Johannes Köster
2019
https://koesterlab.github.io
dataset
results
"Let me do that by hand..."
dataset
results
dataset
dataset
dataset
dataset
dataset
"Let me do that by hand..."
dataset
results
dataset
dataset
dataset
dataset
dataset
automation
From raw data to final figures:
dataset
results
dataset
dataset
dataset
dataset
dataset
scalability
Handle parallelization:
automation
dataset
results
dataset
dataset
dataset
dataset
dataset
Handle deployment:
be able to easily execute analyses on a different system/platform/infrastructure
portability
scalability
automation
# Create result/{sample}.txt from data/{sample}.txt by running some-tool.
# {sample} is a wildcard that appears in both the input and the output path.
rule mytask:
    input:
        "data/{sample}.txt"
    output:
        "result/{sample}.txt"
    shell:
        # {input} and {output} stand for the paths declared above;
        # the tool's stdout is redirected into the output file.
        "some-tool {input} > {output}"
# Create result/{sample}.txt from data/{sample}.txt by running the
# Python script scripts/mytask.py instead of an inline shell command.
rule mytask:
    input:
        "data/{sample}.txt"
    output:
        "result/{sample}.txt"
    script:
        "scripts/mytask.py"
# Create result/{sample}.txt from data/{sample}.txt by running the
# R script scripts/mytask.R.
rule mytask:
    input:
        "data/{sample}.txt"
    output:
        "result/{sample}.txt"
    script:
        "scripts/mytask.R"
# Draw a histogram of the first input table and save the figure to the
# first output path.
#
# NOTE: `snakemake` is not defined in this script; it is expected to be
# provided by Snakemake when the script is run through a `script:` directive.
import matplotlib.pyplot as plt
import pandas as pd

# Load the table from the first declared input file.
table = pd.read_table(snakemake.input[0])

# The number of bins comes from the workflow configuration key "hist-bins".
bin_count = snakemake.config["hist-bins"]
table.hist(bins=bin_count)

# Write the current matplotlib figure to the first declared output file.
plt.savefig(snakemake.output[0])
# Run scripts/mytask.py to turn data/{sample}.txt into result/{sample}.txt;
# the {sample} wildcard ties each output file to its input file.
rule mytask:
    input:
        "data/{sample}.txt"
    output:
        "result/{sample}.txt"
    script:
        "scripts/mytask.py"
# Create result/{sample}.txt from data/{sample}.txt using a reusable wrapper
# instead of a hand-written command or script.
rule mytask:
    input:
        "data/{sample}.txt"
    output:
        "result/{sample}.txt"
    wrapper:
        # Wrapper identifier; presumably "<version>/<path to wrapper>" in the
        # Snakemake wrapper repository -- confirm against the wrapper docs.
        "0.24.0/bio/mytool"
# Create result/{sample}.txt from data/{sample}.txt by delegating to a
# tool referenced through the `cwl` directive.
rule mytask:
    input:
        "data/{sample}.txt"
    output:
        "result/{sample}.txt"
    cwl:
        # Location of the tool definition to execute.
        "https://github.com/some/cwl-tool"
# Create result/{dataset}.txt from path/to/{dataset}.txt by running the
# R script scripts/myscript.R. {dataset} is a wildcard shared by both paths.
rule mytask:
    input:
        "path/to/{dataset}.txt"
    output:
        "result/{dataset}.txt"
    script:
        "scripts/myscript.R"
# Filter result/{dataset}.txt into result/{dataset}.filtered.txt by
# running mycommand on it.
rule myfiltration:
    input:
        "result/{dataset}.txt"
    output:
        "result/{dataset}.filtered.txt"
    shell:
        # stdout of mycommand becomes the filtered output file.
        "mycommand {input} > {output}"
# Combine the filtered results of dataset1 and dataset2 into a single plot,
# plots/myplot.pdf, by running the R script scripts/myplot.R.
rule aggregate:
    input:
        # These paths use the "result/" prefix so that they match the
        # "result/{dataset}.filtered.txt" output pattern of rule myfiltration;
        # with a different prefix no rule could produce the requested files.
        "result/dataset1.filtered.txt",
        "result/dataset2.filtered.txt"
    output:
        "plots/myplot.pdf"
    script:
        "scripts/myplot.R"
workstation
compute server
cluster
grid computing
cloud computing
# Create result/{dataset}.txt from path/to/{dataset}.txt by running mycommand.
rule mytask:
    input:
        "path/to/{dataset}.txt"
    output:
        "result/{dataset}.txt"
    conda:
        # Environment definition file associated with this rule.
        "envs/mycommand.yaml"
    shell:
        "mycommand {input} > {output}"
# Conda environment definition: the channels to search and the packages to
# install.
channels:
  - bioconda
  - conda-forge
dependencies:
  # A YAML sequence entry needs a space after the dash; without it the line
  # is not parsed as a list item. Pins mycommand to version 2.3.1.
  - mycommand =2.3.1
# Create result/{dataset}.txt from path/to/{dataset}.txt by running mycommand.
rule mytask:
    input:
        "path/to/{dataset}.txt"
    output:
        "result/{dataset}.txt"
    singularity:
        # Container image reference associated with this rule.
        "docker://some/container"
    shell:
        "mycommand {input} > {output}"
# Workflow-level container image declaration (placed outside of any rule).
singularity: "docker://some/os"


# Create result/{dataset}.txt from path/to/{dataset}.txt by running mycommand.
rule mytask:
    input:
        "path/to/{dataset}.txt"
    output:
        "result/{dataset}.txt"
    conda:
        # Environment definition file associated with this rule.
        "envs/mycommand.yaml"
    shell:
        "mycommand {input} > {output}"