Snakemake 6.0

New Features:

  • a new module system
  • rule inheritance
  • automatic containerization

A new module system

# Declare a module whose rules live in a separate Snakefile.
module some_module:
    snakefile: "workflow/modules/some_module/Snakefile"

declare modules to be used in your workflow

A new module system

# Declare a module whose rules live in a separate Snakefile.
module some_module:
    snakefile: "workflow/modules/some_module/Snakefile"


# Use every rule of the module in this workflow.
use rule * from some_module

declare rule usage from module

A new module system

# Declare a module whose rules live in a separate Snakefile.
module some_module:
    snakefile: "workflow/modules/some_module/Snakefile"


# Use every rule of the module, renamed with the "some_prefix_" prefix so the
# names cannot clash with rules defined elsewhere.
use rule * from some_module as some_prefix_*

modify rule names to avoid clashes

A new module system

module some_module:
    snakefile: "workflow/modules/some_module/Snakefile"
    # Rewrite the module's input/output path prefixes to module-specific ones.
    replace_prefix: {
        "results/": "results/some_module/",
        "resources/": "resources/some_module/",
    }

# Use every rule of the module, renamed with the "some_prefix_" prefix.
use rule * from some_module as some_prefix_*

modify input and output prefixes

A new module system

module some_module:
    snakefile: "workflow/modules/some_module/Snakefile"

# Use every rule of the module, renamed with the "some_prefix_" prefix.
use rule * from some_module as some_prefix_*

# Re-declare map_reads specifically, setting its `sort` parameter.
use rule map_reads from some_module as some_prefix_map_reads with:
    params: sort="coordinate"

modify specific rules from the module

A new module system

configfile: "config.yaml"


def get_input(wildcards):
    """Return the entry of config["samples"] keyed by the `sample` wildcard."""
    samples = config["samples"]
    return samples[wildcards.sample]


# Declare a module backed by a meta-wrapper instead of a local Snakefile.
module bwa_mapping:
    meta_wrapper: "0.72.0/meta/bio/bwa_mapping"


# Use every rule of the module under its original name.
use rule * from bwa_mapping


# Re-declare bwa_mem so that its input comes from the get_input function.
use rule bwa_mem from bwa_mapping with:
    input: get_input

A new module system

configfile: "config/config.yaml"

# Module loaded from a remote Snakefile; it receives only the "rna-seq"
# section of this workflow's config.
module rna_seq:
    snakefile: "https://github.com/snakemake-workflows/rna-seq-kallisto-sleuth/raw/v2.0.1/workflow/Snakefile"
    config: config["rna-seq"]

# Module loaded from a remote Snakefile; it receives only the "dna-seq"
# section of this workflow's config.
module dna_seq:
    snakefile: "https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/raw/v2.0.1/Snakefile"
    config: config["dna-seq"]
        

# Use every rule of each module, prefixed with the module's name so rule
# names from the two workflows cannot clash.
use rule * from rna_seq as rna_seq_*

use rule * from dna_seq as dna_seq_*
        

easily combine multiple workflows into one

A new module system

configfile: "config/config.yaml"

# Module loaded from a remote Snakefile; it receives only the "rna-seq"
# section of this workflow's config.
module rna_seq:
    snakefile: "https://github.com/snakemake-workflows/rna-seq-kallisto-sleuth/raw/v2.0.1/workflow/Snakefile"
    config: config["rna-seq"]

# Module loaded from a remote Snakefile; it receives only the "dna-seq"
# section of this workflow's config.
module dna_seq:
    snakefile: "https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/raw/v2.0.1/Snakefile"
    config: config["dna-seq"]
        

# Use every rule of each module, prefixed with the module's name so rule
# names from the two workflows cannot clash.
use rule * from rna_seq as rna_seq_*

use rule * from dna_seq as dna_seq_*


# Extra rule defined in this workflow: an R notebook turns the variant calls
# and the differential expression table into a single SVG.
# NOTE(review): the two inputs are presumably produced by the dna_seq and
# rna_seq modules respectively; confirm against those workflows.
rule some_integrated_analysis:
    input:
        # Named inputs are keyword arguments and must be comma-separated.
        calls="results/calls/all.vcf.gz",
        diffexp="results/diffexp/all.tsv",
    output:
        "results/integrated-analysis/all.svg"
    notebook:
        "workflow/notebooks/integrated-analysis.r.ipynb"
        

make extensions and modifications transparent

Rule inheritance

# Writes the word "test" into its output file.
rule a:
    output: "test.out"
    shell: "echo test > {output}"


# Reuse rule a under the name b, changing only the output file.
use rule a as b with:
    output: "test2.out"

reuse and modify existing rules in the same workflow

Automatic containerization

fast and ad-hoc software stack definition
using conda packages

# Build results/matrix.tsv from the raw data inside the pandas conda env.
rule analyze_stuff:
    input:
        "resources/raw-data.tsv"
    output:
        "results/matrix.tsv"
    conda:
        "envs/pandas.yaml"
    # The target is a Jupyter notebook (.py.ipynb), so it must be run through
    # the notebook directive; the script directive does not accept .ipynb.
    notebook:
        "scripts/analyze-stuff.py.ipynb"


# Plot results/matrix.tsv to a PDF with a notebook run in the ggplot conda env.
rule plot_stuff:
    input: "results/matrix.tsv"
    output: "results/plots/myplot.pdf"
    conda: "envs/ggplot.yaml"
    notebook: "notebooks/plot-stuff.r.ipynb"

Automatic containerization

containerization automatically yields a
transparent yet concise Dockerfile

# Base image; the mamba commands in Step 2 rely on it.
FROM condaforge/mambaforge:latest
# Labels mark the image as Snakemake-containerized and record a conda
# environment hash (presumably derived from the environments below;
# confirm against the Snakemake documentation).
LABEL io.github.snakemake.containerized="true"
LABEL io.github.snakemake.conda_env_hash="729e69b7e0a6c76ba7a5f69bd51474f68d37443999e0952f0e9d63bb0d9cfe92"

# Step 1: Retrieve conda environments

# Conda environment:
#   source: envs/ggplot.yaml
#   prefix: /conda-envs/dcef9d5a2891d184878bd1d9bde72a52
#   channels:
#     - conda-forge
#   dependencies:
#     - r-base 4.0
#     - r-ggplot2 3.3
# Create the prefix directory and copy the environment definition into it.
RUN mkdir -p /conda-envs/dcef9d5a2891d184878bd1d9bde72a52
COPY envs/ggplot.yaml /conda-envs/dcef9d5a2891d184878bd1d9bde72a52/environment.yaml

# Conda environment:
#   source: envs/pandas.yaml
#   prefix: /conda-envs/250d5a01e8ff0d636f8f5d03dee073b7
#   channels:
#     - conda-forge
#   dependencies:
#     - python 3.9
#     - pandas 1.2
# Create the prefix directory and copy the environment definition into it.
RUN mkdir -p /conda-envs/250d5a01e8ff0d636f8f5d03dee073b7
COPY envs/pandas.yaml /conda-envs/250d5a01e8ff0d636f8f5d03dee073b7/environment.yaml

# Step 2: Generate conda environments

# Create both environments at their prefixes from the copied definitions,
# then run `mamba clean --all -y`, all within one RUN instruction.
RUN mamba env create --prefix /conda-envs/dcef9d5a2891d184878bd1d9bde72a52 --file /conda-envs/dcef9d5a2891d184878bd1d9bde72a52/environment.yaml && \
    mamba env create --prefix /conda-envs/250d5a01e8ff0d636f8f5d03dee073b7 --file /conda-envs/250d5a01e8ff0d636f8f5d03dee073b7/environment.yaml && \
    mamba clean --all -y

Automatic containerization

build, upload and use the resulting container image

container: "quay.io/some-username/my-workflow-image:1.0"


# Build results/matrix.tsv from the raw data inside the pandas conda env.
rule analyze_stuff:
    input:
        "resources/raw-data.tsv"
    output:
        "results/matrix.tsv"
    conda:
        "envs/pandas.yaml"
    # The target is a Jupyter notebook (.py.ipynb), so it must be run through
    # the notebook directive; the script directive does not accept .ipynb.
    notebook:
        "scripts/analyze-stuff.py.ipynb"


# Plot results/matrix.tsv to a PDF with a notebook run in the ggplot conda env.
rule plot_stuff:
    input: "results/matrix.tsv"
    output: "results/plots/myplot.pdf"
    conda: "envs/ggplot.yaml"
    notebook: "notebooks/plot-stuff.r.ipynb"

Snakemake 6.0

By Johannes Köster

Snakemake 6.0

New features in Snakemake 6.0

  • 3,348