End-To-End Testing Pipeline for Phasing and Imputation

1 Factory Pattern

1 Factory Pattern

def main(order):
    piper = PiperFactory.create_piper(order)
    play_pipe(piper)


def play_pipe(piper):
    piper.pipe_stdin()
    cmd = piper.render_user_cmd(order=piper.order,)
    piper.run_user_script(cmd)
    piper.collect_output_files()
    piper.upload_output_files()
    piper.teardown()

1.1

├── seqpiper
│   ├── base
│   ├── bam
│   ├── vcf
│   │   ├── base.py
│   │ 
│   ├── main.py
│
├── tests
    ├── phasing.sh
    ├── imputation.sh

1.2 Advantage of doing this

class VcfPiperBase(PiperAbstractFactory):
    def pipe_stdin(self):
        some_vcf_function()
class PiperAbstractFactory(object):
    @abstractmethod
    def stdin(self):
        pass
  
    def render_user_cmd(self):
        some code
    
    def run_user_script(self):
        some code
    
    def collect_output_files(self):
        some code
    
    def upload_output_files(self):
        some code
    
    def teardown(self):
        some code

2 Prevent Import Error

import sys
import traceback

sys.path.append('/usr/local/seqslab/SeqPiper')

from script.library.GatkWrapper import ApplyRecalibration
from script.common.base import SeqPiperBase
from script.common.order import JobOrder
from script.common.exception import SeqPiperEmptyVariantFile

2.2 Formatter Would Do This...

import sys
import traceback

from script.library.GatkWrapper import ApplyRecalibration
from script.common.base import SeqPiperBase
from script.common.order import JobOrder
from script.common.exception import SeqPiperEmptyVariantFile

sys.path.append('/usr/local/seqslab/SeqPiper')

2.3 ModuleNotFoundError!

2.4 Build SeqPiper Package

setup(
    name="seqpiper",
    version="0.0.1",
    long_description=readme,
    long_description_content_type="text/markdown",
    url="",
    author="",
    author_email="",
    license="",
    install_requires=[],
    keywords="bioinformatics, seqpiper",
    packages=find_packages(),
)
python3 setup.py install

2.5 Remove sys.path.append

import sys
import traceback

from script.library.GatkWrapper import ApplyRecalibration
from script.common.base import SeqPiperBase
from script.common.order import JobOrder
from script.common.exception import SeqPiperEmptyVariantFile

3. CI Pipeline

3.1 How to add new test case?

/opt/bitnami/spark/bin/spark-submit \
  --class net.vartotal.piper.cli.PiperMain \
  --driver-cores 1 \
  --driver-memory 1g \
  --executor-cores 1 \
  --executor-memory 10g \
  --conf "spark.dynamicAllocation.enabled=false" \
  --conf "spark.executor.extraJavaOptions=-XX:+UseG1GC" \
  --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
  --conf "spark.kryo.registrator=net.vartotal.piper.serialization.ADAMKryoRegistrator" \
  --conf "spark.executor.memoryOverhead=14g" \
  --conf "spark.speculation=false" \
  /usr/local/seqslab/SeqPiper/target/piper-2.1.0.jar vcf \
  --reference-version 38 \
  --reference-system GRCH \
  --piper-script fixtures.scripts.no_stdout \
  -i 0=/usr/local/seqslab/SeqPiper/fixtures/vcf/ \
  --samplePath /usr/local/seqslab/SeqPiper/fixtures/vcf/ \
  --vcf-output-path vcf_gz_folder \
  --partition-bed-path /usr/local/seqslab/SeqPiper/fixtures/bed/38/chromosomes

3.2 How to add new test case?

├── seqpiper
│   ├── base
│   ├── bam
│   ├── vcf
│   │   ├── base.py
│   │ 
│   ├── main.py
│
├── tests
    ├── phasing.sh
    ├── imputation.sh

3.3 CI Pipeline

3.4 Advantage of Doing This

  1. Spend less on AzureBatch for testing
  2. Save some time to test pipeline

deck

By davidtnfsh