Skip to content

Commit

Permalink
Merge pull request #150 from fnothaft/issues/149-quinine
Browse files Browse the repository at this point in the history
Add Quinine container (resolves #146, #149)
  • Loading branch information
hannes-ucsc authored Jul 26, 2016
2 parents 339ee9f + 66d969e commit 1a7a5cd
Show file tree
Hide file tree
Showing 33 changed files with 758 additions and 11 deletions.
4 changes: 2 additions & 2 deletions adam/build/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM quay.io/ucsc_cgl/spark-and-maven:1.5.2.3.3.3-build
FROM quay.io/ucsc_cgl/spark-and-maven:1.5.2.3.3.9

MAINTAINER Frank Austin Nothaft, [email protected]

Expand All @@ -11,4 +11,4 @@ RUN git clone https:/bigdatagenomics/adam.git
WORKDIR /home/adam
RUN git checkout c251f79c6bde3dce12e685c6cf03d5b1c30e9273

RUN /opt/apache-maven-3.3.3/bin/mvn package -DskipTests
RUN /opt/apache-maven-3.3.9/bin/mvn package -DskipTests
4 changes: 2 additions & 2 deletions conductor/build/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM quay.io/ucsc_cgl/spark-and-maven:1.5.2.3.3.3-build
FROM quay.io/ucsc_cgl/spark-and-maven:1.5.2.3.3.9

MAINTAINER Frank Austin Nothaft, [email protected]

Expand All @@ -8,7 +8,7 @@ RUN git clone https:/BD2KGenomics/conductor.git

# build conductor
WORKDIR /home/conductor
RUN /opt/apache-maven-3.3.3/bin/mvn package \
RUN /opt/apache-maven-3.3.9/bin/mvn package \
-DskipTests \
-Dhadoop.version=2.6.0 \
-Dspark.version=1.5.2
3 changes: 3 additions & 0 deletions quinine-pipelines/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.workflow.json
cromwell-*
WorkflowStore
29 changes: 29 additions & 0 deletions quinine-pipelines/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Image that layers Toil and the quinine pipeline scripts on top of the
# quinine base image. The default command prints the pipeline wrapper's help.
FROM quay.io/ucsc_cgl/quinine

MAINTAINER Frank Austin Nothaft, [email protected]

# System packages needed to install Toil and fetch the pipeline sources.
# The apt package lists are removed in the same layer so they do not bloat
# the final image.
RUN apt-get update && apt-get install -y \
    python-dev \
    python-pip \
    libnss3 \
    git \
    curl \
    wget \
    apt-transport-https \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Install Toil
RUN pip install toil==3.3.0

# Install quinine pipeline
# NOTE(review): this checks out a branch rather than a fixed commit, so image
# contents can change between builds -- consider pinning a commit hash.
RUN git clone https://github.com/fnothaft/toil-scripts /opt/toil-scripts
RUN cd /opt/toil-scripts && git checkout issues/380-quinine

# Install the pip dependencies
RUN cd /opt/toil-scripts && make develop

# Wrapper script and documentation shipped alongside the pipeline sources.
COPY quinine-pipelines.sh /opt/toil-scripts/
COPY README.md /opt/toil-scripts/

# The entrypoint is `bash -c`, so the command must be passed as a single
# string; with no arguments the container prints the wrapper's help text.
ENTRYPOINT ["/bin/bash", "-c"]
CMD [ "/opt/toil-scripts/quinine-pipelines.sh --help"]
27 changes: 27 additions & 0 deletions quinine-pipelines/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Builds, tags, pushes and tests the quinine-pipelines Docker image.
#
# Targets:
#   build  - build the image and tag it as both ${tag} and latest
#   push   - push both tags to the registry (needs registry credentials)
#   test   - build the image, generate the workflow inputs, run test.py
#   clean  - remove the build marker and the generated workflow inputs

# A failed recipe must not leave a truncated, newer-looking target behind
# (relevant for the sed-generated *.workflow.json files).
.DELETE_ON_ERROR:

# Definitions
runtime_fullpath = $(realpath runtime)
build_tool = runtime-container.DONE

# Hash of the last commit that touched this directory. It is computed once at
# parse time and only when the caller has not supplied git_commit (command
# line or environment), which matches what `?=` did without re-running git on
# every expansion of ${tag}.
ifeq ($(origin git_commit),undefined)
git_commit := $(shell git log --pretty=oneline -n 1 -- ../quinine-pipelines | cut -f1 -d " ")
endif

name = quay.io/ucsc_cgl/quinine-pipelines
# NOTE(review): the fixed prefix is presumably the commit of the packaged
# tool -- confirm before bumping it.
tag = 716dd26cf6252a6db60afa25aaf7cf9ee9896b21--${git_commit}

workflows = contamination.workflow.json rna.workflow.json targeted.workflow.json

# None of these targets name a file they produce, so they must always run.
.PHONY: build push test clean

build:
	docker build -t ${name}:${tag} .
	docker tag ${name}:${tag} ${name}:latest
	touch ${build_tool}

# Requires ~/.dockercfg
push: build
	docker push ${name}:${tag}
	docker push ${name}:latest

test: build ${workflows}
	python test.py -b

# Instantiate a workflow input file by replacing the PWD placeholder in its
# template with the absolute path of this directory. $(CURDIR) is set by make
# itself, so it is correct even when invoked through `make -C`.
%.workflow.json: %.workflow.json.template
	sed -e "s:PWD:$(CURDIR):g" $< > $@

# $(RM) is `rm -f`, so missing files are not an error and need no `-` prefix.
clean:
	$(RM) ${build_tool} *.workflow.json
42 changes: 42 additions & 0 deletions quinine-pipelines/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Computational Genomics Lab, Genomics Institute, UC Santa Cruz
### Running the Quinine pipeline container

This guide will walk through running the Quinine pipeline. If you
find any errors or have corrections, please feel free to make a pull request against
the [cgl-docker-lib](https://github.com/BD2KGenomics/cgl-docker-lib) repository,
where this Docker image is maintained. Feedback of any kind is appreciated.

## Overview

This container runs the [Quinine](https://github.com/bigdatagenomics/quinine) QC
tool. This tool builds on top of the
[ADAM](https://github.com/bigdatagenomics/adam) platform for processing genomic
data using [Apache Spark](https://spark.apache.org) and the
[Toil](https://github.com/BD2KGenomics/toil) workflow management system.

This container runs three separate workflows:

1. RNA-seq QC: Computes a set of quality control metrics for RNA-seq data.
2. Targeted QC: Computes a set of quality control metrics for targeted
sequencing data captured using hybrid selection bait.
3. Contamination estimation: Estimates the inter-sample contamination using VCF
files to compute the background allele frequency, and by then looking at read
data from homozygous alt sites in a sample.

## Testing

An automated test is included. Simply install Docker, make, and Python 2.7 for your
platform and run the following:

```
make test
```

This test runs on a small set of test inputs, and tests all three workflows.

## Running

See `test.py` for example tool invocations. Additionally, we have provided WDL
workflows for each of the three stages. These workflows run by default on the
test files in the `test/` directory. If `cromwell` is on your path, or
`${CROMWELL_HOME}` is set, these WDL files will be run as part of `make test`.
23 changes: 23 additions & 0 deletions quinine-pipelines/contamination.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Runs the `contamination` subcommand of quinine-pipelines.sh inside the
# quinine-pipelines container and exposes the resulting contamination.txt.
task contamination {
# Inputs: read data, a population VCF and a sample VCF.
File reads
File population_vcf
File sample_vcf
# Forwarded verbatim to --memory; units are not defined in this file.
String mem

# The result is written into the task's working directory so that the
# relative path in the output section below resolves to it.
command {
/opt/toil-scripts/quinine-pipelines.sh contamination --reads ${reads} --population ${population_vcf} --sample-vcf ${sample_vcf} --memory ${mem} --output `pwd`/contamination.txt
}

runtime {
docker: "quay.io/ucsc_cgl/quinine-pipelines"
}

output {
File response = "contamination.txt"
}

}

# Single-call workflow wrapping the task above.
workflow wf {
call contamination
}
7 changes: 7 additions & 0 deletions quinine-pipelines/contamination.workflow.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"wf.contamination.reads": "PWD/test/contaminated.sam",
"wf.contamination.population_vcf": "PWD/test/population.vcf",
"wf.contamination.sample_vcf": "PWD/test/call.vcf",
"wf.contamination.mem": "1",
"wf.contamination.out": "PWD/test/contamination.txt"
}
16 changes: 16 additions & 0 deletions quinine-pipelines/quinine-pipelines.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Wrapper around the quinine metrics Toil pipeline.
#
# All command-line arguments are forwarded unchanged to
# toil_scripts.quinine_pipelines.metrics. A throw-away directory is created to
# hold the Toil job store and is removed again when the script exits.

set -e

# make a temp dir for the jobstore
jobStoreDir=$(mktemp -d -t jobStoreXXXXX)

# Remove the jobstore on exit. A trap is used because `set -e` aborts the
# script as soon as the pipeline fails, which would skip a trailing cleanup.
trap 'rm -rf "${jobStoreDir}"' EXIT

# run
export PYTHONPATH=/opt/toil-scripts/src/
# "$@" is quoted so arguments containing whitespace (e.g. file paths) are
# passed through as single words.
python -m toil_scripts.quinine_pipelines.metrics "$@" \
    --defaultDisk 0 \
    --maxDisk 0 \
    "${jobStoreDir}/jobStore"
22 changes: 22 additions & 0 deletions quinine-pipelines/rna.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Runs the `rna` subcommand of quinine-pipelines.sh inside the
# quinine-pipelines container and exposes the resulting rna.txt.
task rna {
# Inputs: read data and a transcriptome description.
File reads
File transcriptome
# Forwarded verbatim to --memory; units are not defined in this file.
String mem

# The result is written into the task's working directory so that the
# relative path in the output section below resolves to it.
command {
/opt/toil-scripts/quinine-pipelines.sh rna --reads ${reads} --transcriptome ${transcriptome} --memory ${mem} --output `pwd`/rna.txt
}

runtime {
docker: "quay.io/ucsc_cgl/quinine-pipelines"
}

output {
File response = "rna.txt"
}

}

# Single-call workflow wrapping the task above.
workflow wf {
call rna
}
6 changes: 6 additions & 0 deletions quinine-pipelines/rna.workflow.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"wf.rna.reads": "PWD/test/small.sam",
"wf.rna.transcriptome": "PWD/test/small.transcripts.gtf",
"wf.rna.mem": "1",
"wf.rna.out": "PWD/test/rna.txt"
}
23 changes: 23 additions & 0 deletions quinine-pipelines/targeted.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Runs the `targeted` subcommand of quinine-pipelines.sh inside the
# quinine-pipelines container and exposes the resulting targeted.txt.
task targeted {
# Inputs: read data plus the bait and target files.
File reads
File bait
File targets
# NOTE(review): `mem` is declared here but never used in the command below,
# whereas the contamination and rna tasks pass it as --memory. Either forward
# it or drop the input -- confirm that the targeted subcommand accepts
# --memory first.
String mem

# The result is written into the task's working directory so that the
# relative path in the output section below resolves to it.
command {
/opt/toil-scripts/quinine-pipelines.sh targeted --reads ${reads} --bait ${bait} --targets ${targets} --output `pwd`/targeted.txt
}

runtime {
docker: "quay.io/ucsc_cgl/quinine-pipelines"
}

output {
File response = "targeted.txt"
}

}

# Single-call workflow wrapping the task above.
workflow wf {
call targeted
}
7 changes: 7 additions & 0 deletions quinine-pipelines/targeted.workflow.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"wf.targeted.reads": "PWD/test/small.sam",
"wf.targeted.bait": "PWD/test/small.bait.bed",
"wf.targeted.targets": "PWD/test/small.targets.bed",
"wf.targeted.mem": "1",
"wf.targeted.out": "PWD/test/targeted.txt"
}
Loading

0 comments on commit 1a7a5cd

Please sign in to comment.