bio-workflow-management-wdl-workflows
SKILL.md
WDL Workflows
Basic Task Definition
version 1.0
# Run FastQC on one FASTQ file and capture its HTML and zip reports.
task fastqc {
  input {
    File fastq        # reads to QC (FastQC accepts gzipped or plain FASTQ)
    Int threads = 2   # FastQC worker threads; also drives the cpu request below
  }
  command <<<
    fastqc -t ~{threads} ~{fastq}
  >>>
  output {
    # FastQC derives output names from the input file name, so glob
    # is used rather than spelling the names out.
    File html = glob("*_fastqc.html")[0]
    File zip = glob("*_fastqc.zip")[0]
  }
  runtime {
    docker: "biocontainers/fastqc:v0.11.9"
    cpu: threads
    memory: "4 GB"
  }
}
Simple Workflow
version 1.0
# Paired-end RNA-seq quantification: fastp trimming, then Salmon quant.
workflow rnaseq {
  input {
    File fastq_1
    File fastq_2
    File salmon_index
    # Declared so "rnaseq.threads" in the example input JSON is a valid key;
    # previously the JSON supplied it but the workflow never accepted it.
    Int threads = 8
  }
  call fastp {
    input:
      reads_1 = fastq_1,
      reads_2 = fastq_2,
      threads = threads
  }
  call salmon_quant {
    input:
      reads_1 = fastp.trimmed_1,
      reads_2 = fastp.trimmed_2,
      index = salmon_index,
      threads = threads
  }
  output {
    File quant_sf = salmon_quant.quant_file
  }
}
Task with All Sections
version 1.0
# Paired-end alignment with bwa mem, piped to samtools for sort + index.
task bwa_mem {
  input {
    File reference
    # Not referenced in the command; presumably localized alongside the
    # reference so bwa can find it. NOTE(review): bwa mem needs the full
    # .amb/.ann/.bwt/.pac/.sa index set — confirm how that is provided.
    File reference_index
    File reads_1
    File reads_2
    String sample_id   # used for the @RG read group and output file names
    Int threads = 8
  }
  # Private declaration: scratch disk sized from inputs plus headroom.
  Int disk_size = ceil(size(reference, "GB") + size(reads_1, "GB") * 3) + 20
  command <<<
    bwa mem -t ~{threads} -R "@RG\tID:~{sample_id}\tSM:~{sample_id}" \
      ~{reference} ~{reads_1} ~{reads_2} | \
      samtools sort -@ ~{threads} -o ~{sample_id}.sorted.bam
    samtools index ~{sample_id}.sorted.bam
  >>>
  output {
    File bam = "~{sample_id}.sorted.bam"
    File bai = "~{sample_id}.sorted.bam.bai"
  }
  runtime {
    # NOTE(review): this image ships bwa only; the samtools calls in the
    # command likely need a combined bwa+samtools image — confirm.
    docker: "biocontainers/bwa:v0.7.17"
    cpu: threads
    memory: "16 GB"
    disks: "local-disk " + disk_size + " HDD"
  }
}
Scatter (Parallel Execution)
version 1.0
# Align many FASTQ files in parallel: one align shard per input file.
workflow process_samples {
  input {
    Array[File] fastq_files
    File reference
  }
  scatter (fastq in fastq_files) {
    call align {
      input:
        fastq = fastq,
        reference = reference
    }
  }
  output {
    # align.bam is implicitly gathered into an Array across scatter shards.
    Array[File] bam_files = align.bam
  }
}
Scatter with Paired Files
version 1.0
# Bundles a sample's ID with its paired FASTQ files so a scatter can
# iterate one record per sample instead of parallel arrays.
struct SampleFastqs {
  String sample_id
  File fastq_1
  File fastq_2
}
# Scatter over SampleFastqs records, aligning each sample's read pair.
workflow paired_alignment {
  input {
    Array[SampleFastqs] samples   # one record per sample (see struct above)
    File reference
  }
  scatter (sample in samples) {
    call align {
      input:
        sample_id = sample.sample_id,
        reads_1 = sample.fastq_1,
        reads_2 = sample.fastq_2,
        reference = reference
    }
  }
  output {
    # Gathered automatically: one BAM per scatter shard.
    Array[File] bams = align.bam
  }
}
Conditional Execution
version 1.0
# Optionally run FastQC, controlled by a Boolean input flag.
workflow conditional_qc {
  input {
    File fastq
    Boolean run_qc = true   # set to false to skip QC entirely
  }
  if (run_qc) {
    call fastqc {
      input:
        fastq = fastq
    }
  }
  output {
    # Outputs of a call inside `if` become optional (File?);
    # the value is null when the branch was skipped.
    File? qc_report = fastqc.html
  }
}
Structs and Complex Types
version 1.0
# Groups the reference FASTA with its companion files so they can be
# passed around as a single value.
struct ReferenceData {
  File fasta
  File fasta_index
  File dict
  File? known_sites   # optional: omit when no known-sites resource is used
}
# Per-BAM variant calling, scattered over the input BAM list.
workflow variant_calling {
  input {
    ReferenceData reference   # bundled reference files (struct above)
    Array[File] bam_files
  }
  scatter (bam in bam_files) {
    call haplotype_caller {
      input:
        bam = bam,
        ref_fasta = reference.fasta,
        ref_index = reference.fasta_index,
        ref_dict = reference.dict
    }
  }
  # NOTE(review): no workflow-level output block — per-shard results are
  # not surfaced; add an output gathering haplotype_caller results if needed.
}
Input JSON
{
"rnaseq.fastq_1": "data/sample1_R1.fq.gz",
"rnaseq.fastq_2": "data/sample1_R2.fq.gz",
"rnaseq.salmon_index": "ref/salmon_index",
"rnaseq.threads": 8
}
Array Inputs JSON
{
  "paired_alignment.samples": [
    {
      "sample_id": "sample1",
      "fastq_1": "data/sample1_R1.fq.gz",
      "fastq_2": "data/sample1_R2.fq.gz"
    },
    {
      "sample_id": "sample2",
      "fastq_1": "data/sample2_R1.fq.gz",
      "fastq_2": "data/sample2_R2.fq.gz"
    }
  ],
  "paired_alignment.reference": "ref/genome.fa"
}
Subworkflows
version 1.0
import "qc.wdl" as qc
import "align.wdl" as align
# Top-level pipeline composed from imported subworkflows (qc.wdl, align.wdl).
workflow main_pipeline {
  input {
    File fastq_1
    File fastq_2
    File reference
  }
  # Calls use the import alias (qc., align.), but outputs are referenced
  # by the call name (quality_control), not the alias.
  call qc.quality_control {
    input:
      reads_1 = fastq_1,
      reads_2 = fastq_2
  }
  call align.alignment {
    input:
      reads_1 = quality_control.trimmed_1,
      reads_2 = quality_control.trimmed_2,
      reference = reference
  }
}
Runtime Options
# Reference runtime block showing commonly used attributes.
# NOTE(review): several of these are backend-specific (preemptible, zones,
# bootDiskSizeGb are honored by Cromwell's Google backend) — confirm support
# on your execution engine.
runtime {
  docker: "ubuntu:20.04"                  # container image for the task
  cpu: 4                                  # CPUs requested
  memory: "8 GB"
  disks: "local-disk 100 HDD"             # scratch disk: size and type
  preemptible: 3                          # preemptible-VM attempts before full price
  maxRetries: 2                           # retries after task failure
  zones: "us-central1-a us-central1-b"    # candidate zones for scheduling
  bootDiskSizeGb: 15                      # boot disk, for large docker images
}
String Interpolation and Expressions
version 1.0
# Demonstrates string interpolation, private declarations, and array
# serialization (sep=) inside a command block.
task process {
  input {
    String sample_id
    Int memory_gb = 8
    Array[File] input_files
  }
  # Private declarations: evaluated once, usable in the command and outputs.
  Int memory_mb = memory_gb * 1000
  String output_name = sample_id + ".processed.bam"
  command <<<
    # Access array elements
    process_tool \
      --memory ~{memory_mb} \
      --inputs ~{sep=' ' input_files} \
      --output ~{output_name}
  >>>
  output {
    File result = output_name
  }
}
File Size and Disk Calculation
version 1.0
# Alignment task demonstrating disk sizing computed from input file sizes.
task align {
  input {
    File reads_1
    File reads_2
    File reference
  }
  # Calculate disk: input files + 3x for outputs + buffer
  Int disk_gb = ceil(size(reads_1, "GB") + size(reads_2, "GB") +
    size(reference, "GB") * 2) + 50
  command <<<
    bwa mem ~{reference} ~{reads_1} ~{reads_2} > aligned.sam
  >>>
  output {
    # Previously missing: without an output block the SAM produced by the
    # command was discarded when the task finished.
    File sam = "aligned.sam"
  }
  runtime {
    disks: "local-disk " + disk_gb + " SSD"
  }
}
Complete RNA-seq Workflow
version 1.0
# End-to-end RNA-seq: per-sample fastp trimming followed by Salmon quant.
workflow rnaseq_pipeline {
  input {
    # Parallel arrays: sample_ids, fastq_1_files and fastq_2_files are
    # assumed to have the same length and ordering — TODO confirm callers.
    Array[String] sample_ids
    Array[File] fastq_1_files
    Array[File] fastq_2_files
    File salmon_index
    Int threads = 8
  }
  # Index-based scatter keeps matching entries of the three arrays paired.
  scatter (idx in range(length(sample_ids))) {
    call fastp {
      input:
        sample_id = sample_ids[idx],
        reads_1 = fastq_1_files[idx],
        reads_2 = fastq_2_files[idx],
        threads = threads
    }
    call salmon_quant {
      input:
        sample_id = sample_ids[idx],
        reads_1 = fastp.trimmed_1,
        reads_2 = fastp.trimmed_2,
        index = salmon_index,
        threads = threads
    }
  }
  output {
    # Implicit gather: each per-shard output becomes an Array here.
    Array[File] quant_files = salmon_quant.quant_sf
    Array[File] fastp_reports = fastp.json_report
  }
}
# Adapter/quality trimming of paired-end reads with fastp.
task fastp {
  input {
    String sample_id   # prefix for all output file names
    File reads_1
    File reads_2
    Int threads = 4
  }
  command <<<
    fastp -i ~{reads_1} -I ~{reads_2} \
      -o ~{sample_id}_trimmed_R1.fq.gz \
      -O ~{sample_id}_trimmed_R2.fq.gz \
      --json ~{sample_id}_fastp.json \
      --thread ~{threads}
  >>>
  output {
    File trimmed_1 = "~{sample_id}_trimmed_R1.fq.gz"
    File trimmed_2 = "~{sample_id}_trimmed_R2.fq.gz"
    File json_report = "~{sample_id}_fastp.json"   # machine-readable QC report
  }
  runtime {
    docker: "quay.io/biocontainers/fastp:0.23.4--hadf994f_2"
    cpu: threads
    memory: "4 GB"
  }
}
# Transcript quantification with Salmon in mapping-based mode.
task salmon_quant {
  input {
    String sample_id
    File reads_1
    File reads_2
    # NOTE(review): a Salmon index is a directory on disk; this assumes it
    # is delivered as a single file (e.g. unpacked upstream) — confirm.
    File index
    Int threads = 8
  }
  command <<<
    salmon quant -i ~{index} -l A \
      -1 ~{reads_1} -2 ~{reads_2} \
      -o ~{sample_id}_salmon \
      --threads ~{threads} --validateMappings
    # WDL 1.0 has no Directory type, so archive the output directory to
    # expose it as a single File (a bare directory is not a valid File).
    tar -czf ~{sample_id}_salmon.tar.gz ~{sample_id}_salmon
  >>>
  output {
    File quant_sf = "~{sample_id}_salmon/quant.sf"
    # Full Salmon output directory, as a tarball (was previously declared
    # as a File pointing at the directory itself, which engines reject).
    File quant_dir = "~{sample_id}_salmon.tar.gz"
  }
  runtime {
    docker: "quay.io/biocontainers/salmon:1.10.0--h7e5ed60_0"
    cpu: threads
    memory: "16 GB"
  }
}
Run Commands
# Validate WDL syntax
womtool validate workflow.wdl
# Generate inputs template
womtool inputs workflow.wdl > inputs.json
# Run with Cromwell (local)
java -jar cromwell.jar run workflow.wdl -i inputs.json
# Run with miniwdl (simpler local runner)
# (miniwdl can also validate/lint: miniwdl check workflow.wdl)
miniwdl run workflow.wdl -i inputs.json
# Run on Terra
# Upload WDL and inputs.json to Terra workspace
Execution Engines
| Engine | Use Case |
|---|---|
| Cromwell | Full-featured, Google Cloud, AWS, HPC |
| miniwdl | Lightweight local execution |
| Terra | Cloud platform with Cromwell backend |
| AnVIL | NIH cloud platform (Terra-based) |
| dxWDL | DNAnexus platform |
Related Skills
- workflow-management/cwl-workflows - CWL alternative
- workflow-management/snakemake-workflows - Python-based alternative
- workflow-management/nextflow-pipelines - Groovy-based alternative
Weekly Installs
3
Repository
gptomics/bioskills
GitHub Stars
339
First Seen
Jan 24, 2026
Security Audits
Installed on
trae 2
windsurf 1
opencode 1
codex 1
claude-code 1
antigravity 1