bio-workflow-management-wdl-workflows

Installation

SKILL.md

WDL Workflows

Basic Task Definition

version 1.0

# Runs FastQC on one FASTQ file and collects the HTML report and the zipped
# result bundle it produces.
#
# Inputs:
#   fastq   - reads to assess
#   threads - value passed to `fastqc -t`; also requested as the CPU count
# Outputs:
#   html - first file matching *_fastqc.html in the task working directory
#   zip  - first file matching *_fastqc.zip in the task working directory
task fastqc {
    input {
        File fastq
        Int threads = 2
    }

    command <<<
        # FastQC writes its reports next to the input file unless an output
        # directory is given. Execution engines localize inputs outside the
        # task working directory, so write to "." where the output globs
        # below are evaluated.
        fastqc -t ~{threads} --outdir . ~{fastq}
    >>>

    output {
        File html = glob("*_fastqc.html")[0]
        File zip = glob("*_fastqc.zip")[0]
    }

    runtime {
        # NOTE(review): confirm this tag exists in the registry; Docker Hub
        # biocontainers tags often carry a "_cvN" suffix.
        docker: "biocontainers/fastqc:v0.11.9"
        cpu: threads
        memory: "4 GB"
    }
}

Simple Workflow

version 1.0

# Single-sample RNA-seq workflow: trim a read pair with fastp, then quantify
# the trimmed reads with salmon.
#
# Inputs:
#   fastq_1, fastq_2 - paired-end reads
#   salmon_index     - salmon index handed to salmon_quant
#   sample_id        - prefix the tasks use for their output names (optional)
#   threads          - thread count forwarded to both tasks (optional)
# Outputs:
#   quant_sf - salmon quantification table
workflow rnaseq {
    input {
        File fastq_1
        File fastq_2
        File salmon_index
        # Optional inputs with defaults, so existing input files stay valid.
        String sample_id = "sample"
        Int threads = 8
    }

    call fastp {
        input:
            sample_id = sample_id,
            reads_1 = fastq_1,
            reads_2 = fastq_2,
            threads = threads
    }

    call salmon_quant {
        input:
            sample_id = sample_id,
            reads_1 = fastp.trimmed_1,
            reads_2 = fastp.trimmed_2,
            index = salmon_index,
            threads = threads
    }

    output {
        # salmon_quant names this output quant_sf, not quant_file.
        File quant_sf = salmon_quant.quant_sf
    }
}

Task with All Sections

version 1.0

# Aligns a read pair with `bwa mem`, coordinate-sorts the result with
# samtools and indexes the sorted BAM.
#
# Inputs:
#   reference       - reference FASTA passed to bwa
#   reference_index - index file for the reference; not named in the command,
#                     declared so the engine stages it with the task
#                     (NOTE(review): bwa normally needs several index files
#                     next to the reference - confirm one File is enough)
#   reads_1/reads_2 - paired-end reads
#   sample_id       - used for the read-group ID/SM and the output file names
#   threads         - threads for bwa and samtools sort; also the CPU request
# Outputs:
#   bam - <sample_id>.sorted.bam
#   bai - <sample_id>.sorted.bam.bai
task bwa_mem {
    input {
        File reference
        File reference_index
        File reads_1
        File reads_2
        String sample_id
        Int threads = 8
    }

    # Disk estimate in GB: reference + 3x both read files + 20 GB headroom.
    # Both mates are counted; sizing from reads_1 alone under-provisions
    # paired-end runs.
    Int disk_size = ceil(size(reference, "GB") + (size(reads_1, "GB") + size(reads_2, "GB")) * 3) + 20

    command <<<
        # Fail the task if any stage of the pipeline fails; without pipefail
        # a bwa error is hidden by a successful samtools sort.
        set -euo pipefail

        bwa mem -t ~{threads} -R "@RG\tID:~{sample_id}\tSM:~{sample_id}" \
            ~{reference} ~{reads_1} ~{reads_2} | \
            samtools sort -@ ~{threads} -o ~{sample_id}.sorted.bam
        samtools index ~{sample_id}.sorted.bam
    >>>

    output {
        File bam = "~{sample_id}.sorted.bam"
        File bai = "~{sample_id}.sorted.bam.bai"
    }

    runtime {
        # NOTE(review): the command needs both bwa and samtools; confirm this
        # image ships samtools and that the tag exists.
        docker: "biocontainers/bwa:v0.7.17"
        cpu: threads
        memory: "16 GB"
        disks: "local-disk " + disk_size + " HDD"
    }
}

Scatter (Parallel Execution)

version 1.0

# Fans out one `align` call per FASTQ file and gathers the resulting BAMs.
workflow process_samples {
    input {
        Array[File] fastq_files
        File reference
    }

    # Each iteration is independent, so the engine may run them in parallel.
    scatter (fastq_file in fastq_files) {
        call align { input: fastq = fastq_file, reference = reference }
    }

    output {
        # Referencing a scattered call's output outside the scatter yields an
        # array with one element per iteration.
        Array[File] bam_files = align.bam
    }
}

Scatter with Paired Files

version 1.0

# Bundles one sample's identifier with its two FASTQ files so that a whole
# sample can travel through a workflow as a single value.
struct SampleFastqs {
    String sample_id
    File fastq_1
    File fastq_2
}

# Runs one `align` call per SampleFastqs entry and collects the BAM files.
workflow paired_alignment {
    input {
        Array[SampleFastqs] samples
        File reference
    }

    scatter (entry in samples) {
        # Unpack the struct members into the task's individual inputs.
        call align {
            input:
                reference = reference,
                sample_id = entry.sample_id,
                reads_1 = entry.fastq_1,
                reads_2 = entry.fastq_2
        }
    }

    output {
        # One BAM per element of `samples`.
        Array[File] bams = align.bam
    }
}

Conditional Execution

version 1.0

# Optionally runs FastQC on a FASTQ file, controlled by the run_qc flag.
workflow conditional_qc {
    input {
        File fastq
        Boolean run_qc = true
    }

    # The call is skipped entirely when run_qc is false.
    if (run_qc) {
        call fastqc { input: fastq = fastq }
    }

    output {
        # Declared optional (File?) because the call sits inside a
        # conditional and may not have run.
        File? qc_report = fastqc.html
    }
}

Structs and Complex Types

version 1.0

# Groups the files that describe one reference so they can be passed to a
# workflow as a single input.
struct ReferenceData {
    File fasta
    File fasta_index
    File dict
    # Optional member: may be left out of the inputs.
    File? known_sites
}

# Calls `haplotype_caller` once per BAM, taking the reference files from a
# ReferenceData struct.
workflow variant_calling {
    input {
        ReferenceData reference
        Array[File] bam_files
    }

    scatter (bam_file in bam_files) {
        # Struct members are read with dot notation. known_sites is not
        # passed to this call.
        call haplotype_caller {
            input:
                bam = bam_file,
                ref_fasta = reference.fasta,
                ref_index = reference.fasta_index,
                ref_dict = reference.dict
        }
    }
}

Input JSON

{
    "rnaseq.fastq_1": "data/sample1_R1.fq.gz",
    "rnaseq.fastq_2": "data/sample1_R2.fq.gz",
    "rnaseq.salmon_index": "ref/salmon_index",
    "rnaseq.threads": 8
}

Array Inputs JSON

{
    "paired_alignment.samples": [
        {
            "sample_id": "sample1",
            "fastq_1": "data/sample1_R1.fq.gz",
            "fastq_2": "data/sample1_R2.fq.gz"
        },
        {
            "sample_id": "sample2",
            "fastq_1": "data/sample2_R1.fq.gz",
            "fastq_2": "data/sample2_R2.fq.gz"
        }
    ],
    "paired_alignment.reference": "ref/genome.fa"
}

Subworkflows

version 1.0

import "qc.wdl" as qc
import "align.wdl" as align

# Chains two imported workflows: quality control from the `qc` namespace,
# then alignment from the `align` namespace on the trimmed reads.
workflow main_pipeline {
    input {
        File fastq_1
        File fastq_2
        File reference
    }

    # Imported callables are addressed as <namespace>.<name>.
    call qc.quality_control {
        input:
            reads_1 = fastq_1,
            reads_2 = fastq_2
    }

    # Outputs of the call above are referenced by the call name alone
    # (quality_control), without the namespace prefix.
    call align.alignment {
        input:
            reads_1 = quality_control.trimmed_1,
            reads_2 = quality_control.trimmed_2,
            reference = reference
    }
}

Runtime Options

# Example runtime section listing attributes a task can request.
# NOTE(review): which keys are honoured depends on the execution engine and
# backend; preemptible, maxRetries, zones and bootDiskSizeGb look like
# Cromwell / Google Cloud attributes - confirm against the engine docs.
runtime {
    docker: "ubuntu:20.04"
    cpu: 4
    memory: "8 GB"
    disks: "local-disk 100 HDD"
    preemptible: 3
    maxRetries: 2
    zones: "us-central1-a us-central1-b"
    bootDiskSizeGb: 15
}

String Interpolation and Expressions

version 1.0

# Demonstrates expressions and string interpolation: values derived from the
# inputs, and an array expanded into a space-separated argument list.
#
# Inputs:
#   sample_id   - prefix of the output file name
#   memory_gb   - memory budget in GB; given to the tool (as MB) and
#                 requested from the engine
#   input_files - files passed to the tool's --inputs option
# Outputs:
#   result - <sample_id>.processed.bam
task process {
    input {
        String sample_id
        Int memory_gb = 8
        Array[File] input_files
    }

    # Derived values are computed once and reused below.
    Int memory_mb = memory_gb * 1000
    String output_name = sample_id + ".processed.bam"

    command <<<
        # The sep option joins the array elements with a single space.
        process_tool \
            --memory ~{memory_mb} \
            --inputs ~{sep=' ' input_files} \
            --output ~{output_name}
    >>>

    output {
        File result = output_name
    }

    runtime {
        # Request the same memory budget the tool is told to use; otherwise
        # the engine's default allocation may be smaller than memory_gb.
        memory: "~{memory_gb} GB"
    }
}

File Size and Disk Calculation

version 1.0

# Aligns a read pair with `bwa mem`, writing uncompressed SAM, and sizes the
# requested disk from the input file sizes.
#
# Inputs:
#   reads_1/reads_2 - paired-end reads
#   reference       - reference FASTA passed to bwa
# Outputs:
#   sam - aligned.sam
task align {
    input {
        File reads_1
        File reads_2
        File reference
    }

    # Disk in GB: the input files (reference + both read files), plus 3x the
    # read files for the SAM output, plus a 50 GB buffer.
    Float reads_gb = size(reads_1, "GB") + size(reads_2, "GB")
    Int disk_gb = ceil(size(reference, "GB") + reads_gb + reads_gb * 3) + 50

    command <<<
        bwa mem ~{reference} ~{reads_1} ~{reads_2} > aligned.sam
    >>>

    output {
        # Without an output declaration the alignment is never captured.
        File sam = "aligned.sam"
    }

    runtime {
        disks: "local-disk " + disk_gb + " SSD"
    }
}

Complete RNA-seq Workflow

version 1.0

# Multi-sample RNA-seq workflow: for each sample, trim the read pair with
# fastp and quantify the trimmed reads with salmon.
#
# sample_ids, fastq_1_files and fastq_2_files are parallel arrays; position i
# in each describes the same sample.
workflow rnaseq_pipeline {
    input {
        Array[String] sample_ids
        Array[File] fastq_1_files
        Array[File] fastq_2_files
        File salmon_index
        Int threads = 8
    }

    # Scatter over indices so the three arrays can be read in lockstep.
    scatter (i in range(length(sample_ids))) {
        String current_id = sample_ids[i]

        call fastp {
            input:
                sample_id = current_id,
                reads_1 = fastq_1_files[i],
                reads_2 = fastq_2_files[i],
                threads = threads
        }

        # Consumes the trimmed reads of the same iteration.
        call salmon_quant {
            input:
                sample_id = current_id,
                reads_1 = fastp.trimmed_1,
                reads_2 = fastp.trimmed_2,
                index = salmon_index,
                threads = threads
        }
    }

    output {
        # One entry per sample, in the order of sample_ids.
        Array[File] quant_files = salmon_quant.quant_sf
        Array[File] fastp_reports = fastp.json_report
    }
}

# Trims a read pair with fastp and writes a JSON report; every output name
# is prefixed with the sample identifier.
task fastp {
    input {
        String sample_id
        File reads_1
        File reads_2
        Int threads = 4
    }

    # Output file names, derived once and shared by the command and the
    # output section.
    String trimmed_r1_name = sample_id + "_trimmed_R1.fq.gz"
    String trimmed_r2_name = sample_id + "_trimmed_R2.fq.gz"
    String report_name = sample_id + "_fastp.json"

    command <<<
        fastp \
            -i ~{reads_1} \
            -I ~{reads_2} \
            -o ~{trimmed_r1_name} \
            -O ~{trimmed_r2_name} \
            --json ~{report_name} \
            --thread ~{threads}
    >>>

    output {
        File trimmed_1 = trimmed_r1_name
        File trimmed_2 = trimmed_r2_name
        File json_report = report_name
    }

    runtime {
        docker: "quay.io/biocontainers/fastp:0.23.4--hadf994f_2"
        cpu: threads
        memory: "4 GB"
    }
}

# Quantifies a read pair with `salmon quant`, using automatic library type
# detection (-l A) and --validateMappings.
#
# Inputs:
#   sample_id       - prefix of the salmon output directory
#   reads_1/reads_2 - paired-end reads
#   index           - salmon index passed to `-i`
#                     (NOTE(review): a salmon index is normally a directory,
#                     which a WDL 1.0 File cannot represent - confirm how the
#                     index is staged on the target engine)
#   threads         - passed to --threads; also the CPU request
# Outputs:
#   quant_sf  - <sample_id>_salmon/quant.sf
#   quant_dir - gzipped tar archive of the whole salmon output directory
task salmon_quant {
    input {
        String sample_id
        File reads_1
        File reads_2
        File index
        Int threads = 8
    }

    command <<<
        # Stop at the first failing command so a salmon error is not hidden
        # by the archiving step.
        set -euo pipefail

        salmon quant -i ~{index} -l A \
            -1 ~{reads_1} -2 ~{reads_2} \
            -o ~{sample_id}_salmon \
            --threads ~{threads} --validateMappings

        # A File output must be a regular file, so the output directory is
        # packed into an archive instead of being returned as-is.
        tar -czf ~{sample_id}_salmon.tar.gz ~{sample_id}_salmon
    >>>

    output {
        File quant_sf = "~{sample_id}_salmon/quant.sf"
        File quant_dir = "~{sample_id}_salmon.tar.gz"
    }

    runtime {
        docker: "quay.io/biocontainers/salmon:1.10.0--h7e5ed60_0"
        cpu: threads
        memory: "16 GB"
    }
}

Run Commands

# Validate the WDL syntax before running anything
# NOTE(review): assumes a `womtool` launcher is on PATH - confirm
womtool validate workflow.wdl

# Generate an inputs template and save it as inputs.json
womtool inputs workflow.wdl > inputs.json

# Run with Cromwell (local), reading inputs from inputs.json
java -jar cromwell.jar run workflow.wdl -i inputs.json

# Run with miniwdl (simpler local runner), using the same inputs file
miniwdl run workflow.wdl -i inputs.json

# Run on Terra (no local command)
# Upload the WDL and inputs.json to a Terra workspace

Execution Engines

Engine	Use Case
Cromwell	Full-featured, Google Cloud, AWS, HPC
miniwdl	Lightweight local execution
Terra	Cloud platform with Cromwell backend
AnVIL	NIH cloud platform (Terra-based)
dxCompiler (formerly dxWDL)	DNAnexus platform

Related Skills

workflow-management/cwl-workflows - CWL alternative
workflow-management/snakemake-workflows - Python-based alternative
workflow-management/nextflow-pipelines - Groovy-based alternative

Related skills

More from gptomics/bioskills

Installs

Repository

gptomics/bioskills

GitHub Stars

562

First Seen

Jan 24, 2026

bio-workflow-management-wdl-workflows

WDL Workflows

Basic Task Definition

Simple Workflow

Task with All Sections

Scatter (Parallel Execution)

Scatter with Paired Files

Conditional Execution

Structs and Complex Types

Input JSON

Array Inputs JSON

Subworkflows

Runtime Options

String Interpolation and Expressions

File Size and Disk Calculation

Complete RNA-seq Workflow

Run Commands

Execution Engines

Related Skills

More from gptomics/bioskills

bioskills

bio-epitranscriptomics-merip-preprocessing

bio-data-visualization-multipanel-figures

bio-read-qc-fastp-workflow

bio-data-visualization-circos-plots

bio-microbiome-diversity-analysis