bio-workflow-management-cwl-workflows
SKILL.md
CWL Workflows
Basic Tool Definition
# fastqc.cwl
# Runs FastQC on a single FASTQ file and collects the HTML report and the
# zipped result bundle.
cwlVersion: v1.2
class: CommandLineTool
baseCommand: fastqc

# FastQC writes its reports next to the input file unless told otherwise.
# CWL stages inputs outside the output directory, so without --outdir the
# globs below would match nothing.
arguments:
  - prefix: --outdir
    valueFrom: $(runtime.outdir)

inputs:
  # FASTQ file to analyse; passed as the positional argument.
  fastq:
    type: File
    inputBinding:
      position: 1

outputs:
  # HTML quality report.
  html:
    type: File
    outputBinding:
      glob: "*_fastqc.html"
  # Zipped report data.
  zip:
    type: File
    outputBinding:
      glob: "*_fastqc.zip"
Tool with Parameters
# bwa_mem.cwl
# Wraps `bwa mem`; the alignment is written to stdout and captured as
# aligned.sam.
cwlVersion: v1.2
class: CommandLineTool
baseCommand:
  - bwa
  - mem

requirements:
  DockerRequirement:
    dockerPull: biocontainers/bwa:v0.7.17
  ResourceRequirement:
    coresMin: 8
    ramMin: 16000

inputs:
  # Thread count, passed as `-t <n>`.
  threads:
    type: int
    default: 8
    inputBinding:
      position: 1
      prefix: -t
  # Reference FASTA together with its BWA index files.
  reference:
    type: File
    secondaryFiles: [.amb, .ann, .bwt, .pac, .sa]
    inputBinding:
      position: 2
  # First read file.
  reads_1:
    type: File
    inputBinding:
      position: 3
  # Optional second read file.
  reads_2:
    type: File?
    inputBinding:
      position: 4

# Redirect the tool's stdout into this file.
stdout: aligned.sam

outputs:
  # The captured stdout file.
  sam:
    type: stdout
Basic Workflow
# rnaseq.cwl
# Two-step workflow: the reads go through fastp.cwl, and its trimmed outputs
# feed salmon_quant.cwl.
cwlVersion: v1.2
class: Workflow

inputs:
  fastq_1:
    type: File
  fastq_2:
    type: File
  salmon_index:
    type: Directory

outputs:
  # Output directory produced by the salmon step.
  quant_results:
    type: Directory
    outputSource: salmon/quant_dir

steps:
  fastp:
    run: fastp.cwl
    in:
      reads_1: fastq_1
      reads_2: fastq_2
    out:
      - trimmed_1
      - trimmed_2
      - json_report

  salmon:
    run: salmon_quant.cwl
    in:
      index: salmon_index
      # Consume the outputs of the fastp step, not the raw inputs.
      reads_1: fastp/trimmed_1
      reads_2: fastp/trimmed_2
    out:
      - quant_dir
Scatter (Parallel Execution)
# Runs bwa_mem.cwl once per element of `fastq_files`; the scattered jobs are
# independent, so the runner may execute them in parallel.
cwlVersion: v1.2
class: Workflow

requirements:
  ScatterFeatureRequirement: {}

inputs:
  fastq_files:
    type: File[]
  reference: File

outputs:
  # One alignment per input FASTQ, in input order. bwa_mem.cwl captures
  # stdout as SAM, so these files are SAM despite the output id.
  bam_files:
    type: File[]
    outputSource: align/sam

steps:
  align:
    run: bwa_mem.cwl
    # `scatter` must name an input of the step; bwa_mem.cwl takes the reads
    # as `reads_1` and exposes its result as `sam`.
    scatter: reads_1
    in:
      reads_1: fastq_files
      reference: reference
    out: [sam]
Multi-Scatter
# Workflow fragment: scatter over two arrays in lock-step.
requirements:
  ScatterFeatureRequirement: {}
  # NOTE(review): only needed when a single step input lists several sources;
  # nothing in this fragment does. Confirm the enclosing workflow needs it.
  MultipleInputFeatureRequirement: {}

steps:
  align:
    run: bwa_mem.cwl
    scatter: [reads_1, reads_2]
    # dotproduct pairs element i of reads_1 with element i of reads_2, so
    # both arrays must have the same length.
    scatterMethod: dotproduct
    in:
      reads_1: fastq_1_array
      reads_2: fastq_2_array
      reference: reference
    # bwa_mem.cwl names its only output `sam`.
    out: [sam]
Input File (Job)
# job.yaml
# Input object supplying concrete values for a workflow's inputs.
fastq_1:
  path: data/sample1_R1.fq.gz
  class: File

fastq_2:
  path: data/sample1_R2.fq.gz
  class: File

salmon_index:
  path: ref/salmon_index
  class: Directory

# NOTE(review): rnaseq.cwl as shown declares no `threads` input — presumably
# this targets a workflow variant that exposes it; verify.
threads: 8
Secondary Files
# Fragment: declaring companion files that must travel with a primary file.
inputs:
  bam:
    type: File
    # Shorthand form: the suffix is appended to the primary file name.
    secondaryFiles:
      - .bai

  reference:
    type: File
    # Long form: each entry states whether the companion file is mandatory.
    secondaryFiles:
      - required: true
        pattern: .fai
      # NOTE(review): a plain suffix is appended to the full file name (a
      # leading `^` strips the extension first) — confirm this matches how
      # the .dict file is actually named.
      - required: false
        pattern: .dict
Docker and Singularity
# Fragment: pin the container image and record the packaged software version.
requirements:
  DockerRequirement:
    dockerPull: quay.io/biocontainers/salmon:1.10.0--h7e5ed60_0

hints:
  SoftwareRequirement:
    packages:
      salmon:
        # Quoted so the version stays a string.
        version:
          - "1.10.0"
# Run with Docker (cwltool's default container runtime; there is no --docker flag)
cwltool workflow.cwl job.yaml
# Run with Singularity
cwltool --singularity workflow.cwl job.yaml
Resource Requirements
# Fragment: resource bounds requested from the runner. Per the CWL
# specification, RAM and directory sizes are expressed in mebibytes.
requirements:
  ResourceRequirement:
    # CPU cores
    coresMin: 4
    coresMax: 16
    # Memory
    ramMin: 8000
    ramMax: 32000
    # Scratch space for the output and temporary directories
    outdirMin: 10000
    tmpdirMin: 10000
Conditional Steps
# Runs FastQC only when `run_qc` is true.
cwlVersion: v1.2
class: Workflow

requirements:
  InlineJavascriptRequirement: {}

inputs:
  run_qc: boolean
  fastq: File

# `outputs` is a required field of a Workflow. A skipped conditional step
# yields null for its outputs, so the type must be optional.
outputs:
  qc_html:
    type: File?
    outputSource: fastqc/html

steps:
  fastqc:
    run: fastqc.cwl
    # The step is skipped when this evaluates to false.
    when: $(inputs.run_qc)
    in:
      # Passed into the step only so the `when` expression can read it.
      run_qc: run_qc
      fastq: fastq
    out: [html]
Subworkflows
# main.cwl
# Fragment: chaining two nested workflows. A workflow whose steps run other
# workflows must declare SubworkflowFeatureRequirement.
requirements:
  SubworkflowFeatureRequirement: {}

steps:
  qc_workflow:
    run: subworkflows/qc.cwl
    in:
      reads_1: fastq_1
      reads_2: fastq_2
    out: [qc_report, trimmed_1, trimmed_2]

  alignment_workflow:
    run: subworkflows/align.cwl
    in:
      # Consume the trimmed reads emitted by the QC sub-workflow.
      reads_1: qc_workflow/trimmed_1
      reads_2: qc_workflow/trimmed_2
    out: [bam]
File Arrays and Directories
# Fragment: an array-of-files input and a Directory output.
inputs:
  bam_files:
    type: File[]
  # Name of the directory to collect as the result.
  output_dir:
    default: "results"
    type: string

outputs:
  results:
    type: Directory
    outputBinding:
      # Glob for the directory named by the `output_dir` input.
      glob: $(inputs.output_dir)
JavaScript Expressions
# Fragment: deriving file names from an input value.
requirements:
  InlineJavascriptRequirement: {}

inputs:
  sample_name:
    type: string

outputs:
  output_bam:
    type: File
    outputBinding:
      # JavaScript expression: concatenates the sample name and the suffix.
      glob: '$(inputs.sample_name + ".sorted.bam")'

arguments:
  # String interpolation: the parameter reference is substituted in place.
  - valueFrom: $(inputs.sample_name).sorted.bam
    prefix: -o
InitialWorkDirRequirement
# Fragment: populate the working directory before the tool starts.
requirements:
  InitialWorkDirRequirement:
    listing:
      # Stage the `reference` input as a read-only entry.
      - writable: false
        entry: $(inputs.reference)
      # Generate config.txt from input values.
      - entryname: config.txt
        entry: |
          threads=$(inputs.threads)
          memory=$(inputs.memory)
Complete RNA-seq Tool
# salmon_quant.cwl
# Wraps `salmon quant` for a pair of read files and returns the directory
# the tool writes to.
cwlVersion: v1.2
class: CommandLineTool
baseCommand:
  - salmon
  - quant

requirements:
  DockerRequirement:
    dockerPull: quay.io/biocontainers/salmon:1.10.0--h7e5ed60_0
  ResourceRequirement:
    coresMin: 8
    ramMin: 16000

inputs:
  # Index directory, passed with -i.
  index:
    type: Directory
    inputBinding:
      prefix: -i
  # Read files, passed with -1 / -2. The prefixes are quoted so YAML keeps
  # them as strings rather than negative integers.
  reads_1:
    type: File
    inputBinding:
      prefix: "-1"
  reads_2:
    type: File
    inputBinding:
      prefix: "-2"
  # Library type, passed with -l.
  lib_type:
    type: string
    default: A
    inputBinding:
      prefix: -l
  # Thread count, passed with --threads.
  threads:
    type: int
    default: 8
    inputBinding:
      prefix: --threads
  # Output directory name, passed with -o.
  output_dir:
    type: string
    default: quant_output
    inputBinding:
      prefix: -o

outputs:
  # The directory named by `output_dir`.
  quant_dir:
    type: Directory
    outputBinding:
      glob: $(inputs.output_dir)
Run Commands
# Validate CWL file
cwltool --validate workflow.cwl
# Run workflow
cwltool workflow.cwl job.yaml
# Run with Docker (cwltool's default container runtime; there is no --docker flag)
cwltool workflow.cwl job.yaml
# Run with Singularity
cwltool --singularity workflow.cwl job.yaml
# Run with caching
cwltool --cachedir ./cache workflow.cwl job.yaml
# Run on Toil
toil-cwl-runner workflow.cwl job.yaml
Execution Engines
| Engine | Use Case |
|---|---|
| cwltool | Reference implementation, local execution |
| Toil | HPC clusters, cloud (AWS, Google, Azure) |
| Arvados | Enterprise workflow management |
| CWL-Airflow | Airflow integration |
Related Skills
- workflow-management/wdl-workflows - WDL alternative
- workflow-management/snakemake-workflows - Python-based alternative
- workflow-management/nextflow-pipelines - Groovy-based alternative
Weekly Installs
3
Repository
gptomics/bioskills
Installed on
windsurf: 2
trae: 2
opencode: 2
codex: 2
claude-code: 2
antigravity: 2