let base_quality_score_recalibrator
~configuration:(bqsr_configuration, print_reads_configuration)
~run_with
~input_bam ~output_bam =
let open KEDSL in
let name = sprintf "gatk-%s" (Filename.basename output_bam) in
let gatk = Machine.get_tool run_with Machine.Tool.Default.gatk in
let reference_genome =
Machine.get_reference_genome run_with input_bam#product#reference_build in
let fasta = Reference_genome.fasta reference_genome in
let db_snp = Reference_genome.dbsnp_exn reference_genome in
let sorted_bam =
Samtools.sort_bam_if_necessary
~run_with ~by:`Coordinate input_bam in
let input_bam = `Please_use_the_sorted_one in ignore input_bam;
let recal_data_table =
Filename.chop_suffix sorted_bam#product#path ".bam" ^ "-recal_data.table"
in
let processors = Machine.max_processors run_with in
let make =
Machine.run_big_program run_with ~name ~processors
~self_ids:["gatk"; "bqsr"]
Program.(
Machine.Tool.(init gatk)
&& call_gatk ~analysis:"BaseRecalibrator" ([
"-nct"; Int.to_string processors;
"-I"; sorted_bam#product#path;
"-R"; fasta#product#path;
"-knownSites"; db_snp#product#path;
"-o"; recal_data_table;
] @ Configuration.Bqsr.render bqsr_configuration)
&& call_gatk ~analysis:"PrintReads" ([
"-nct"; Int.to_string processors;
"-R"; fasta#product#path;
"-I"; sorted_bam#product#path;
"-BQSR"; recal_data_table;
"-o"; output_bam;
] @ Configuration.Print_reads.render print_reads_configuration)
) in
workflow_node ~name (transform_bam sorted_bam#product ~path:output_bam)
~make
~edges:[
depends_on Machine.Tool.(ensure gatk);
depends_on fasta; depends_on db_snp;
depends_on sorted_bam;
depends_on (Samtools.index_to_bai ~run_with sorted_bam);
on_failure_activate (Remove.file ~run_with output_bam);
on_failure_activate (Remove.file ~run_with recal_data_table);
]