module Biopam
: sig
open Biokepi_run_environment
open Common
val default_opam_url : string
val default_biopam_url : string
type tool_type = [
| `Library of string
| `Application
]
type install_target = private {
definition: Machine.Tool.Definition.t;
tool_type : tool_type;
package : string;
witness : string;
test :
(host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
edges : KEDSL.workflow_edge list;
init_environment : install_path: string -> KEDSL.Program.t;
requires_conda: bool;
repository: [ `Biopam | `Opam | `Custom of string ];
compiler: string option;
}
val install_target:
?tool_type:tool_type ->
?test:(host: KEDSL.Host.t -> string -> KEDSL.Command.t) ->
?edges: KEDSL.workflow_edge list ->
?init_environment:(install_path:string -> KEDSL.Program.t) ->
?requires_conda:bool ->
witness:string ->
?package:string ->
?repository:[ `Biopam | `Custom of string | `Opam ] ->
?compiler:string ->
Machine.Tool.Definition.t ->
install_target
val provide :
run_program: Machine.Make_fun.t ->
host: Common.KEDSL.Host.t ->
install_path:string -> install_target -> Machine.Tool.t
val default :
run_program: Machine.Make_fun.t ->
host: Common.KEDSL.Host.t ->
install_path:string -> unit ->
Machine.Tool.Kit.t
end
= struct
open Biokepi_run_environment
open Common
type tool_type = [
| `Library of string
| `Application
]
type install_target = {
definition: Machine.Tool.Definition.t;
tool_type : tool_type;
package : string;
witness : string;
test : (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
edges : KEDSL.workflow_edge list;
init_environment : install_path: string -> KEDSL.Program.t;
requires_conda: bool;
repository: [ `Biopam | `Opam | `Custom of string ];
compiler: string option;
}
let install_target
?(tool_type = `Application)
?test
?(edges = [])
?(init_environment =
fun ~install_path -> KEDSL.Program.(sh "echo 'Default Init'"))
?(requires_conda = false)
~witness
?package
?(repository = `Biopam)
?compiler
definition =
let package =
match package with
| Some p -> p
| None -> Machine.Tool.Definition.to_opam_name definition in
{definition; tool_type; package; witness; test; edges;
init_environment; requires_conda; repository; compiler}
let default_test ~host path =
KEDSL.Command.shell ~host (sprintf "test -e %s" path)
let default_opam_url =
"https://github.com/ocaml/opam/releases/download/1.2.2/opam-1.2.2-x86_64-Linux"
module Opam = struct
let dir ~install_path = install_path // "opam_dir"
let bin ~install_path = dir ~install_path // "opam"
let root ~install_path name = dir ~install_path // "opam-root-" ^ name
let target ~host ~install_path =
KEDSL.single_file ~host (bin ~install_path)
let installed ~(run_program : Machine.Make_fun.t) ~host ~install_path =
let url = default_opam_url in
let opam_exec = target ~host ~install_path in
let install_dir = dir ~install_path in
let open KEDSL in
workflow_node opam_exec
~name:"Install opam"
~make:(
run_program
~requirements:[
`Internet_access;
`Self_identification ["opam-installation"];
]
Program.(
exec ["mkdir"; "-p"; install_dir]
&& exec ["cd"; install_dir]
&& Workflow_utilities.Download.wget_program ~output_filename:"opam" url
&& shf "chmod +x %s" opam_exec#path))
~edges:[
on_failure_activate
(Workflow_utilities.Remove.path_on_host ~host install_dir);
]
let kcom ~root_name ~install_path k fmt =
let bin = bin ~install_path in
let root = root ~install_path root_name in
ksprintf k
("PATH=%s:$PATH OCAMLRUNPARAM=b OPAMLOCKRETRIES=20000 OPAMBASEPACKAGES= OPAMYES=true OPAMROOT=%s %s " ^^ fmt)
(Filename.dirname bin)
root
bin
let program_sh ?(never_fail = false) ~root_name ~install_path fmt =
kcom ~root_name ~install_path (fun s ->
KEDSL.Program.sh
(if never_fail
then s ^ " | echo 'Never fails'"
else s))
fmt
let command_shell ~root_name ~host ~install_path fmt =
kcom ~root_name ~install_path (KEDSL.Command.shell ~host) fmt
let tool_type_to_variable = function
| `Library _ -> "lib"
| `Application -> "bin"
let root_of_package p = "root-" ^ p
let which ~install_path {package; witness; tool_type; _} =
let v = tool_type_to_variable tool_type in
let s =
let package_name = String.take_while package ~f:((<>) '.') in
kcom ~root_name:(root_of_package package) ~install_path
(fun x -> x) "config var %s:%s" package_name v in
(sprintf "$(%s)" s) // witness
end
let default_biopam_url = "https://github.com/solvuu/biopam.git"
let install_tool ~(run_program : Machine.Make_fun.t) ~host ~install_path
({package; test; edges; init_environment; repository; _ } as it) =
let open KEDSL in
let run_prog name =
run_program
~requirements:[
`Internet_access;
`Self_identification ["opam"; name; package];
]
in
let root_name = Opam.root_of_package package in
let default_compiler, repo_url =
match repository with
| `Biopam -> "0.0.0", default_biopam_url
| `Opam -> "4.02.3", "https://opam.ocaml.org"
| `Custom c -> "4.02.3", c
in
let compiler = Option.value it.compiler ~default:default_compiler in
let edges =
let edges =
[ KEDSL.depends_on (Opam.installed ~run_program ~host ~install_path)] in
if it.requires_conda
then
depends_on (Conda.configured ~run_program ~host ~install_path ()) :: edges
else edges in
let name = "Installing " ^ package in
let make =
run_prog "install"
Program.(
(if it.requires_conda
then Conda.init_biokepi_env ~install_path
else sh "echo 'Does not need Conda'")
&& shf "rm -fr %s" (Filename.quote root_name)
&& Opam.program_sh
~install_path ~root_name "init --comp=%s %s"
compiler (Filename.quote repo_url)
&& Opam.program_sh ~root_name ~install_path "install %s" package
)
in
let shell_which = Opam.which ~install_path it in
let test = (Option.value test ~default:default_test) ~host shell_which in
let cond =
object
method is_done = Some (`Command_returns (test, 0))
method shell_which = shell_which
end
in
workflow_node cond ~name ~make ~edges
let provide ~run_program ~host ~install_path it =
let install_workflow =
install_tool ~run_program ~host ~install_path it in
let export_var =
match it.tool_type with
| `Application -> None
| `Library v ->
let path = install_workflow#product#shell_which in
Some KEDSL.Program.(shf "export %s=\"%s${%s:+:}${%s}\"" v path v v)
in
Machine.Tool.create it.definition
~ensure:install_workflow
~init:KEDSL.Program.(
(if it.requires_conda
then Conda.init_biokepi_env ~install_path
else sh "echo 'Does not need Conda'")
&& it.init_environment ~install_path
&& Opam.kcom ~root_name:(Opam.root_of_package it.package) ~install_path
(shf "eval $(%s)") "config env"
&& Option.value export_var ~default:(sh "echo 'No export var'")
)
let test_version ~host path =
KEDSL.Command.shell ~host (sprintf "%s --version" path)
let picard =
install_target
~tool_type:(`Library "PICARD_JAR")
~witness:"picard.jar"
(Machine.Tool.Definition.create "picard" ~version:"1.128")
let bowtie =
install_target
~witness:"bowtie" ~test:test_version
Machine.Tool.Default.bowtie
let seq2hla =
install_target
~witness:"seq2HLA" ~requires_conda:true
~package:"seq2HLA.2.2"
Machine.Tool.Default.seq2hla
let optitype =
install_target ~witness:"OptiTypePipeline" Machine.Tool.Default.optitype
~requires_conda:true
~init_environment:KEDSL.Program.(
fun ~install_path ->
let name = Machine.Tool.(Default.optitype.Definition.name) in
shf "export OPAMROOT=%s" (Opam.root_of_package name |> Opam.root ~install_path)
&& shf "export OPTITYPE_DATA=$(%s config var lib)/optitype"
(Opam.bin ~install_path)
)
let default :
run_program: Machine.Make_fun.t ->
host: Common.KEDSL.Host.t ->
install_path: string ->
unit ->
_ = fun ~run_program ~host ~install_path () ->
Machine.Tool.Kit.of_list
(List.map ~f:(provide ~run_program ~host ~install_path) [
picard;
bowtie;
seq2hla;
optitype;
])
end
module Build_machine
: sig
open Biokepi_run_environment
open Common
val create :
?max_processors : int ->
?gatk_jar_location:(unit -> Tool_providers.broad_jar_location) ->
?mutect_jar_location:(unit -> Tool_providers.broad_jar_location) ->
?run_program:Machine.Make_fun.t ->
?toolkit:Machine.Tool.Kit.t ->
?b37:Reference_genome.t ->
string ->
Machine.t
end
= struct
open Biokepi_run_environment
open Common
let default_run_program : host:KEDSL.Host.t -> Machine.Make_fun.t =
fun ~host ?(name="biokepi-ssh-box") ?(requirements = []) program ->
let open KEDSL in
daemonize ~using:`Python_daemon ~host program
let create
?(max_processors = 1)
?gatk_jar_location
?mutect_jar_location
?run_program ?toolkit ?b37 uri =
let open KEDSL in
let host = Host.parse (uri // "ketrew_playground") in
let meta_playground = Uri.of_string uri |> Uri.path in
let run_program =
match run_program with
| None -> default_run_program ~host
| Some r -> r
in
let toolkit =
Option.value toolkit
~default:(Tool_providers.default_toolkit ()
~run_program
~host ~install_tools_path:(meta_playground // "install-tools")
?gatk_jar_location ?mutect_jar_location)
in
Machine.create (sprintf "ssh-box-%s" uri)
~max_processors
~get_reference_genome:(fun name ->
match name, b37 with
| name, Some some37 when name = Reference_genome.name some37 -> some37
| name, _ ->
Download_reference_genomes.get_reference_genome name
~toolkit ~host ~run_program
~destination_path:(meta_playground // "reference-genome"))
~host
~toolkit
~run_program
~work_dir:(meta_playground // "work")
end
module Conda
: sig
open Biokepi_run_environment
val biokepi_conda_config : string
val configured :
run_program: Machine.Make_fun.t ->
host: Common.KEDSL.Host.t ->
install_path: string ->
unit ->
< is_done : Common.KEDSL.Condition.t option > Common.KEDSL.workflow_node
val init_biokepi_env : install_path:string -> Common.KEDSL.Program.t
end
= struct
open Biokepi_run_environment
open Common
let rm_path = Workflow_utilities.Remove.path_on_host
let dir ~install_path = install_path // "conda_dir"
let commands ~install_path com = dir ~install_path // "bin" // com
let bin = commands "conda"
let activate = commands "activate"
let com ~install_path fmt =
Printf.sprintf ("%s " ^^ fmt) (bin ~install_path)
let installed ~(run_program : Machine.Make_fun.t) ~host ~install_path =
let open KEDSL in
let url =
"https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" in
let conda_exec = single_file ~host (bin ~install_path) in
let install_dir = dir ~install_path in
workflow_node conda_exec
~name:"Install conda"
~make:(
run_program
~requirements:[
`Internet_access; `Self_identification ["conda"; "installation"]
]
Program.(
exec ["mkdir"; "-p"; install_path]
&& exec ["cd"; install_path]
&& Workflow_utilities.Download.wget_program url
&& shf "bash Miniconda3-latest-Linux-x86_64.sh -b -p %s" install_dir))
let config = "biokepi_conda_env"
let env_name = "biokepi"
let biokepi_conda_config =
{conda|# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
@EXPLICIT
https://repo.continuum.io/pkgs/free/linux-64/anaconda-client-1.2.2-py27_0.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/bcftools-1.3-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/biopython-1.66-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/cairo-1.12.18-6.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/clyent-1.2.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/cycler-0.10.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/distribute-0.6.45-py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/fontconfig-2.11.1-5.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/freetype-2.5.5-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/hdf5-1.8.15.1-2.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/htslib-1.3-0.tar.bz2
https://conda.anaconda.org/r/linux-64/libgcc-4.8.5-1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/libpng-1.6.17-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/libxml2-2.9.2-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/matplotlib-1.5.1-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/mkl-11.3.1-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/ncurses-5.9-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/numexpr-2.4.6-np110py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/numpy-1.10.4-py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/openssl-1.0.2g-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pandas-0.17.1-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pip-8.0.3-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pixman-0.32.6-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pycairo-1.10.0-py27_0.tar.bz2
https://conda.anaconda.org/trung/linux-64/pyinstaller-3.1-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pyparsing-2.0.3-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pyqt-4.11.4-py27_1.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/pysam-0.9.0-py27_2.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pytables-3.2.2-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/python-2.7.11-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/python-dateutil-2.4.2-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pytz-2015.7-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pyyaml-3.11-py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/qt-4.8.7-1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/requests-2.9.1-py27_0.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/samtools-1.3-1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/setuptools-20.1.1-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/sip-4.16.9-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/six-1.10.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/sqlite-3.9.2-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/tk-8.5.18-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/wheel-0.29.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/yaml-0.1.6-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/zlib-1.2.8-0.tar.bz2|conda}
let cfg_exists ~(run_program : Machine.Make_fun.t) ~host ~install_path =
let open KEDSL in
let file = install_path // config in
let make =
run_program
~requirements:[`Quick_run; `Self_identification ["conda"; "config-file"]]
Program.(exec ["mkdir"; "-p"; install_path]
&& shf "echo %s >> %s" (Filename.quote biokepi_conda_config) file)
in
workflow_node (single_file ~host file)
~name:("Make sure we have a biokepi conda config file: " ^ config)
~make
let configured ~(run_program : Machine.Make_fun.t) ~host ~install_path () =
let open KEDSL in
let conf =
com ~install_path "create --name %s --file %s/%s"
env_name install_path config in
let make =
run_program
~requirements:[
`Internet_access;
`Self_identification ["conda"; "configuration"];
]
Program.(
sh conf
&& shf "source %s %s" (activate ~install_path) env_name
&& chain (List.map ~f:(shf "pip install %s") [
"pyomo";
"six";
"packaging";
])
)
in
let edges = [
depends_on (installed ~run_program ~host ~install_path);
depends_on (cfg_exists ~run_program ~host ~install_path);
] in
let biokepi_env =
Command.shell ~host (com ~install_path "env list | grep %s" env_name) in
let product =
object method is_done = Some (`Command_returns (biokepi_env, 0)) end in
workflow_node product ~make ~name:"Conda is configured." ~edges
let init_biokepi_env ~install_path =
KEDSL.Program.(shf "source %s %s" (activate ~install_path) env_name)
end
module Download_reference_genomes
: sig
open Biokepi_run_environment
type pull_function =
toolkit:Machine.Tool.Kit.t ->
host:Common.KEDSL.Host.t ->
run_program:Machine.Make_fun.t ->
destination_path:string -> Reference_genome.t
val pull_b37 : pull_function
val pull_b37decoy : pull_function
val pull_b38 : pull_function
val pull_hg18 : pull_function
val pull_hg19 : pull_function
val pull_mm10 : pull_function
val default_genome_providers : (string * pull_function) list
val get_reference_genome : string -> pull_function
end
= struct
open Biokepi_run_environment
open Common
open Workflow_utilities.Download
module Vcftools = Workflow_utilities.Vcftools
let of_specification
~toolkit ~host ~run_program ~destination_path specification =
let open Reference_genome in
let {
Specification.
name;
metadata;
fasta;
dbsnp;
cosmic;
exome_gtf;
cdna;
major_contigs;
} = specification in
let dest_file f = destination_path // name // f in
let rec compile_location filename =
function
| `Url url
| `Gunzip `Url url ->
Workflow_utilities.Download.wget_gunzip
~host ~run_program ~destination:(dest_file filename) url
| `Vcf_concat l ->
let vcfs =
List.map ~f:(fun (n, loc) -> compile_location n loc) l
in
let vcftools =
Machine.Tool.Kit.get_exn toolkit Machine.Tool.Default.vcftools in
let concated =
let tmp_vcf =
dest_file (Filename.chop_extension filename ^ "-cat.vcf") in
Vcftools.vcf_concat_no_machine
~host ~vcftools ~run_program ~final_vcf:tmp_vcf vcfs in
let sorted =
let final_vcf_path = dest_file filename in
Vcftools.vcf_sort_no_machine
~host ~vcftools ~run_program
~src:concated ~dest:final_vcf_path () in
sorted
| other ->
failwithf "Reference_genome.compile_location this kind of location is not yet implemented"
in
let compile_location_opt filename =
Option.map ~f:(compile_location filename) in
create specification
(compile_location (name ^ ".fasta") fasta)
?cosmic:(compile_location_opt "cosmic.vcf" cosmic)
?dbsnp:(compile_location_opt "dbsnp.vcf" dbsnp)
?gtf:(compile_location_opt "transcripts.gtf" exome_gtf)
?cdna:(compile_location_opt "cdns-all.fa" cdna)
type pull_function =
toolkit:Machine.Tool.Kit.t ->
host:Common.KEDSL.Host.t ->
run_program:Machine.Make_fun.t ->
destination_path:string -> Reference_genome.t
let pull_b37 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.b37
let pull_b37decoy ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.b37decoy
let pull_b38 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.b38
let pull_hg19 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.hg19
let pull_hg18 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.hg18
let pull_mm10 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
of_specification ~toolkit ~host ~run_program ~destination_path
Reference_genome.Specification.Default.mm10
let default_genome_providers = [
Reference_genome.Specification.Default.Name.b37, pull_b37;
Reference_genome.Specification.Default.Name.b37decoy, pull_b37decoy;
Reference_genome.Specification.Default.Name.b38, pull_b38;
Reference_genome.Specification.Default.Name.hg18, pull_hg18;
Reference_genome.Specification.Default.Name.hg19, pull_hg19;
Reference_genome.Specification.Default.Name.mm10, pull_mm10;
]
let get_reference_genome name =
match List.find default_genome_providers ~f:(fun (a, _) -> a = name) with
| Some (_, pull) -> pull
| None -> failwithf "Cannot find the reference genorme called %S" name
end
module Tool_providers
= struct
open Biokepi_run_environment
open Common
let rm_path = Workflow_utilities.Remove.path_on_host
let generic_installation
~(run_program : Machine.Make_fun.t)
~host ~install_path
~install_program ~witness ~url
?unarchived_directory
tool_name =
let archive = Filename.basename url in
let archive_kind =
if Filename.check_suffix url "bz2" then `Tar "j"
else if Filename.check_suffix url "gz" then `Tar "z"
else if Filename.check_suffix url "tar" then `Tar ""
else if Filename.check_suffix url "zip" then `Zip
else if Filename.check_suffix url "deb" then `Deb
else `None
in
let open KEDSL in
let unarchival =
let open Program in
let and_cd =
shf "cd %s" (Option.value unarchived_directory
~default:(tool_name ^ "*")) in
match archive_kind with
| `Tar tar_option ->
chain [ shf "tar xvf%s %s" tar_option archive; and_cd ]
| `Zip ->
chain [ shf "unzip %s" archive; and_cd]
| `Deb ->
chain [
exec ["ar"; "x"; archive];
exec ["tar"; "xvfz"; "data.tar.gz"];
]
| `None -> sh "echo Not-an-archive"
in
workflow_node
~name:(sprintf "Install %s" tool_name)
witness
~edges:[
on_failure_activate (rm_path ~host install_path);
]
~make:(
run_program
~requirements:[
`Internet_access;
`Self_identification ["generic-instalation"; tool_name];
]
Program.(
shf "mkdir -p %s" install_path
&& shf "cd %s" install_path
&& Workflow_utilities.Download.wget_program url
&& unarchival
&& install_program
&& sh "echo Done"
))
module Tool_def = Machine.Tool.Definition
type installable = {
tool_definition : Tool_def.t;
url : string;
install_program : path: string -> KEDSL.Program.t;
init_program : path: string -> KEDSL.Program.t;
witness: host: KEDSL.Host.t -> path: string -> KEDSL.unknown_product;
unarchived_directory : string option;
}
let noop = KEDSL.Program.sh "echo Nothing-done-here"
let installable_tool ~url
?(install_program = fun ~path -> noop)
?(init_program = fun ~path -> noop)
~witness
?unarchived_directory
tool_definition =
{tool_definition; url; install_program;
init_program; witness; unarchived_directory}
let render_installable_tool ~run_program ~host ~install_tools_path tool =
let path =
install_tools_path // Tool_def.to_directory_name tool.tool_definition in
let ensure =
generic_installation
?unarchived_directory:tool.unarchived_directory
~run_program ~host
~install_path:path
~install_program:(tool.install_program ~path)
~witness:(tool.witness ~host ~path)
~url:tool.url
(tool.tool_definition.Tool_def.name)
in
Machine.Tool.create tool.tool_definition ~ensure
~init:(tool.init_program path)
let add_to_dollar_path ~path = KEDSL.Program.shf "export PATH=%s:$PATH" path
let make_and_copy_bin bin =
fun ~path -> KEDSL.Program.(
sh "make" && shf "cp %s %s" bin path
)
let witness_file bin =
fun ~host ~path ->
let p = KEDSL.single_file ~host (path // bin) in
object method is_done = p#is_done end
let witness_list l =
fun ~host ~path ->
KEDSL.list_of_files ~host (List.map l ~f:(fun bin -> path // bin))
|> fun p -> object method is_done = p#is_done end
let bwa =
installable_tool
Machine.Tool.Default.bwa
~url:"http://downloads.sourceforge.net/project/bio-bwa/bwa-0.7.10.tar.bz2"
~install_program:(make_and_copy_bin "bwa")
~init_program:add_to_dollar_path
~witness:(witness_file "bwa")
let stringtie =
installable_tool
Machine.Tool.Default.stringtie
~url:"https://github.com/gpertea/stringtie/archive/v1.2.2.tar.gz"
~install_program:(make_and_copy_bin "stringtie")
~init_program:add_to_dollar_path
~witness:(witness_file "stringtie")
let vcftools =
installable_tool Machine.Tool.Default.vcftools
~url:"http://downloads.sourceforge.net/project/vcftools/vcftools_0.1.12b.tar.gz"
~install_program:(fun ~path -> KEDSL.Program.(
sh "make"
&& shf "cp -r bin %s" path
&& shf "cp -r lib/perl5/site_perl %s" path
))
~witness:(witness_file @@ "bin" // "vcftools")
~init_program:(fun ~path ->
KEDSL.Program.(shf "export PATH=%s/bin/:$PATH" path
&& shf "export PERL5LIB=$PERL5LIB:%s/site_perl/" path))
let bedtools =
installable_tool Machine.Tool.Default.bedtools
~url:"https://github.com/arq5x/bedtools2/archive/v2.23.0.tar.gz"
~install_program:(fun ~path -> KEDSL.Program.(
sh "make" && shf "cp -r bin %s" path))
~init_program:(fun ~path ->
KEDSL.Program.(shf "export PATH=%s/bin/:$PATH" path))
~witness:(witness_file @@ "bin" // "bedtools")
let mosaik =
let url =
"https://mosaik-aligner.googlecode.com/files/MOSAIK-2.2.3-source.tar" in
installable_tool Machine.Tool.Default.mosaik ~url
~unarchived_directory:"MOSAIK*"
~init_program:(fun ~path ->
KEDSL.Program.(
shf "export PATH=%s:$PATH" path
&& shf "export MOSAIK_PE_ANN=%s/pe.ann" path
&& shf "export MOSAIK_SE_ANN=%s/se.ann" path
))
~witness:(witness_file "MosaikAligner")
~install_program:KEDSL.Program.(fun ~path ->
sh "make"
&& shf "cp networkFile/*pe.ann %s/pe.ann" path
&& shf "cp networkFile/*se.ann %s/se.ann" path
&& shf "cp bin/* %s" path
)
let star =
let url = "https://github.com/alexdobin/STAR/archive/STAR_2.4.1d.tar.gz" in
let star_binary = "STAR" in
let star_binary_path = sprintf "bin/Linux_x86_64/%s" star_binary in
installable_tool ~url Machine.Tool.Default.star
~init_program:add_to_dollar_path
~unarchived_directory:"STAR-*"
~install_program:KEDSL.Program.(fun ~path ->
shf "cp %s %s" star_binary_path path)
~witness:(witness_file star_binary)
let hisat tool =
let open KEDSL in
let url, hisat_binary =
let open Machine.Tool.Default in
match tool with
| one when one = hisat ->
"http://ccb.jhu.edu/software/hisat/downloads/hisat-0.1.6-beta-Linux_x86_64.zip",
"hisat"
| two when two = hisat2 ->
"ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.0.2-beta-Linux_x86_64.zip",
"hisat2"
| other ->
failwithf "Can't install Hisat version: %s" (Tool_def.to_string other)
in
installable_tool tool
~url
~witness:(witness_file hisat_binary)
~install_program:KEDSL.Program.(fun ~path ->
shf "mv hisat* %s" path
)
~init_program:add_to_dollar_path
let kallisto =
let url = "https://github.com/pachterlab/kallisto/releases/download/v0.42.3/kallisto_linux-v0.42.3.tar.gz" in
installable_tool Machine.Tool.Default.kallisto ~url
~witness:(witness_file "kallisto")
~install_program:KEDSL.Program.(fun ~path ->
shf "cp -r * %s" path
)
~init_program:add_to_dollar_path
let samtools =
let url = "https://github.com/samtools/samtools/releases/download/1.3/samtools-1.3.tar.bz2" in
let toplevel_tools = ["samtools"] in
let htslib = ["bgzip"; "tabix" ] in
let tools = toplevel_tools @ htslib in
let install_program ~path =
let open KEDSL.Program in
sh "make"
&& shf "cp %s %s" (String.concat toplevel_tools ~sep:" ") path
&& sh "cd htslib*/"
&& sh "make"
&& shf "cp %s %s" (String.concat htslib ~sep:" ") path
&& sh "echo Done"
in
let witness = witness_list tools in
installable_tool Machine.Tool.Default.samtools ~url ~install_program
~init_program:add_to_dollar_path ~witness
let cufflinks =
let url =
"http://cole-trapnell-lab.github.io/cufflinks/assets/downloads/cufflinks-2.2.1.Linux_x86_64.tar.gz" in
let witness = witness_file "cufflinks" in
let install_program ~path = KEDSL.Program.(shf "cp * %s" path) in
installable_tool Machine.Tool.Default.cufflinks ~install_program ~url
~init_program:add_to_dollar_path ~witness
let somaticsniper =
let url =
let deb_file = "somatic-sniper1.0.3_1.0.3_amd64.deb" in
sprintf
"http://apt.genome.wustl.edu/ubuntu/pool/main/s/somatic-sniper1.0.3/%s"
deb_file
in
let binary = "somaticsniper" in
let binary_in_deb = "usr/bin/bam-somaticsniper1.0.3" in
let install_program ~path =
KEDSL.Program.(shf "mv %s/%s %s/%s" path binary_in_deb path binary) in
installable_tool Machine.Tool.Default.somaticsniper ~install_program ~url
~witness:(witness_file binary) ~init_program:add_to_dollar_path
let varscan =
let url =
"http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.5.jar" in
let jar = "VarScan.v2.3.5.jar" in
let witness = witness_file jar in
let init_program ~path =
KEDSL.Program.(shf "export VARSCAN_JAR=%s/%s" path jar) in
installable_tool Machine.Tool.Default.varscan ~url ~init_program ~witness
let picard =
let url =
"https://github.com/broadinstitute/picard/releases/download/1.127/picard-tools-1.127.zip"
in
let jar = "picard-tools-1.127" // "picard.jar" in
let init_program ~path = KEDSL.Program.(shf "export PICARD_JAR=%s/%s" path jar) in
installable_tool Machine.Tool.Default.picard ~url ~init_program
~witness:(witness_file jar)
type broad_jar_location = [
| `Scp of string
| `Wget of string
| `Fail of string
]
let get_broad_jar
~(run_program : Machine.Make_fun.t)
~host ~install_path
loc =
let open KEDSL in
let jar_name =
match loc with
| `Fail s -> "cannot-get-broad-jar.jar"
| `Scp s -> Filename.basename s
| `Wget s -> Filename.basename s in
let local_box_path = install_path // jar_name in
let open KEDSL in
workflow_node (single_file local_box_path ~host)
~name:(sprintf "get-%s" jar_name)
~edges:[
on_failure_activate (rm_path ~host local_box_path)
]
~make:(
run_program
~requirements:[
`Internet_access;
`Self_identification ["broad-jar-instalation"; jar_name];
]
Program.(
shf "mkdir -p %s" install_path
&& begin match loc with
| `Fail msg ->
shf "echo 'Cannot download Broad JAR: %s'" msg
&& sh "exit 4"
| `Scp s ->
shf "scp %s %s"
(Filename.quote s) (Filename.quote local_box_path)
| `Wget s ->
shf "wget %s -O %s"
(Filename.quote s) (Filename.quote local_box_path)
end))
let mutect_tool
~(run_program : Machine.Make_fun.t)
~host ~install_tools_path loc =
let tool = Machine.Tool.Default.mutect in
let open KEDSL in
let install_path = install_tools_path // Tool_def.to_directory_name tool in
let get_mutect = get_broad_jar ~run_program ~host ~install_path loc in
Machine.Tool.create tool ~ensure:get_mutect
~init:Program.(shf "export mutect_HOME=%s" install_path)
let gatk_tool
~(run_program : Machine.Make_fun.t)
~host ~install_tools_path loc =
let tool = Machine.Tool.Default.gatk in
let open KEDSL in
let install_path = install_tools_path // Tool_def.to_directory_name tool in
let ensure = get_broad_jar ~run_program ~host ~install_path loc in
Machine.Tool.create tool ~ensure
~init:Program.(shf "export GATK_JAR=%s" ensure#product#path)
let strelka =
let url =
"ftp://strelka:%27%27@ftp.illumina.com/v1-branch/v1.0.14/strelka_workflow-1.0.14.tar.gz" in
let strelka_bin = "usr" // "bin" in
let witness = witness_file @@ strelka_bin // "configureStrelkaWorkflow.pl" in
let install_program ~path =
KEDSL.Program.(
shf "./configure --prefix=%s" (path // "usr")
&& sh "make && make install"
)
in
let init_program ~path =
KEDSL.Program.(shf "export STRELKA_BIN=%s/%s" path strelka_bin) in
installable_tool Machine.Tool.Default.strelka ~url
~init_program ~install_program ~witness
let virmid =
let url =
"http://downloads.sourceforge.net/project/virmid/virmid-1.1.1.tar.gz" in
let jar = "Virmid-1.1.1" // "Virmid.jar" in
let init_program ~path =
KEDSL.Program.(shf "export VIRMID_JAR=%s/%s" path jar) in
installable_tool Machine.Tool.Default.virmid ~url ~init_program
~unarchived_directory:"."
~witness:(witness_file jar)
let muse =
let url =
"http://bioinformatics.mdanderson.org/Software/MuSE/MuSEv1.0b" in
let binary = "MuSEv1.0b" in
let install_program ~path =
KEDSL.Program.( shf "chmod +x %s/%s" path binary) in
let init_program ~path =
KEDSL.Program.(shf "export muse_bin=%s/%s" path binary) in
installable_tool Machine.Tool.Default.muse ~url
~install_program ~init_program
~witness:(witness_file binary)
let default_jar_location msg (): broad_jar_location =
`Fail (sprintf "No location provided for %s" msg)
let default_toolkit
~run_program
~host ~install_tools_path
?(mutect_jar_location = default_jar_location "Mutect")
?(gatk_jar_location = default_jar_location "GATK")
() =
let install installable =
render_installable_tool ~host installable ~install_tools_path ~run_program
in
Machine.Tool.Kit.concat [
Machine.Tool.Kit.of_list [
mutect_tool ~run_program ~host ~install_tools_path (mutect_jar_location ());
gatk_tool ~run_program ~host ~install_tools_path (gatk_jar_location ());
install bwa;
install samtools;
install bedtools;
install vcftools;
install strelka;
install picard;
install somaticsniper;
install varscan;
install muse;
install virmid;
install star;
install stringtie;
install cufflinks;
install @@ hisat Machine.Tool.Default.hisat;
install @@ hisat Machine.Tool.Default.hisat2;
install mosaik;
install kallisto;
];
Biopam.default ~run_program ~host
~install_path:(install_tools_path // "biopam-kit") ();
]
end