(* code generated with [/Volumes/Encrypted_zzz/dev/biokepi/tools/build-doc.sh ketrew,ppx_deriving.std] *)
module Biopam 
sig

open Biokepi_run_environment
open Common

(** The default location from where we download opam. *)

val default_opam_url : string

(** The default location from where we download biopam. *)

val default_biopam_url : string

type tool_type = [
  | `Library of string   
        (** The export variable that points to witness. *)

  | `Application
]
(** The type of tool that we are installing via opam.

This guides installation and determines: 1. where we look for the witness in opam_install_path/package or opam_install_path/bin. 2. Whether we export $PATH (Application) or $LIBVAR (Library).*)



(** A description of what we'd like Biopam to install.*)

type install_target = private {
  
  (** The package handle the Biopam package provides. *)

  definition: Machine.Tool.Definition.t;

  
  (** What are we installing? See tool_type. *)

  tool_type : tool_type;

  
  (** Name of the package: `opam install package` *)

  package : string;

  
  (** File that is passed to test determine success and what is exported. *)

  witness : string;

  
  (** Test to determine success of the install. Defaults to `test -e witness`. *)

  test :
    (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;

  (* Install dependencies. *)
  edges : KEDSL.workflow_edge list;

  (* Transform the install and init programs, e.g. it needs to be run in a
     specific environment. Defaults to a “no-op”. *)

  init_environment : install_path: string -> KEDSL.Program.t;

  
  (** Whether this package requires Conda packages. *)

  requires_conda: bool;

  
  (** Which opam-repository the tool should come from. *)

  repository: [ `Biopam | `Opam | `Custom of string ];

  
  (** Which compiler should be used to create the tool's own installation opam-switch. *)

  compiler: string option;
}

val install_target:
  ?tool_type:tool_type ->
  ?test:(host: KEDSL.Host.t -> string -> KEDSL.Command.t) ->
  ?edges: KEDSL.workflow_edge list ->
  ?init_environment:(install_path:string -> KEDSL.Program.t) ->
  ?requires_conda:bool ->
  witness:string ->
  ?package:string ->
  ?repository:[ `Biopam | `Custom of string | `Opam ] ->
  ?compiler:string ->
  Machine.Tool.Definition.t ->
  install_target
(** Create install_target values.

  • tool_type: the kind of tool being installed. See tool_type. Default: `Application.
  • test: test to determine success of the install. Default: "test -e <witness>".
  • edges: dependencies to install first.
  • init_environment: transform the install and init programs, e.g. it needs to be run in a specific environment. Defaults to a “no-op”.
  • witness: name of the file (base-name) that is tested to determine the success of an installation (usually the binary for `Applications, or the JAR for Java libraries, etc.).
  • requires_conda: whether this package requires Python packages installed with Conda.
  • package: the package name in the opam sense i.e. "opam install <package-name>" (the default is to construct the package name from the Machine.Tool.Definition.t).
  • repository: Which opam-repository the tool should come from:
    • `Biopam: the Biopam project's repository (default).
    • `Opam: the default Opam repository.
    • `Custom url: use custom URL.
  • compiler: Which compiler should be used to create the tool's own installation opam-switch (the default is None corresponding to "0.0.0" for `Biopam and "4.02.3" for `Opam or `Custom _)
  • anonymous argument: the tool that the installation-target provides.
*)


val provide :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path:string -> install_target -> Machine.Tool.t
(** Provide the specified (via install_target) tool.*)


val default : 
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path:string -> unit ->
  Machine.Tool.Kit.t
(** A set of default tools that have been specified in this module.*)

end 
struct
(** Provide tools via Biopam: https://github.com/solvuu/biopam *)


open Biokepi_run_environment
open Common

(* What are we installing via opam. This determines where we look for the
   witness; in [opam_install_path]/package or [opam_install_path]/bin. *)

type tool_type = [
  | `Library of string
  | `Application
]

type install_target = {
  definition: Machine.Tool.Definition.t;
  tool_type : tool_type;
  package : string; 
     (** What do we call 'install opam ' with *)

  witness : string; 
     (** File that must exist after install, ex:
  • bowtie exec
  • picard.jar
*)

  test : (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
  edges : KEDSL.workflow_edge list;

  init_environment : install_path: string -> KEDSL.Program.t;
  requires_conda: bool;
  repository: [ `Biopam | `Opam | `Custom of string ];
  compiler: string option;
}
let install_target
    ?(tool_type = `Application)
    ?test
    ?(edges = [])
    ?(init_environment =
      fun ~install_path -> KEDSL.Program.(sh "echo 'Default Init'"))
    ?(requires_conda = false)
    ~witness
    ?package
    ?(repository = `Biopam)
    ?compiler
    definition =
  let package =
    match package with
    | Some p -> p
    | None -> Machine.Tool.Definition.to_opam_name definition in
  {definition; tool_type; package; witness; test; edges;
   init_environment; requires_conda; repository; compiler}

let default_test ~host path =
  KEDSL.Command.shell ~host (sprintf "test -e %s" path)

let default_opam_url =
  "https://github.com/ocaml/opam/releases/download/1.2.2/opam-1.2.2-x86_64-Linux"

(* Hide the messy logic of calling opam in here. This should not be exported
   and use the Biopam functions directly.*)

module Opam = struct

  let dir ~install_path = install_path // "opam_dir"
  let bin ~install_path = dir ~install_path // "opam"
  let root ~install_path name = dir ~install_path // "opam-root-" ^ name

  (* TODO:
     Instead of just making sure that this file exists? Wouldn't it be better
     to make sure that a command from this program gives the right output?
     ie. $ opam --version = 1.2.2 *)

  let target ~host ~install_path =
    KEDSL.single_file ~host (bin ~install_path)

  (* A workflow to ensure that opam is installed. *)
  let installed ~(run_program : Machine.Make_fun.t) ~host ~install_path =
    let url = default_opam_url in
    let opam_exec   = target ~host ~install_path in
    let install_dir = dir ~install_path in
    let open KEDSL in
    workflow_node opam_exec
      ~name:"Install opam"
      ~make:(
        run_program
          ~requirements:[
            `Internet_access;
            `Self_identification ["opam-installation"];
          ]
          Program.(
            exec ["mkdir""-p"; install_dir]
            && exec ["cd"; install_dir]
            && Workflow_utilities.Download.wget_program ~output_filename:"opam" url
            && shf "chmod +x %s" opam_exec#path))
      ~edges:[
        on_failure_activate
          (Workflow_utilities.Remove.path_on_host ~host install_dir);
      ]

  let kcom ~root_name ~install_path k fmt =
    let bin = bin ~install_path in
    let root = root ~install_path root_name in
    (* 
       - PATH: we add `opam` so that installation scripts can use the tool
       - OCAMLRUNPARAM: we want OCaml backtraces
       - OPAMLOCKRETRIES: installations should concurrently but in case of we
         bump the lock to wait instead of fail
       - OPAMBASEPACKAGES: we make sure opam does not install any package by
         default
       - OPAMYES: answer `y` to all questions (i.e. batch mode)
       - OPAMROOT: our per-package replacement for `~/.opam/`
    *)
 
    ksprintf k
      ("PATH=%s:$PATH OCAMLRUNPARAM=b OPAMLOCKRETRIES=20000 OPAMBASEPACKAGES= OPAMYES=true OPAMROOT=%s %s " ^^ fmt)
      (Filename.dirname bin)
      root
      bin

  let program_sh ?(never_fail = false) ~root_name ~install_path fmt =
    kcom ~root_name ~install_path (fun s ->
        KEDSL.Program.sh
          (if never_fail
           then s ^ " | echo 'Never fails'"
           else s))
      fmt

  let command_shell ~root_name ~host ~install_path fmt =
    kcom ~root_name ~install_path (KEDSL.Command.shell ~host) fmt

  let tool_type_to_variable = function
    | `Library _   -> "lib"
    | `Application -> "bin"

  let root_of_package p = "root-" ^ p

  (* Answer Opam 'which' questions *)
  let which ~install_path {package; witness; tool_type; _} =
    let v = tool_type_to_variable tool_type in
    let s =
      let package_name = String.take_while package ~f:((<>) '.'in
      kcom ~root_name:(root_of_package package) ~install_path
        (fun x -> x) "config var %s:%s" package_name v in
    (sprintf "$(%s)" s) // witness

end

let default_biopam_url = "https://github.com/solvuu/biopam.git"

let install_tool ~(run_program : Machine.Make_fun.t) ~host ~install_path
    ({package; test; edges; init_environment; repository; _ } as it) =
  let open KEDSL in
  let run_prog name =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["opam"; name; package];
      ]
  in
  let root_name = Opam.root_of_package package in
  let default_compiler, repo_url =
    match repository with
    | `Biopam -> "0.0.0", default_biopam_url
    | `Opam -> "4.02.3""https://opam.ocaml.org"
    | `Custom c -> "4.02.3", c
  in
  let compiler = Option.value it.compiler ~default:default_compiler in
  let edges =
    let edges =
      [ KEDSL.depends_on (Opam.installed ~run_program ~host ~install_path)] in
    if it.requires_conda
    then
      depends_on (Conda.configured ~run_program ~host ~install_path ()) :: edges
    else edges in
  let name = "Installing " ^ package in
  let make =
    run_prog "install"
      Program.(
        (if it.requires_conda
         then Conda.init_biokepi_env ~install_path
         else sh "echo 'Does not need Conda'")
        && shf "rm -fr %s" (Filename.quote root_name)
        && Opam.program_sh
          ~install_path ~root_name "init --comp=%s %s"
          compiler (Filename.quote repo_url)
        && Opam.program_sh ~root_name ~install_path "install %s" package
      )
  in
  let shell_which = Opam.which ~install_path it in
  let test = (Option.value test ~default:default_test) ~host shell_which in
  let cond =
    object
      method is_done = Some (`Command_returns (test, 0))
      method shell_which = shell_which
    end
  in
  workflow_node cond ~name ~make ~edges

let provide ~run_program ~host ~install_path it =
  let install_workflow =
    install_tool ~run_program ~host ~install_path it in
  let export_var =
    match it.tool_type with
    | `Application -> None
    | `Library v   ->
      let path = install_workflow#product#shell_which in
      Some KEDSL.Program.(shf "export %s=\"%s${%s:+:}${%s}\"" v path v v)
  in
  Machine.Tool.create it.definition
    ~ensure:install_workflow
    ~init:KEDSL.Program.(
        (if it.requires_conda
         then Conda.init_biokepi_env ~install_path
         else sh "echo 'Does not need Conda'")
        && it.init_environment ~install_path
        && Opam.kcom ~root_name:(Opam.root_of_package it.package) ~install_path
          (shf "eval $(%s)""config env"
        && Option.value export_var ~default:(sh "echo 'No export var'")
      )

let test_version ~host path =
  KEDSL.Command.shell ~host (sprintf "%s --version" path)

let picard =
  install_target
    ~tool_type:(`Library "PICARD_JAR")
    ~witness:"picard.jar"
    (Machine.Tool.Definition.create "picard" ~version:"1.128")

let bowtie =
  install_target
    ~witness:"bowtie" ~test:test_version
    Machine.Tool.Default.bowtie

let seq2hla =
  install_target
    ~witness:"seq2HLA" ~requires_conda:true
    ~package:"seq2HLA.2.2" (* we need to uppercase HLA for opam *)
    Machine.Tool.Default.seq2hla

let optitype =
  install_target ~witness:"OptiTypePipeline" Machine.Tool.Default.optitype
    ~requires_conda:true
    ~init_environment:KEDSL.Program.(
        fun ~install_path ->
          let name = Machine.Tool.(Default.optitype.Definition.name) in
          shf "export OPAMROOT=%s" (Opam.root_of_package name |> Opam.root ~install_path)
          && shf "export OPTITYPE_DATA=$(%s config var lib)/optitype"
            (Opam.bin ~install_path)
      )

let default :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path: string ->
  unit ->
  _ = fun ~run_program ~host ~install_path () ->
  Machine.Tool.Kit.of_list
    (List.map ~f:(provide ~run_program ~host ~install_path) [
    picard;
    bowtie;
    seq2hla;
    optitype;
  ])

end
module Build_machine 
sig
(** Simplified creation of Run_environment.Machine.t values *)


open Biokepi_run_environment
open Common


(** Build a Run_environment.Machine.t with convenient default values.

The string argument is a URI like the one expected by Ketrew.EDSL.Host.parse except that the “path” is the meta-playground for Biokepi (the ketrew playground will be (meta_playground // "ketrew_playground").

The default run_program is daemonizing with `Python_daemon.

The default toolkit is default_toolkit from Tool_providers. This machine will get tools installations and data-fetching from Biokepi's defaults. The ?b37 argument allows to override the locations of the “B37” genome; to override other default please use Run_environment.Machine.create directly. *)


val create :
  ?max_processors : int ->
  ?gatk_jar_location:(unit -> Tool_providers.broad_jar_location) ->
  ?mutect_jar_location:(unit -> Tool_providers.broad_jar_location) ->
  ?run_program:Machine.Make_fun.t ->
  ?toolkit:Machine.Tool.Kit.t ->
  ?b37:Reference_genome.t ->
  string ->
  Machine.t
end 
struct
open Biokepi_run_environment
open Common


let default_run_program : host:KEDSL.Host.t -> Machine.Make_fun.t =
  fun ~host ?(name="biokepi-ssh-box") ?(requirements = []) program ->
    let open KEDSL in
    daemonize ~using:`Python_daemon ~host program

let create
    ?(max_processors = 1)
    ?gatk_jar_location
    ?mutect_jar_location
    ?run_program ?toolkit ?b37 uri =
  let open KEDSL in
  let host = Host.parse (uri // "ketrew_playground"in
  let meta_playground = Uri.of_string uri |> Uri.path in
  let run_program =
    match run_program with
    | None -> default_run_program ~host
    | Some r -> r
  in
  let toolkit =
    Option.value toolkit
      ~default:(Tool_providers.default_toolkit ()
                  ~run_program
                  ~host ~install_tools_path:(meta_playground // "install-tools")
                  ?gatk_jar_location ?mutect_jar_location)
  in
  Machine.create (sprintf "ssh-box-%s" uri)
    ~max_processors
    ~get_reference_genome:(fun name ->
        match name, b37 with
        | name, Some some37 when name = Reference_genome.name some37 -> some37
        | name, _ ->        
          Download_reference_genomes.get_reference_genome name
            ~toolkit ~host ~run_program
            ~destination_path:(meta_playground // "reference-genome"))
    ~host
    ~toolkit
    ~run_program
    ~work_dir:(meta_playground // "work")
end
module Conda 
sig
open Biokepi_run_environment

(** The contents of the default Conda configuration used in Biokepi. *)

val biokepi_conda_config : string

(** A workflow node to make sure that Conda is configured. *)

val configured :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path: string ->
  unit ->
  < is_done : Common.KEDSL.Condition.t option > Common.KEDSL.workflow_node

(** A transform to run Programs with the Conda enviroment activated. *)

val init_biokepi_env : install_path:string -> Common.KEDSL.Program.t
end 
struct
(*
  Conda is a Python environment and package manager:
  http://conda.pydata.org/docs/

  We use it to ensure a consistent Python environment for tools that depend
  on Python.
*)


open Biokepi_run_environment
open Common

let rm_path = Workflow_utilities.Remove.path_on_host

let dir ~install_path = install_path // "conda_dir"
let commands ~install_path com = dir ~install_path // "bin" // com
let bin = commands "conda"
let activate = commands "activate"

(* give a conda command. *)
let com ~install_path fmt =
  Printf.sprintf ("%s " ^^ fmt) (bin ~install_path)

(* A workflow to ensure that conda is installed. *)
let installed ~(run_program : Machine.Make_fun.t) ~host ~install_path =
  let open KEDSL in
  let url =
    "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" in
  let conda_exec  = single_file ~host (bin ~install_path) in
  let install_dir = dir ~install_path in
  workflow_node conda_exec
    ~name:"Install conda"
    ~make:(
      run_program
        ~requirements:[
          `Internet_access`Self_identification ["conda""installation"]
        ]
        Program.(
          exec ["mkdir""-p"; install_path]
          && exec ["cd"; install_path]
          && Workflow_utilities.Download.wget_program url
          && shf "bash Miniconda3-latest-Linux-x86_64.sh -b -p %s" install_dir))


let config = "biokepi_conda_env"
let env_name = "biokepi"
let biokepi_conda_config =
{conda|# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
@EXPLICIT
https://repo.continuum.io/pkgs/free/linux-64/anaconda-client-1.2.2-py27_0.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/bcftools-1.3-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/biopython-1.66-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/cairo-1.12.18-6.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/clyent-1.2.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/cycler-0.10.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/distribute-0.6.45-py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/fontconfig-2.11.1-5.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/freetype-2.5.5-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/hdf5-1.8.15.1-2.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/htslib-1.3-0.tar.bz2
https://conda.anaconda.org/r/linux-64/libgcc-4.8.5-1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/libpng-1.6.17-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/libxml2-2.9.2-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/matplotlib-1.5.1-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/mkl-11.3.1-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/ncurses-5.9-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/numexpr-2.4.6-np110py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/numpy-1.10.4-py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/openssl-1.0.2g-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pandas-0.17.1-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pip-8.0.3-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pixman-0.32.6-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pycairo-1.10.0-py27_0.tar.bz2
https://conda.anaconda.org/trung/linux-64/pyinstaller-3.1-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pyparsing-2.0.3-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pyqt-4.11.4-py27_1.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/pysam-0.9.0-py27_2.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pytables-3.2.2-np110py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/python-2.7.11-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/python-dateutil-2.4.2-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pytz-2015.7-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/pyyaml-3.11-py27_1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/qt-4.8.7-1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/requests-2.9.1-py27_0.tar.bz2
https://conda.anaconda.org/bioconda/linux-64/samtools-1.3-1.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/setuptools-20.1.1-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/sip-4.16.9-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/six-1.10.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/sqlite-3.9.2-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/tk-8.5.18-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/wheel-0.29.0-py27_0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/yaml-0.1.6-0.tar.bz2
https://repo.continuum.io/pkgs/free/linux-64/zlib-1.2.8-0.tar.bz2|conda}

(* Removed:
   https://repo.continuum.io/pkgs/free/linux-64/readline-6.2-2.tar.bz2
   This default conda compiled readline isn't linked appropriately:
   symbol lookup error: /tmp/_MEIzf9vto/libreadline.so.6: undefined symbol: PC
   We'll ignore it from the configuration and hope that the computer has a sane
   readline lib installed by default.  *)


let cfg_exists ~(run_program : Machine.Make_fun.t) ~host ~install_path =
  let open KEDSL in
  let file = install_path // config in
  let make =
    run_program
      ~requirements:[`Quick_run`Self_identification ["conda""config-file"]]
      Program.(exec ["mkdir""-p"; install_path]
               && shf "echo %s >> %s" (Filename.quote biokepi_conda_config) file)
  in
  workflow_node (single_file ~host file)
    ~name:("Make sure we have a biokepi conda config file: " ^ config)
    ~make

let configured ~(run_program : Machine.Make_fun.t) ~host ~install_path () =
  let open KEDSL in
  let conf =
    com ~install_path "create --name %s --file %s/%s"
      env_name install_path config in
  let make =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["conda""configuration"];
      ]
      Program.(
        sh conf
        && shf "source %s %s" (activate ~install_path) env_name
        && chain (List.map ~f:(shf "pip install %s") [
            "pyomo";
            "six";
            "packaging";
          ])
      )
  in
  let edges = [
    depends_on (installed ~run_program ~host ~install_path);
    depends_on (cfg_exists ~run_program ~host ~install_path);
  ] in
  let biokepi_env =
    Command.shell ~host (com ~install_path "env list | grep %s" env_name) in
  let product =
    object method is_done = Some (`Command_returns (biokepi_env, 0)) end in
  workflow_node product ~make ~name:"Conda is configured." ~edges

let init_biokepi_env ~install_path  =
  KEDSL.Program.(shf "source %s %s" (activate ~install_path) env_name)
end
module Download_reference_genomes 
sig
(** Download reference-genormes (& associated data) with Ketrew *)

open Biokepi_run_environment

type pull_function =
  toolkit:Machine.Tool.Kit.t ->
  host:Common.KEDSL.Host.t ->
  run_program:Machine.Make_fun.t ->
  destination_path:string -> Reference_genome.t

val pull_b37 : pull_function
val pull_b37decoy : pull_function
val pull_b38 : pull_function
val pull_hg18 : pull_function
val pull_hg19 : pull_function
val pull_mm10 : pull_function


val default_genome_providers : (string * pull_function) list

val get_reference_genome : string -> pull_function
end 
struct
open Biokepi_run_environment
open Common



open Workflow_utilities.Download (* All the wget* functions *)
module Vcftools = Workflow_utilities.Vcftools

let of_specification
    ~toolkit ~host ~run_program ~destination_path specification =
  let open Reference_genome in
  let {
    Specification.
    name;
    metadata;
    fasta;
    dbsnp;
    cosmic;
    exome_gtf; (* maybe desrves a better name? *)
    cdna;
    major_contigs;
  } = specification in
  let dest_file f = destination_path // name // f in
  let rec compile_location filename =
    function
    | `Url url (* Right now, `wget_gunzip` is clever enough to not gunzip *)
    | `Gunzip `Url url ->
      Workflow_utilities.Download.wget_gunzip
        ~host ~run_program ~destination:(dest_file filename) url
    | `Vcf_concat l ->
      let vcfs =
        List.map ~f:(fun (n, loc) -> compile_location n loc) l
      in
      let vcftools =
        Machine.Tool.Kit.get_exn toolkit Machine.Tool.Default.vcftools in
      let concated =
        let tmp_vcf =
          dest_file (Filename.chop_extension filename ^ "-cat.vcf"in
        Vcftools.vcf_concat_no_machine
          ~host ~vcftools ~run_program ~final_vcf:tmp_vcf vcfs in
      let sorted =
        let final_vcf_path = dest_file filename in
        Vcftools.vcf_sort_no_machine
          ~host ~vcftools ~run_program
          ~src:concated ~dest:final_vcf_path () in
      sorted
    | other ->
      failwithf "Reference_genome.compile_location this kind of location is not yet implemented"
  in
  let compile_location_opt filename =
    Option.map ~f:(compile_location filename) in
  create specification
    (compile_location (name ^ ".fasta") fasta)
    ?cosmic:(compile_location_opt "cosmic.vcf" cosmic)
    ?dbsnp:(compile_location_opt "dbsnp.vcf" dbsnp)
    ?gtf:(compile_location_opt "transcripts.gtf" exome_gtf)
    ?cdna:(compile_location_opt "cdns-all.fa" cdna)

type pull_function =
  toolkit:Machine.Tool.Kit.t ->
  host:Common.KEDSL.Host.t ->
  run_program:Machine.Make_fun.t ->
  destination_path:string -> Reference_genome.t


let pull_b37 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.b37

let pull_b37decoy ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.b37decoy

let pull_b38 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.b38

let pull_hg19 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.hg19

let pull_hg18 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.hg18

let pull_mm10 ~toolkit ~host ~(run_program : Machine.Make_fun.t) ~destination_path =
  of_specification ~toolkit ~host ~run_program ~destination_path
    Reference_genome.Specification.Default.mm10

let default_genome_providers = [
  Reference_genome.Specification.Default.Name.b37, pull_b37;
  Reference_genome.Specification.Default.Name.b37decoy, pull_b37decoy;
  Reference_genome.Specification.Default.Name.b38, pull_b38;
  Reference_genome.Specification.Default.Name.hg18, pull_hg18;
  Reference_genome.Specification.Default.Name.hg19, pull_hg19;
  Reference_genome.Specification.Default.Name.mm10, pull_mm10;
]

let get_reference_genome name =
  match List.find default_genome_providers ~f:(fun (a, _) -> a = name) with
  | Some (_, pull) -> pull
  | None -> failwithf "Cannot find the reference genorme called %S" name
              
end
module Tool_providers 
struct

open Biokepi_run_environment
open Common

let rm_path = Workflow_utilities.Remove.path_on_host

let generic_installation
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_path 
    ~install_program ~witness ~url
    ?unarchived_directory
    tool_name =
  let archive = Filename.basename url in
  let archive_kind = 
    if Filename.check_suffix url "bz2" then `Tar "j" 
    else if Filename.check_suffix url "gz"  then `Tar "z"
    else if Filename.check_suffix url "tar" then `Tar ""
    else if Filename.check_suffix url "zip" then `Zip
    else if Filename.check_suffix url "deb" then `Deb
    else `None
  in
  let open KEDSL in
  let unarchival =
    let open Program in
    let and_cd =
      shf "cd %s" (Option.value unarchived_directory
                     ~default:(tool_name ^ "*")) in
    match archive_kind with
    | `Tar tar_option ->
      chain [ shf "tar xvf%s %s" tar_option archive; and_cd ]
    | `Zip ->
      chain [ shf "unzip %s" archive; and_cd]
    | `Deb ->
      chain [
        exec ["ar""x"; archive];
        exec ["tar""xvfz""data.tar.gz"];
      ]
    | `None -> sh "echo Not-an-archive"
  in
  workflow_node
    ~name:(sprintf "Install %s" tool_name)
    witness
    (* (single_file ~host *)
    (*    (Option.value witness ~default:(install_path // tool_name))) *)
    ~edges:[
      on_failure_activate (rm_path ~host install_path);
    ]
    ~make:(
      run_program
        ~requirements:[
          `Internet_access;
          `Self_identification ["generic-instalation"; tool_name];
        ]
        Program.(
          shf "mkdir -p %s" install_path
          && shf "cd %s" install_path
          && Workflow_utilities.Download.wget_program url
          && unarchival
          && install_program
          && sh "echo Done"
        ))

module Tool_def = Machine.Tool.Definition

type installable = {
  tool_definition : Tool_def.t;
  url : string;
  install_program : path: string -> KEDSL.Program.t;
  init_program : path: string -> KEDSL.Program.t;
  witness: host: KEDSL.Host.t -> path: string -> KEDSL.unknown_product;
  unarchived_directory : string option;
}
let noop = KEDSL.Program.sh "echo Nothing-done-here"


let installable_tool ~url
    ?(install_program = fun ~path -> noop)
    ?(init_program = fun ~path -> noop)
    ~witness
    ?unarchived_directory
    tool_definition =
  {tool_definition; url; install_program;
   init_program; witness; unarchived_directory}

let render_installable_tool ~run_program ~host ~install_tools_path tool =
  let path =
    install_tools_path // Tool_def.to_directory_name tool.tool_definition in
  let  ensure =
    generic_installation
      ?unarchived_directory:tool.unarchived_directory
      ~run_program ~host 
      ~install_path:path
      ~install_program:(tool.install_program ~path)
      ~witness:(tool.witness ~host ~path)
      ~url:tool.url
      (tool.tool_definition.Tool_def.name)
  in
  Machine.Tool.create tool.tool_definition ~ensure
    ~init:(tool.init_program path)

let add_to_dollar_path ~path = KEDSL.Program.shf "export PATH=%s:$PATH" path

let make_and_copy_bin bin =
  fun ~path -> KEDSL.Program.(
      sh "make" && shf "cp %s %s" bin path
    )
let witness_file bin =
  fun ~host ~path ->
    let p = KEDSL.single_file ~host (path // bin) in
    object method is_done = p#is_done end
let witness_list l =
  fun ~host ~path ->
    KEDSL.list_of_files ~host (List.map l ~f:(fun bin -> path // bin))
    |> fun p -> object method is_done = p#is_done end

let bwa = 
  installable_tool 
    Machine.Tool.Default.bwa
    ~url:"http://downloads.sourceforge.net/project/bio-bwa/bwa-0.7.10.tar.bz2"
    ~install_program:(make_and_copy_bin "bwa")
    ~init_program:add_to_dollar_path
    ~witness:(witness_file "bwa")

let stringtie =
  installable_tool
    Machine.Tool.Default.stringtie
    ~url:"https://github.com/gpertea/stringtie/archive/v1.2.2.tar.gz"
    ~install_program:(make_and_copy_bin "stringtie")
    ~init_program:add_to_dollar_path
    ~witness:(witness_file "stringtie")

let vcftools =
  installable_tool Machine.Tool.Default.vcftools
    ~url:"http://downloads.sourceforge.net/project/vcftools/vcftools_0.1.12b.tar.gz"
    ~install_program:(fun ~path -> KEDSL.Program.(
        sh "make"
        && shf  "cp -r bin %s" path
        && shf  "cp -r lib/perl5/site_perl %s" path
      ))
    ~witness:(witness_file @@ "bin" // "vcftools")
    ~init_program:(fun ~path ->
        KEDSL.Program.(shf "export PATH=%s/bin/:$PATH" path
                       && shf "export PERL5LIB=$PERL5LIB:%s/site_perl/" path))

let bedtools =
  installable_tool Machine.Tool.Default.bedtools
    ~url:"https://github.com/arq5x/bedtools2/archive/v2.23.0.tar.gz"
    ~install_program:(fun ~path -> KEDSL.Program.(
        sh "make" && shf "cp -r bin %s" path))
    ~init_program:(fun ~path ->
        KEDSL.Program.(shf "export PATH=%s/bin/:$PATH" path))
    ~witness:(witness_file @@ "bin" // "bedtools")

let mosaik =
  let url =
    "https://mosaik-aligner.googlecode.com/files/MOSAIK-2.2.3-source.tar" in
  installable_tool Machine.Tool.Default.mosaik ~url
    ~unarchived_directory:"MOSAIK*"
    ~init_program:(fun ~path ->
        KEDSL.Program.(
          shf "export PATH=%s:$PATH" path
          && shf "export MOSAIK_PE_ANN=%s/pe.ann" path
          && shf "export MOSAIK_SE_ANN=%s/se.ann" path
        ))
    ~witness:(witness_file "MosaikAligner")
    ~install_program:KEDSL.Program.(fun ~path ->
        sh "make"
        && shf "cp networkFile/*pe.ann %s/pe.ann" path
        && shf "cp networkFile/*se.ann %s/se.ann" path
        && shf "cp bin/* %s" path
      )

let star =
  let url = "https://github.com/alexdobin/STAR/archive/STAR_2.4.1d.tar.gz" in
  let star_binary = "STAR" in
  (* TODO: there are other binaries in `bin/` *)
  let star_binary_path = sprintf "bin/Linux_x86_64/%s" star_binary in
  installable_tool ~url Machine.Tool.Default.star
    ~init_program:add_to_dollar_path
    ~unarchived_directory:"STAR-*"
    ~install_program:KEDSL.Program.(fun ~path ->
        shf "cp %s %s" star_binary_path path)
    ~witness:(witness_file star_binary)

let hisat tool =
  let open KEDSL in
  let url, hisat_binary = 
    let open Machine.Tool.Default in
    match tool with
    | one when one = hisat ->
      "http://ccb.jhu.edu/software/hisat/downloads/hisat-0.1.6-beta-Linux_x86_64.zip",
      "hisat"
    | two when two = hisat2 ->
      "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.0.2-beta-Linux_x86_64.zip",
      "hisat2"
    | other ->
      failwithf "Can't install Hisat version: %s" (Tool_def.to_string other)
  in
  installable_tool tool
    ~url
    ~witness:(witness_file hisat_binary)
    ~install_program:KEDSL.Program.(fun ~path ->
        shf "mv hisat* %s" path
      )
  ~init_program:add_to_dollar_path

let kallisto =
  let url = "https://github.com/pachterlab/kallisto/releases/download/v0.42.3/kallisto_linux-v0.42.3.tar.gz" in
  installable_tool Machine.Tool.Default.kallisto ~url
    ~witness:(witness_file "kallisto")
    ~install_program:KEDSL.Program.(fun ~path ->
        shf "cp -r * %s" path
      )
  ~init_program:add_to_dollar_path

let samtools =
  let url = "https://github.com/samtools/samtools/releases/download/1.3/samtools-1.3.tar.bz2" in
  let toplevel_tools = ["samtools"in
  let htslib = ["bgzip""tabix" ] in
  let tools = toplevel_tools @ htslib in
  let install_program ~path =
    let open KEDSL.Program in
    sh "make"
    && shf "cp %s %s" (String.concat toplevel_tools ~sep:" ")  path
    && sh "cd htslib*/"
    && sh "make"
    && shf "cp %s %s" (String.concat htslib ~sep:" ") path
    && sh "echo Done"
  in
  let witness = witness_list tools in
  installable_tool Machine.Tool.Default.samtools ~url ~install_program
    ~init_program:add_to_dollar_path ~witness


let cufflinks =
  let url = 
    "http://cole-trapnell-lab.github.io/cufflinks/assets/downloads/cufflinks-2.2.1.Linux_x86_64.tar.gz" in
  let witness = witness_file "cufflinks" in
  let install_program ~path = KEDSL.Program.(shf "cp * %s" path) in
  installable_tool Machine.Tool.Default.cufflinks ~install_program ~url 
    ~init_program:add_to_dollar_path ~witness

let somaticsniper =
  let url =
    let deb_file = "somatic-sniper1.0.3_1.0.3_amd64.deb" in
    sprintf
      "http://apt.genome.wustl.edu/ubuntu/pool/main/s/somatic-sniper1.0.3/%s"
      deb_file
  in
  let binary = "somaticsniper" in
  let binary_in_deb = "usr/bin/bam-somaticsniper1.0.3" in
  let install_program ~path =
    KEDSL.Program.(shf "mv %s/%s %s/%s" path binary_in_deb path binary) in
  installable_tool Machine.Tool.Default.somaticsniper ~install_program ~url
    ~witness:(witness_file binary) ~init_program:add_to_dollar_path



let varscan =
  let url =
    "http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.5.jar" in
  let jar = "VarScan.v2.3.5.jar" in
  let witness = witness_file jar in
  let init_program ~path =
    KEDSL.Program.(shf "export VARSCAN_JAR=%s/%s" path jar) in
  installable_tool Machine.Tool.Default.varscan ~url ~init_program ~witness

let picard =
  let url =
    "https://github.com/broadinstitute/picard/releases/download/1.127/picard-tools-1.127.zip"
  in
  let jar = "picard-tools-1.127" // "picard.jar" in
  let init_program ~path = KEDSL.Program.(shf "export PICARD_JAR=%s/%s" path jar) in
  installable_tool Machine.Tool.Default.picard ~url ~init_program
    ~witness:(witness_file jar)

type broad_jar_location = [
  | `Scp of string
  | `Wget of string
  | `Fail of string
]
(** Mutect (and some other tools) are behind some web-login annoying thing: c.f. <http://www.broadinstitute.org/cancer/cga/mutect_download> So the user of the lib must provide an SSH or HTTP URL (or reimplement the `Tool.t` is some other way). *)


let get_broad_jar
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_path 
    loc =
  let open KEDSL in
  let jar_name =
    match loc with
    | `Fail s -> "cannot-get-broad-jar.jar"
    | `Scp s -> Filename.basename s
    | `Wget s -> Filename.basename s in
  let local_box_path = install_path // jar_name in
  let open KEDSL in
  workflow_node (single_file local_box_path ~host)
    ~name:(sprintf "get-%s" jar_name)
    ~edges:[
      on_failure_activate (rm_path ~host local_box_path)
    ]
    ~make:(
      run_program
        ~requirements:[
          `Internet_access;
          `Self_identification ["broad-jar-instalation"; jar_name];
        ]
        Program.(
          shf "mkdir -p %s" install_path
          && begin match loc with
          | `Fail msg ->
            shf "echo 'Cannot download Broad JAR: %s'" msg
            && sh "exit 4"
          | `Scp s ->
            shf "scp %s %s"
              (Filename.quote s) (Filename.quote local_box_path)
          | `Wget s ->
            shf "wget %s -O %s"
              (Filename.quote s) (Filename.quote local_box_path)
          end))

let mutect_tool
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_tools_path loc =
  let tool = Machine.Tool.Default.mutect in
  let open KEDSL in
  let install_path = install_tools_path // Tool_def.to_directory_name tool in
  let get_mutect = get_broad_jar ~run_program ~host ~install_path loc in
  Machine.Tool.create tool ~ensure:get_mutect
    ~init:Program.(shf "export mutect_HOME=%s" install_path)

let gatk_tool
    ~(run_program : Machine.Make_fun.t)
    ~host ~install_tools_path loc =
  let tool = Machine.Tool.Default.gatk in
  let open KEDSL in
  let install_path = install_tools_path // Tool_def.to_directory_name tool in
  let ensure = get_broad_jar ~run_program ~host ~install_path loc in
  Machine.Tool.create tool ~ensure
    ~init:Program.(shf "export GATK_JAR=%s" ensure#product#path)

(**

Strelka is built from source but does not seem to build on MacOSX.

*)


let strelka =
  let url =
    "ftp://strelka:%27%27@ftp.illumina.com/v1-branch/v1.0.14/strelka_workflow-1.0.14.tar.gz" in
  let strelka_bin = "usr" // "bin" in
  let witness = witness_file @@ strelka_bin // "configureStrelkaWorkflow.pl" in
  let install_program ~path =
    (* C.f. ftp://ftp.illumina.com/v1-branch/v1.0.14/README *)
    KEDSL.Program.(
      shf "./configure --prefix=%s" (path // "usr")
      && sh "make && make install"
    )
  in
  let init_program ~path =
    KEDSL.Program.(shf "export STRELKA_BIN=%s/%s" path strelka_bin) in
  installable_tool Machine.Tool.Default.strelka ~url
    ~init_program ~install_program ~witness

let virmid =
  let url =
    "http://downloads.sourceforge.net/project/virmid/virmid-1.1.1.tar.gz" in
  let jar = "Virmid-1.1.1" // "Virmid.jar" in
  let init_program ~path =
    KEDSL.Program.(shf "export VIRMID_JAR=%s/%s" path jar) in
  installable_tool Machine.Tool.Default.virmid ~url ~init_program
    ~unarchived_directory:"."
    ~witness:(witness_file jar)

let muse =
  let url =
    "http://bioinformatics.mdanderson.org/Software/MuSE/MuSEv1.0b" in
  let binary = "MuSEv1.0b" in
  let install_program ~path =
    KEDSL.Program.( shf "chmod +x %s/%s" path binary) in
  let init_program ~path =
    KEDSL.Program.(shf "export muse_bin=%s/%s" path binary) in
  installable_tool Machine.Tool.Default.muse ~url
    ~install_program ~init_program
    ~witness:(witness_file binary)

let default_jar_location msg (): broad_jar_location =
  `Fail (sprintf "No location provided for %s" msg)

let default_toolkit
    ~run_program
    ~host ~install_tools_path
    ?(mutect_jar_location = default_jar_location "Mutect")
    ?(gatk_jar_location = default_jar_location "GATK")
    () =
  let install installable =
    render_installable_tool ~host installable ~install_tools_path ~run_program
  in
  Machine.Tool.Kit.concat [
    Machine.Tool.Kit.of_list [
      mutect_tool ~run_program ~host ~install_tools_path (mutect_jar_location ());
      gatk_tool ~run_program ~host ~install_tools_path (gatk_jar_location ());
      install bwa;
      install samtools;
      install bedtools;
      install vcftools;
      install strelka;
      install picard;
      install somaticsniper;
      install varscan;
      install muse;
      install virmid;
      install star;
      install stringtie;
      install cufflinks;
      install @@ hisat Machine.Tool.Default.hisat;
      install @@ hisat Machine.Tool.Default.hisat2;
      install mosaik;
      install kallisto;
    ];
    Biopam.default ~run_program ~host
      ~install_path:(install_tools_path // "biopam-kit") ();
  ]

end