sig

open Biokepi_run_environment
open Common

(** The default location from where we download opam. *)

val default_opam_url : string

(** The default location from where we download biopam. *)

val default_biopam_url : string

type tool_type = [
  | `Library of string   
        (** The export variable that points to witness. *)

  | `Application
]
(** The type of tool that we are installing via opam.

This guides installation and determines: 1. where we look for the witness in opam_install_path/package or opam_install_path/bin. 2. Whether we export $PATH (Application) or $LIBVAR (Library).*)



(** A description of what we'd like Biopam to install.*)

type install_target = private {
  
  (** The package handle the Biopam package provides. *)

  definition: Machine.Tool.Definition.t;

  
  (** What are we installing? See tool_type. *)

  tool_type : tool_type;

  
  (** Name of the package: `opam install package` *)

  package : string;

  
  (** File that is passed to test determine success and what is exported. *)

  witness : string;

  
  (** Test to determine success of the install. Defaults to `test -e witness`. *)

  test :
    (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;

  (* Install dependencies. *)
  edges : KEDSL.workflow_edge list;

  (* Transform the install and init programs, e.g. it needs to be run in a
     specific environment. Defaults to a “no-op”. *)

  init_environment : install_path: string -> KEDSL.Program.t;

  
  (** Whether this package requires Conda packages. *)

  requires_conda: bool;

  
  (** Which opam-repository the tool should come from. *)

  repository: [ `Biopam | `Opam | `Custom of string ];

  
  (** Which compiler should be used to create the tool's own installation opam-switch. *)

  compiler: string option;
}

val install_target:
  ?tool_type:tool_type ->
  ?test:(host: KEDSL.Host.t -> string -> KEDSL.Command.t) ->
  ?edges: KEDSL.workflow_edge list ->
  ?init_environment:(install_path:string -> KEDSL.Program.t) ->
  ?requires_conda:bool ->
  witness:string ->
  ?package:string ->
  ?repository:[ `Biopam | `Custom of string | `Opam ] ->
  ?compiler:string ->
  Machine.Tool.Definition.t ->
  install_target
(** Create install_target values.

  • tool_type: the kind of tool being installed. See tool_type. Default: `Application.
  • test: test to determine success of the install. Default: "test -e <witness>".
  • edges: dependencies to install first.
  • init_environment: transform the install and init programs, e.g. it needs to be run in a specific environment. Defaults to a “no-op”.
  • witness: name of the file (base-name) that is tested to determine the success of an installation (usually the binary for `Applications, or the JAR for Java libraries, etc.).
  • requires_conda: whether this package requires Python packages installed with Conda.
  • package: the package name in the opam sense i.e. "opam install <package-name>" (the default is to construct the package name from the Machine.Tool.Definition.t).
  • repository: Which opam-repository the tool should come from:
    • `Biopam: the Biopam project's repository (default).
    • `Opam: the default Opam repository.
    • `Custom url: use custom URL.
  • compiler: Which compiler should be used to create the tool's own installation opam-switch (the default is None corresponding to "0.0.0" for `Biopam and "4.02.3" for `Opam or `Custom _)
  • anonymous argument: the tool that the installation-target provides.
*)


val provide :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path:string -> install_target -> Machine.Tool.t
(** Provide the specified (via install_target) tool.*)


val default : 
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path:string -> unit ->
  Machine.Tool.Kit.t
(** A set of default tools that have been specified in this module.*)

end 
struct
(** Provide tools via Biopam: https://github.com/solvuu/biopam *)


open Biokepi_run_environment
open Common

(* What are we installing via opam. This determines where we look for the
   witness; in [opam_install_path]/package or [opam_install_path]/bin. *)

type tool_type = [
  | `Library of string
  | `Application
]

type install_target = {
  definition: Machine.Tool.Definition.t;
  tool_type : tool_type;
  package : string; 
     (** What do we call 'install opam ' with *)

  witness : string; 
     (** File that must exist after install, ex:
  • bowtie exec
  • picard.jar
*)

  test : (host:KEDSL.Host.t -> string -> KEDSL.Command.t) option;
  edges : KEDSL.workflow_edge list;

  init_environment : install_path: string -> KEDSL.Program.t;
  requires_conda: bool;
  repository: [ `Biopam | `Opam | `Custom of string ];
  compiler: string option;
}
let install_target
    ?(tool_type = `Application)
    ?test
    ?(edges = [])
    ?(init_environment =
      fun ~install_path -> KEDSL.Program.(sh "echo 'Default Init'"))
    ?(requires_conda = false)
    ~witness
    ?package
    ?(repository = `Biopam)
    ?compiler
    definition =
  let package =
    match package with
    | Some p -> p
    | None -> Machine.Tool.Definition.to_opam_name definition in
  {definition; tool_type; package; witness; test; edges;
   init_environment; requires_conda; repository; compiler}

let default_test ~host path =
  KEDSL.Command.shell ~host (sprintf "test -e %s" path)

let default_opam_url =
  "https://github.com/ocaml/opam/releases/download/1.2.2/opam-1.2.2-x86_64-Linux"

(* Hide the messy logic of calling opam in here. This should not be exported
   and use the Biopam functions directly.*)

module Opam = struct

  let dir ~install_path = install_path // "opam_dir"
  let bin ~install_path = dir ~install_path // "opam"
  let root ~install_path name = dir ~install_path // "opam-root-" ^ name

  (* TODO:
     Instead of just making sure that this file exists? Wouldn't it be better
     to make sure that a command from this program gives the right output?
     ie. $ opam --version = 1.2.2 *)

  let target ~host ~install_path =
    KEDSL.single_file ~host (bin ~install_path)

  (* A workflow to ensure that opam is installed. *)
  let installed ~(run_program : Machine.Make_fun.t) ~host ~install_path =
    let url = default_opam_url in
    let opam_exec   = target ~host ~install_path in
    let install_dir = dir ~install_path in
    let open KEDSL in
    workflow_node opam_exec
      ~name:"Install opam"
      ~make:(
        run_program
          ~requirements:[
            `Internet_access;
            `Self_identification ["opam-installation"];
          ]
          Program.(
            exec ["mkdir""-p"; install_dir]
            && exec ["cd"; install_dir]
            && Workflow_utilities.Download.wget_program ~output_filename:"opam" url
            && shf "chmod +x %s" opam_exec#path))
      ~edges:[
        on_failure_activate
          (Workflow_utilities.Remove.path_on_host ~host install_dir);
      ]

  let kcom ~root_name ~install_path k fmt =
    let bin = bin ~install_path in
    let root = root ~install_path root_name in
    (* 
       - PATH: we add `opam` so that installation scripts can use the tool
       - OCAMLRUNPARAM: we want OCaml backtraces
       - OPAMLOCKRETRIES: installations should concurrently but in case of we
         bump the lock to wait instead of fail
       - OPAMBASEPACKAGES: we make sure opam does not install any package by
         default
       - OPAMYES: answer `y` to all questions (i.e. batch mode)
       - OPAMROOT: our per-package replacement for `~/.opam/`
    *)
 
    ksprintf k
      ("PATH=%s:$PATH OCAMLRUNPARAM=b OPAMLOCKRETRIES=20000 OPAMBASEPACKAGES= OPAMYES=true OPAMROOT=%s %s " ^^ fmt)
      (Filename.dirname bin)
      root
      bin

  let program_sh ?(never_fail = false) ~root_name ~install_path fmt =
    kcom ~root_name ~install_path (fun s ->
        KEDSL.Program.sh
          (if never_fail
           then s ^ " | echo 'Never fails'"
           else s))
      fmt

  let command_shell ~root_name ~host ~install_path fmt =
    kcom ~root_name ~install_path (KEDSL.Command.shell ~host) fmt

  let tool_type_to_variable = function
    | `Library _   -> "lib"
    | `Application -> "bin"

  let root_of_package p = "root-" ^ p

  (* Answer Opam 'which' questions *)
  let which ~install_path {package; witness; tool_type; _} =
    let v = tool_type_to_variable tool_type in
    let s =
      let package_name = String.take_while package ~f:((<>) '.'in
      kcom ~root_name:(root_of_package package) ~install_path
        (fun x -> x) "config var %s:%s" package_name v in
    (sprintf "$(%s)" s) // witness

end

let default_biopam_url = "https://github.com/solvuu/biopam.git"

let install_tool ~(run_program : Machine.Make_fun.t) ~host ~install_path
    ({package; test; edges; init_environment; repository; _ } as it) =
  let open KEDSL in
  let run_prog name =
    run_program
      ~requirements:[
        `Internet_access;
        `Self_identification ["opam"; name; package];
      ]
  in
  let root_name = Opam.root_of_package package in
  let default_compiler, repo_url =
    match repository with
    | `Biopam -> "0.0.0", default_biopam_url
    | `Opam -> "4.02.3""https://opam.ocaml.org"
    | `Custom c -> "4.02.3", c
  in
  let compiler = Option.value it.compiler ~default:default_compiler in
  let edges =
    let edges =
      [ KEDSL.depends_on (Opam.installed ~run_program ~host ~install_path)] in
    if it.requires_conda
    then
      depends_on (Conda.configured ~run_program ~host ~install_path ()) :: edges
    else edges in
  let name = "Installing " ^ package in
  let make =
    run_prog "install"
      Program.(
        (if it.requires_conda
         then Conda.init_biokepi_env ~install_path
         else sh "echo 'Does not need Conda'")
        && shf "rm -fr %s" (Filename.quote root_name)
        && Opam.program_sh
          ~install_path ~root_name "init --comp=%s %s"
          compiler (Filename.quote repo_url)
        && Opam.program_sh ~root_name ~install_path "install %s" package
      )
  in
  let shell_which = Opam.which ~install_path it in
  let test = (Option.value test ~default:default_test) ~host shell_which in
  let cond =
    object
      method is_done = Some (`Command_returns (test, 0))
      method shell_which = shell_which
    end
  in
  workflow_node cond ~name ~make ~edges

let provide ~run_program ~host ~install_path it =
  let install_workflow =
    install_tool ~run_program ~host ~install_path it in
  let export_var =
    match it.tool_type with
    | `Application -> None
    | `Library v   ->
      let path = install_workflow#product#shell_which in
      Some KEDSL.Program.(shf "export %s=\"%s${%s:+:}${%s}\"" v path v v)
  in
  Machine.Tool.create it.definition
    ~ensure:install_workflow
    ~init:KEDSL.Program.(
        (if it.requires_conda
         then Conda.init_biokepi_env ~install_path
         else sh "echo 'Does not need Conda'")
        && it.init_environment ~install_path
        && Opam.kcom ~root_name:(Opam.root_of_package it.package) ~install_path
          (shf "eval $(%s)""config env"
        && Option.value export_var ~default:(sh "echo 'No export var'")
      )

let test_version ~host path =
  KEDSL.Command.shell ~host (sprintf "%s --version" path)

let picard =
  install_target
    ~tool_type:(`Library "PICARD_JAR")
    ~witness:"picard.jar"
    (Machine.Tool.Definition.create "picard" ~version:"1.128")

let bowtie =
  install_target
    ~witness:"bowtie" ~test:test_version
    Machine.Tool.Default.bowtie

let seq2hla =
  install_target
    ~witness:"seq2HLA" ~requires_conda:true
    ~package:"seq2HLA.2.2" (* we need to uppercase HLA for opam *)
    Machine.Tool.Default.seq2hla

let optitype =
  install_target ~witness:"OptiTypePipeline" Machine.Tool.Default.optitype
    ~requires_conda:true
    ~init_environment:KEDSL.Program.(
        fun ~install_path ->
          let name = Machine.Tool.(Default.optitype.Definition.name) in
          shf "export OPAMROOT=%s" (Opam.root_of_package name |> Opam.root ~install_path)
          && shf "export OPTITYPE_DATA=$(%s config var lib)/optitype"
            (Opam.bin ~install_path)
      )

let default :
  run_program: Machine.Make_fun.t ->
  host: Common.KEDSL.Host.t ->
  install_path: string ->
  unit ->
  _ = fun ~run_program ~host ~install_path () ->
  Machine.Tool.Kit.of_list
    (List.map ~f:(provide ~run_program ~host ~install_path) [
    picard;
    bowtie;
    seq2hla;
    optitype;
  ])

end