Module Biokepi.EDSL (.ml)

module EDSL: sig .. end
The Embedded Bioinformatics Domain Specific Language

This Embedded DSL is implemented following the “Typed Tagless Final Interpreter” method.

Its usage is as follows:

Example:
     module Pipeline_1 (Bfx : Biokepi.EDSL.Semantics) = struct

       (* Reusable function within the EDSL: *)
       let align_list_of_single_end_fastqs (l : string list) : [ `Bam ] Bfx.repr =
         let list_expression : [ `Fastq ] list Bfx.repr =
           List.map l ~f:(fun path ->
               (* create [ `Fastq ] repr term: *)
               Bfx.fastq ~sample_name:"Test" ~r1:path ())
           |> Bfx.list (* Assemble OCaml list into an EDSL list *)
         in
         let aligner : ([ `Fastq ] -> [ `Bam ]) Bfx.repr =
           (* create an EDSL-level function with `lambda`: *)
           Bfx.lambda (fun fq -> Bfx.bwa_aln ~reference_build:"hg19" fq)
         in
         (* Call the aligner on all fastq-terms and then merge the result
            into a single bam: *)
         Bfx.list_map list_expression ~f:aligner |> Bfx.merge_bams

       (* Function “exported” (to be used after compilation): *)
       let align_list l : [ `Bam ] Bfx.observation =
         Bfx.observe (fun () ->
             align_list_of_single_end_fastqs l
           )

     end
   

You can then compile this pipeline (by applying any sub-module of Biokepi.EDSL.Compile, optionally with Biokepi.EDSL.Transform functors applied), for example to a dot-graph:

      let module Dotize_pipeline_1 =
        Pipeline_1(Biokepi.EDSL.Compile.To_dot) in
      let pipeline_1_dot = test_dir // "pipeline-1.dot" in
      write_file pipeline_1_dot
        ~content:(Dotize_pipeline_1.align_list [ (* FASTQS *) ]);
      let pipeline_1_png = test_dir // "pipeline-1.png" in
      cmdf "dot -v -Tpng  %s -o %s" pipeline_1_dot pipeline_1_png;
    

Or reuse it in further pipelines:

      module Pipeline_2 (Bfx : Biokepi.EDSL.Semantics) = struct
        module P1 = Pipeline_1(Bfx)
        (* use the function  P1.align_list_of_single_end_fastqs *)
      end
    

See the TTfi_pipeline test ("./src/test/ttfi_pipeline.ml") for more examples.

This framework is also extensible: one can add new constructs to the language, or new transformations, while reusing most of the work already done.


module type Semantics = Biokepi_pipeline_edsl.Semantics.Bioinformatics_base
The definition of the Embedded DSL
module Compile: sig .. end
Various compilers to “interpret” the EDSL.
module Transform: sig .. end
Transformations on the EDSL.
module Library: Biokepi_pipeline_edsl.Pipeline_library
Library of reusable pieces of pipeline.