MicroRegionPartitioner

Assign loci from a LociSet to partitions, where each partition overlaps approximately the same number of "regions" (reads mapped to a reference genome).

The approach we take is:

(1) chop up the loci uniformly into many genomic "micro-partitions."

(2) for each micro-partition, calculate the number of regions that overlap it.

(3) using these counts, assign loci to real (Spark) partitions, assuming approximately uniform depth within each micro-partition.

Some advantages of this approach are:

Stages (1) and (3), which are done locally by the Spark driver, are constant time with respect to the number of regions (though linear in the number of micro-partitions).
Stage (2), where runtime does depend on the number of regions, is done in parallel with Spark.
Accuracy vs. performance can be tuned by setting microPartitionsPerPartition.
Does not require a distributed sort.

returns: a LociPartitioning holding the locus -> partition assignments.

Linear Supertypes

LociPartitioner, AnyRef, Any

Instance Constructors

new MicroRegionPartitioner(regions: RDD[R], numPartitions: NumPartitions, microPartitionsPerPartition: NumMicroPartitions)(implicit arg0: ClassTag[R])

numPartitions
Number of partitions to split reads into.
microPartitionsPerPartition
Long >= 1. Number of micro-partitions generated for each of the numPartitions Spark partitions that will be computed. Higher values yield a more exact but more expensive computation. In the extreme, setting this to a value greater than the number of loci (per partition) yields an exact calculation.

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def partition(loci: LociSet): LociPartitioning

Definition Classes
MicroRegionPartitioner → LociPartitioner
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

class MicroRegionPartitioner[R <: ReferenceRegion] extends LociPartitioner

Instance Constructors

new MicroRegionPartitioner(regions: RDD[R], numPartitions: NumPartitions, microPartitionsPerPartition: NumMicroPartitions)(implicit arg0: ClassTag[R])

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def partition(loci: LociSet): LociPartitioning

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from LociPartitioner

Inherited from AnyRef

Inherited from Any

Ungrouped