Module falas

Types

Contig = ref object
  sequence*: string            ## the assembled sequence
  support*: seq[uint32]        ## number of reads covering each base in the contig.
  nreads*: int                 ## number of reads that contributed to this contig.
  start*: int
  reads*: seq[string]
  names*: seq[string] ## names are used to track fragments. it is recommended to send only
                    ## primary alignments (not supplementary). The user is responsible for sending
                    ## names that assure fragment-specificity (this should be a pretty minimal burden).
  
A contig is a collection of sequences that have been merged. The support field indicates the number of reads supporting the contig at each base.
resolvable_mismatch_fn = proc (qsup: uint32; tsup: uint32; qreads: int; treads: int): bool

Vars

Version = falasVersion

Consts

unaligned = -9223372036854775808

Procs

proc `$`(c: Contig): string {...}{.raises: [], tags: [].}
string representation of a contig.
proc fastq(c: Contig; name: string): string {...}{.raises: [], tags: [].}
a fastq-representation of a contig with the base-qualities indicating the amount of support for each base (+33)
proc aligned(ma: Match): bool {...}{.inline, raises: [], tags: [].}
indicates whether a match is aligned.
proc len(c: Contig): int {...}{.inline, raises: [], tags: [].}
proc `[]`(c: Contig; i: int): char {...}{.inline, raises: [], tags: [].}
proc trim(c: var Contig; min_support: int = 2) {...}{.raises: [], tags: [].}
trim bases in a contig that do not have at least min_support
proc slide_align(q: Contig; t: var Contig; min_overlap: float64 = 0.7;
                max_mismatch: int = 0;
                resolved: resolvable_mismatch_fn = resolvable_mismatch): Match {...}{.
    raises: [], tags: [].}

Align (q)uery to (t)arget, requiring a minimum overlap (min_overlap) and staying within the specified mismatch limit (max_mismatch). If unaligned, the constant unaligned is returned. A negative value indicates that the query extends the target to the left. A positive value indicates the start position of the query within the target.

The default rule is to resolve a mismatch in q if qsupport < 3 and tsupport > 5 * qsupport.

proc insert(t: var Contig; q: var Contig; m: var Match) {...}{.raises: [], tags: [].}
insert a contig and perform error correction for sites that were calculated during the matching.
proc insert(contigs: var seq[Contig]; q: var Contig; min_overlap: float64 = 0.8;
           max_mismatch: int = 0) {...}{.raises: [], tags: [].}
proc insert(contigs: var seq[Contig]; q: string; start: int; name: string = "";
           min_overlap: float64 = 0.8; max_mismatch: int = 0) {...}{.raises: [], tags: [].}
proc defrag(contigs: var seq[Contig]; min_overlap_bases: int = 10): seq[Contig] {...}{.
    raises: [], tags: [].}
Merge contigs using fragment information provided in the names attribute. This still requires min_overlap_bases of overlap, which can be a much smaller number than we require for regular insertion since we know they are from the same fragment.
proc combine(contigs: var seq[Contig]; max_mismatch: int = 0; min_support: int = 3;
            min_overlap: float64 = 0.8; again: bool = true): seq[Contig] {...}{.raises: [],
    tags: [].}
Merge contigs. Note that this modifies the contigs in-place. The again parameter is for internal use only and should not be changed by the user.