vcf

Types

Header = ref object of RootObj
  hdr*: ptr bcf_hdr_t
Header wraps the VCF/BCF header info.
VCF = ref object of RootObj
  hts: ptr htsFile
  header*: Header
  c: ptr bcf1_t
  bidx: ptr hts_idx_t
  tidx: ptr tbx_t
  fname: string
VCF is a VCF/BCF object
Variant = ref object
  c*: ptr bcf1_t
  p: pointer
  vcf*: VCF
  own: bool
Variant is a single line from a VCF
INFO = ref object
  v: Variant
  i: int32
INFO of a variant
FORMAT = ref object
  v*: Variant
  p*: pointer
FORMAT exposes access to the sample format fields in the VCF
CPtr[T] = ptr CArray[T]
Status {...}{.pure.} = enum
  IncorrectNumberOfValues = -10, ## when setting a FORMAT field, the number of values must be a multiple of the number of samples
  NotFound = -3,                ## Tag is not present in the Record
  UnexpectedType = -2,          ## E.g. user requested int when type was float.
  UndefinedTag = -1,            ## Tag is not present in the Header
  OK = 0                        ## Tag was found
Status contains the values returned when getting or setting INFO or FORMAT fields.
BCF_HEADER_TYPE {...}{.pure.} = enum
  BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT, BCF_HL_CTG, BCF_HL_STR, BCF_HL_GEN
BCF_TYPE {...}{.pure.} = enum
  NULL = 0, INT8 = 1, INT16 = 2, INT32 = 3, FLOAT = 5, CHAR = 7
HeaderRecord = object
  name*: string
  c: ptr bcf_hrec_t
HeaderRecord represents a row from the VCF header (hrec from htslib)
FormatField = object
  name*: string
  n_per_sample*: int           ## number of entries per sample
  vtype*: BCF_TYPE             ## variable type is one of the BCF_BT_* types.
  i*: int
FormatField represents the name (e.g. AD or DP), the number of values per sample, and the type (BCF_BT*) of a FORMAT field.
InfoField = object
  name*: string
  n*: int                      ## number of values. 1048575 means variable-length (Number=A)
  vtype*: BCF_TYPE
  i*: int
Contig = object
  name*: string
  length*: int64
Contig is a chromosome+length from the VCF header. If the length is not found, it is set to -1.
Genotypes {...}{.shallow.} = object
  gts: seq[int32]
  ploidy: int
Genotypes are the genotype calls for each sample. These are represented efficiently with the int32 values used in the underlying representation. However, we are able to efficiently manipulate them by adding methods to the base type.
Allele = distinct int32
Genotype = seq[Allele]

Vars

errno: cint

Procs

proc n_samples(v: VCF): int {...}{.inline, raises: [], tags: [].}
proc set_samples(v: VCF; samples: seq[string]) {...}{.raises: [], tags: [].}
set the samples that will be decoded
proc samples(v: VCF): seq[string] {...}{.raises: [], tags: [].}
get the list of samples
proc add_sample(v: VCF; sample: string) {...}{.raises: [], tags: [].}
add a sample to the VCF
proc add_string(h: Header; header: string): Status {...}{.inline, raises: [], tags: [].}
add the full string header to the VCF.
proc `$`(h: Header): string {...}{.raises: [ValueError], tags: [].}
return the string header
proc `$`(h: HeaderRecord): string {...}{.raises: [], tags: [].}
proc `[]`(h: HeaderRecord; key: string): string {...}{.raises: [KeyError], tags: [].}
get the value from the record; key can be, for example, ID or Description or Number or Type
proc get(h: Header; name: string; typ: BCF_HEADER_TYPE): HeaderRecord {...}{.
    raises: [KeyError], tags: [].}
get the HeaderRecord for the given name.
proc from_string(h: var Header; s: string) {...}{.raises: [ValueError], tags: [].}
create a new header from a VCF header string.
proc add_info(h: Header; ID: string; Number: string; Type: string; Description: string): Status {...}{.
    raises: [ValueError], tags: [].}
add an INFO field to the header with the given values
proc remove_info(h: Header; ID: string): Status {...}{.raises: [], tags: [].}
remove an INFO field from the header
proc add_format(h: Header; ID: string; Number: string; Type: string; Description: string): Status {...}{.
    raises: [ValueError], tags: [].}
add a FORMAT field to the header with the given values
proc remove_format(h: Header; ID: string): Status {...}{.raises: [], tags: [].}
remove a FORMAT field from the header
proc info(v: Variant): INFO {...}{.inline, noInit, raises: [], tags: [].}
proc format(v: Variant): FORMAT {...}{.inline, noInit, raises: [], tags: [].}
proc n_samples(v: Variant): int {...}{.inline, raises: [], tags: [].}
proc delete(f: FORMAT; key: string): Status {...}{.inline, raises: [KeyError], tags: [].}
delete the value from the FORMAT field for all samples
proc get(f: FORMAT; key: string; data: var seq[int32]): Status {...}{.inline, raises: [],
    tags: [].}
fill data with integer values for each sample with the given key
proc get(f: FORMAT; key: string; data: var seq[float32]): Status {...}{.inline, raises: [],
    tags: [].}
fill data with float values for each sample with the given key
proc get(f: FORMAT; key: string; data: var seq[string]): Status {...}{.inline, raises: [],
    tags: [].}
fill data with string values for each sample with the given key
proc set(f: FORMAT; key: string; data: var seq[string]): Status {...}{.inline, raises: [],
    tags: [].}
set the format field with the given strings.
proc set(f: FORMAT; key: string; values: var seq[int32]): Status {...}{.inline, raises: [],
    tags: [].}
set the sample fields. values must be a multiple of number of samples.
proc set(f: FORMAT; key: string; values: var seq[float32]): Status {...}{.inline, raises: [],
    tags: [].}
set the sample fields. values must be a multiple of number of samples.
proc get(i: INFO; key: string; data: var seq[int32]): Status {...}{.inline, raises: [], tags: [].}
fills the given data with ints associated with the key.
proc get(i: INFO; key: string; data: var seq[float32]): Status {...}{.inline, raises: [],
    tags: [].}
fills the given data with floats associated with the key. In many cases, the user will want only a single value; in that case data will have length 1 with the single value.
proc get(i: INFO; key: string; data: var string): Status {...}{.inline, raises: [], tags: [].}
fills the data with the value for the key and returns a Status indicating success
proc has_flag(i: INFO; key: string): bool {...}{.inline, raises: [], tags: [].}
returns whether the flag is found in the INFO.
proc delete(i: INFO; key: string): Status {...}{.inline, raises: [KeyError], tags: [].}
delete the value from the INFO field
proc set(i: INFO; key: string; value: var string): Status {...}{.inline, raises: [], tags: [].}
proc set(i: INFO; key: string; value: bool): Status {...}{.inline, raises: [], tags: [].}
set a flag (when value is true) and remove it (when value is false)
proc set[T: float32 | float | float64](i: INFO; key: string; value: var T): Status {...}{.inline.}
set the info key with the given float value.
proc set[T: int32 | int | int64](i: INFO; key: string; value: var T): Status {...}{.inline.}
set the info key with the given int value.
proc set(i: INFO; key: string; values: var seq[float32]): Status {...}{.inline, raises: [],
    tags: [].}
set the info key with the given float value(s).
proc set(i: INFO; key: string; values: var seq[int32]): Status {...}{.inline, raises: [],
    tags: [].}
set the info key with the given int values.
proc from_string(v: var Variant; h: Header; s: var string) {...}{.raises: [ValueError], tags: [].}
proc newVariant(): Variant {...}{.noInit, raises: [], tags: [].}
make an empty variant.
proc close(v: VCF) {...}{.raises: [], tags: [WriteIOEffect].}
proc copy_header(v: var VCF; hdr: Header) {...}{.raises: [], tags: [].}
proc write_header(v: VCF): bool {...}{.raises: [], tags: [].}
write the header to the file (must have been opened in write mode) and return a bool for success.
proc write_variant(v: VCF; variant: Variant): bool {...}{.raises: [], tags: [].}
write a variant to the VCF opened in write mode and return a bool indicating success.
proc open(v: var VCF; fname: string; mode: string = "r";
         samples: seq[string] = empty_samples; threads: int = 0): bool {...}{.
    raises: [IOError, ValueError, OSError], tags: [WriteIOEffect].}
open a VCF at the given path
proc CHROM(v: Variant): cstring {...}{.inline, raises: [], tags: [].}
return the chromosome associated with the variant
proc rid(v: Variant): int32 {...}{.inline, raises: [], tags: [].}
return the reference id of the variant.
proc tostring(v: Variant): string {...}{.raises: [ValueError], tags: [].}
return the full variant string including new-line from vcf_format.
proc `$`(c: Contig): string {...}{.raises: [ValueError], tags: [].}
proc load_index(v: VCF; path: string; force: bool = false) {...}{.raises: [OSError], tags: [].}
load the index at the given path (remote or local).
proc contigs(v: VCF): seq[Contig] {...}{.raises: [OSError, Exception], tags: [RootEffect].}
proc copy(v: Variant): Variant {...}{.raises: [], tags: [].}
make a copy of the variant and the underlying pointer.
proc POS(v: Variant): int64 {...}{.inline, raises: [], tags: [].}
return the 1-based position of the start of the variant
proc start(v: Variant): int64 {...}{.inline, raises: [], tags: [].}
return the 0-based position of the start of the variant
proc stop(v: Variant): int64 {...}{.inline, raises: [], tags: [].}
return the 0-based position of the end of the variant
proc ID(v: Variant): cstring {...}{.inline, raises: [], tags: [].}
the VCF ID field
proc ID=(v: Variant; value: string) {...}{.inline, raises: [ValueError], tags: [].}
Set the ID value, third column in the VCF spec.
proc FILTER(v: Variant): string {...}{.inline, raises: [], tags: [].}
Return a string representation of the FILTER; it will be ';' delimited for multiple values.
proc QUAL(v: Variant; default: float = 0): float {...}{.inline, raises: [], tags: [].}
variant quality; returns default if it was unspecified in the VCF
proc QUAL=(v: Variant; value: float) {...}{.inline, raises: [], tags: [].}
proc REF(v: Variant): string {...}{.inline, raises: [], tags: [].}
the reference allele
proc ALT(v: Variant): seq[string] {...}{.inline, raises: [], tags: [].}
a seq of alternate alleles
proc REF=(v: Variant; allele: string) {...}{.inline, raises: [], tags: [].}
set the reference allele
proc ALT=(v: Variant; alleles: string | seq[string]) {...}{.inline.}
set the alternate allele(s)
proc copy(g: Genotypes): Genotypes {...}{.raises: [], tags: [].}
make a copy of the genotypes
proc phased(a: Allele): bool {...}{.inline, raises: [], tags: [].}
is the allele phased.
proc value(a: Allele): int {...}{.inline, raises: [], tags: [].}
e.g. 0 for REF, 1 for first alt, -1 for unknown.
proc `[]`(g: Genotypes; i: int): Genotype {...}{.inline, raises: [], tags: [].}
proc len(g: Genotypes): int {...}{.inline, raises: [], tags: [].}
this should match the number of samples.
proc `$`(a: Allele): string {...}{.inline, raises: [], tags: [].}
string representation of a single allele.
proc `$`(g: Genotype): string {...}{.inline, raises: [], tags: [].}
string representation of a genotype. removes trailing phase value.
proc alts(g: Genotype): int8 {...}{.inline, raises: [], tags: [].}
the number of alternate alleles in the genotype. only makes sense for bi-allelics. Examples: ./1 -> 1, 0/. -> 0, ./. -> -1, 1/1 -> 2
proc genotypes(f: FORMAT; gts: var seq[int32]): Genotypes {...}{.inline, raises: [], tags: [].}
give sequence of genotypes (using the underlying array given in gts)
proc `$`(gs: Genotypes): string {...}{.raises: [], tags: [].}
proc alts(gs: Genotypes): seq[int8] {...}{.inline, raises: [], tags: [].}
return the number of alternate alleles. Unknown is -1.
proc `$`(v: Variant): string {...}{.raises: [ValueError], tags: [].}

Iterators

iterator fields(f: FORMAT): FormatField {...}{.inline, raises: [], tags: [].}
iterator fields(info: INFO): InfoField {...}{.raises: [], tags: [].}
iterator items(v: VCF): Variant {...}{.raises: [IOError, ValueError], tags: [WriteIOEffect].}
Each returned Variant has a pointer in the underlying iterator that is updated each iteration; use .copy to keep it in memory
iterator query(v: VCF; region: string): Variant {...}{.raises: [IOError, ValueError],
    tags: [WriteIOEffect].}
iterator items(g: Genotypes): Genotype {...}{.raises: [], tags: [].}