hts-nim is a library to enable writing fast, efficient tools and libraries using the Nim programming language. It is available at https://github.com/brentp/hts-nim.

Nim has a simple syntax and compiles to C code. In our experience it is as fast as code written in C. We have previously published mosdepth, a tool for fast depth calculation that uses hts-nim.

The hts-nim-tools repository also contains several tools written with hts-nim that can serve as documentation for how to use the library.

Below we show short snippets of example usage of the library.


import hts

# Example: reading alignments from BAM and CRAM files.

# Open a BAM; `index=true` makes `open` look for the index as well.
var b:Bam
open(b, "test/HG02002.bam", index=true)

# Iterate over every record in the file.
for record in b:
  if record.qual > 10:
    echo record.chrom, record.start, record.stop

# Regional queries. The chromosome name is passed as a string, so it must be
# double-quoted: single quotes denote a `char` literal in Nim.
for record in b.query("6", 30816675, 32816675):
  if record.flag.proper_pair and record.flag.reverse:
    # cigar is an iterable of operations:
    for op in record.cigar:
      # $op gives the string repr of the operation, e.g. '151M'
      echo $op, op.consumes.reference, op.consumes.query

  # tags are pulled with `aux`; a nil result is treated as "tag not present".
  var mismatches = record.aux("NM")
  if mismatches != nil and mismatches.integer() < 3:
    # Use the loop variable `record` here as well (there is no `rec` in scope).
    var rg = record.aux("RG")
    echo rg.tostring()

# CRAM requires a fasta reference to decode; it is supplied via `fai`.
var cram:Bam
open(cram, "/tmp/t.cram", fai="/data/human/g1k_v37_decoy.fa")
for record in cram:
  # now record is same as from bam above
  echo record.qname, record.isize


import hts

# Example: reading variants from a VCF/BCF file.

# Restrict the reader to this subset of samples.
var tsamples = @["101976-101976", "100920-100920", "100231-100231", "100232-100232", "100919-100919"]

# VCF and BCF supported
var v:VCF
doAssert open(v, "tests/test.bcf", samples=tsamples)

# Buffers that are re-used for every record to avoid re-allocating.
var afs = new_seq[float32](5) # size doesn't matter. this will be re-sized as needed
var acs = new_seq[int32](5) # size doesn't matter. this will be re-sized as needed
var csq = new_string_of_cap(20)
for rec in v:
  echo rec, " qual:", rec.QUAL, " filter:", rec.FILTER
  # INFO accessor for the current record.
  var info = rec.info
  # accessing stuff from the INFO fields is meant to be as fast as possible, allowing
  # the user to re-use memory as needed.
  info.get("CSQ", csq) # string
  info.get("AC", acs) # ints
  info.get("AF", afs) # floats
  echo acs, afs, csq, info.has_flag("IN_EXAC")

  # accessing format fields is similar
  # (`ref` is a reserved word in Nim; the record variable is `rec`.)
  var format = rec.format
  var dps = new_seq[int32](len(v.samples))
  doAssert format.get("DP", dps)
  echo dps

# The sample list belongs to the file, not to a record, so print it once
# after the loop rather than on every iteration.
echo v.samples

# regional queries look for index. works for VCF and BCF
# Run at top level so the query is not started while `v` is still being
# iterated by the loop above.
for rec in v.query("1:15600-18250"):
  echo rec.CHROM, ":", $rec.POS
