Expand description
Core VCF/BCF parsing library built on HTSlib.
This crate provides the shared implementation for reading VCF/BCF files
and accessing variant data. It is used by both the V8 binding (htsvcf)
and the Node-API binding (htsvcf-napi). It has some nice additions to rust-htslib, but
it’s not likely you’d need to use it directly.
§Overview
The main types are:
Reader- Opens and iterates VCF/BCF files (with optional index-based queries)Header- Access VCF header metadata (INFO/FORMAT definitions, samples)Variant- A single VCF record with typed accessors for all fields
§Example: Reading, modifying, and writing a VCF
use htsvcf_core::{open_reader, open_writer, Header, Variant, WriterOptions};
// Open input VCF and get a copy of its header
let mut reader = open_reader("input.vcf.gz").expect("failed to open");
let header = unsafe { Header::new(reader.header_ptr()) };
// Add a new INFO field to the header
header.add_info("VARIANT_LENGTH", "1", "Integer", "Length of variant (REF - ALT)");
// Open writer with the modified header
let mut writer = open_writer("output.vcf.gz", &header, WriterOptions::default())
.expect("failed to create writer");
while let Ok(Some(record)) = reader.next_record() {
let mut variant = Variant::from_record(record);
// Translate the record to the new header (required after adding INFO fields)
variant.translate(&header).expect("translate failed");
// Access basic fields
println!("{}:{} {} -> {:?}",
variant.chrom(),
variant.pos(), // 1-based position
variant.reference(),
variant.alts()
);
// Compute and set the new INFO field
let ref_len = variant.reference().len() as i32;
let alt_len = variant.alts().first().map(|a| a.len() as i32).unwrap_or(0);
variant.set_info_integer(&header, "VARIANT_LENGTH", &[ref_len - alt_len]).unwrap();
// Write the modified record
writer.write_record(variant.record_mut()).expect("write failed");
}§Example: Accessing sample data
use htsvcf_core::{open_reader, Header, Variant, FormatValue};
let mut reader = open_reader("input.vcf.gz").unwrap();
let header = unsafe { Header::new(reader.header_ptr()) };
if let Ok(Some(record)) = reader.next_record() {
let variant = Variant::from_record(record);
// Get data for a specific sample by name
if let Some(fields) = variant.sample(&header, "SAMPLE1") {
for (tag, value) in fields {
match value {
FormatValue::Int(v) => println!(" {}: {}", tag, v),
FormatValue::Float(v) => println!(" {}: {}", tag, v),
FormatValue::String(v) => println!(" {}: {}", tag, v),
FormatValue::Array(vals) => println!(" {}: {:?}", tag, vals),
_ => {}
}
}
}
// Get data for all samples at once (more efficient)
for sample_data in variant.samples(&header, None) {
// sample_data is Vec<(String, FormatValue)> with a "sample_name" key
println!("{:?}", sample_data);
}
// Get data for a subset of samples
let subset = variant.samples(&header, Some(&["SAMPLE1", "SAMPLE3"]));
}§Example: Modifying variant data
use htsvcf_core::{open_reader, Header, Variant};
let mut reader = open_reader("input.vcf.gz").unwrap();
let header = unsafe { Header::new(reader.header_ptr()) };
if let Ok(Some(record)) = reader.next_record() {
let mut variant = Variant::from_record(record);
// Modify basic fields
variant.set_id("rs12345").unwrap();
variant.set_qual(Some(30.0));
variant.set_filters(&["PASS".to_string()]).unwrap();
// Modify INFO fields (must match header type)
variant.set_info_integer(&header, "DP", &[42]).unwrap();
variant.set_info_float(&header, "AF", &[0.25, 0.75]).unwrap();
variant.set_info_flag(&header, "SOMATIC", true).unwrap();
// Clear an INFO field
variant.clear_info(&header, "DP").unwrap();
// Modify genotypes (one per sample)
use htsvcf_core::Genotype;
variant.set_genotypes(&[
Genotype { alleles: vec![Some(0), Some(1)], phase: vec![false] }, // 0/1
Genotype { alleles: vec![Some(1), Some(1)], phase: vec![true] }, // 1|1
]).unwrap();
// Output as VCF line
if let Some(line) = variant.to_string(&header) {
println!("{}", line);
}
}§Value types
InfoValue represents INFO field values:
Absent- Tag not present in recordMissing- Tag present but value is.Bool(bool)- Flag typeInt(i32)- Single integerFloat(f32)- Single floatString(String)- Single stringArray(Vec<InfoValue>)- Multiple values
FormatValue represents FORMAT field values:
Absent- Tag not presentMissing- Value is.Int(i32),Float(f32),String(String)- Scalar valuesArray(Vec<FormatValue>)- Multi-value field for one samplePerSample(Vec<FormatValue>)- Array of values, one per sample
Re-exports§
pub use genotype::record_genotypes;pub use genotype::record_set_genotypes;pub use genotype::Genotype;pub use header::Header;pub use reader::open_reader;pub use reader::InnerReader;pub use reader::Reader;pub use variant::format_float_missing;pub use variant::format_int_missing;pub use variant::get_format_tag_names;pub use variant::record_clear_format;pub use variant::record_clear_info;pub use variant::record_format;pub use variant::record_info;pub use variant::record_sample;pub use variant::record_samples;pub use variant::record_set_format_float;pub use variant::record_set_format_integer;pub use variant::record_set_format_string;pub use variant::record_set_info_flag;pub use variant::record_set_info_float;pub use variant::record_set_info_integer;pub use variant::record_set_info_string;pub use variant::record_to_string;pub use variant::FormatValue;pub use variant::InfoValue;pub use variant::Variant;pub use writer::open_writer;pub use writer::OutputFormat;pub use writer::Writer;pub use writer::WriterOptions;