htsvcf_core/lib.rs
1//! Core VCF/BCF parsing library built on HTSlib.
2//!
3//! This crate provides the shared implementation for reading VCF/BCF files
4//! and accessing variant data. It is used by both the V8 binding (`htsvcf`)
5//! and the Node-API binding (`htsvcf-napi`). It adds some conveniences on top of rust-htslib,
6//! but most users are unlikely to need it directly.
7//!
8//! # Overview
9//!
10//! The main types are:
11//! - [`Reader`] - Opens and iterates over VCF/BCF files (with optional index-based queries)
12//! - [`Header`] - Access VCF header metadata (INFO/FORMAT definitions, samples)
13//! - [`Variant`] - A single VCF record with typed accessors for all fields
14//!
15//! # Example: Reading, modifying, and writing a VCF
16//!
17//! ```no_run
18//! use htsvcf_core::{open_reader, open_writer, Header, Variant, WriterOptions};
19//!
20//! // Open input VCF and get a copy of its header
21//! let mut reader = open_reader("input.vcf.gz").expect("failed to open");
22//! let header = unsafe { Header::new(reader.header_ptr()) };
23//!
24//! // Add a new INFO field to the header
25//! header.add_info("VARIANT_LENGTH", "1", "Integer", "Length of variant (REF - ALT)");
26//!
27//! // Open writer with the modified header
28//! let mut writer = open_writer("output.vcf.gz", &header, WriterOptions::default())
29//! .expect("failed to create writer");
30//!
31//! while let Ok(Some(record)) = reader.next_record() {
32//! let mut variant = Variant::from_record(record);
33//!
34//! // Translate the record to the new header (required after adding INFO fields)
35//! variant.translate(&header).expect("translate failed");
36//!
37//! // Access basic fields
38//! println!("{}:{} {} -> {:?}",
39//! variant.chrom(),
40//! variant.pos(), // 1-based position
41//! variant.reference(),
42//! variant.alts()
43//! );
44//!
45//! // Compute and set the new INFO field
46//! let ref_len = variant.reference().len() as i32;
47//! let alt_len = variant.alts().first().map(|a| a.len() as i32).unwrap_or(0);
48//! variant.set_info_integer(&header, "VARIANT_LENGTH", &[ref_len - alt_len]).unwrap();
49//!
50//! // Write the modified record
51//! writer.write_record(variant.record_mut()).expect("write failed");
52//! }
53//! ```
54//!
55//! # Example: Accessing sample data
56//!
57//! ```no_run
58//! use htsvcf_core::{open_reader, Header, Variant, FormatValue};
59//!
60//! let mut reader = open_reader("input.vcf.gz").unwrap();
61//! let header = unsafe { Header::new(reader.header_ptr()) };
62//!
63//! if let Ok(Some(record)) = reader.next_record() {
64//! let variant = Variant::from_record(record);
65//!
66//! // Get data for a specific sample by name
67//! if let Some(fields) = variant.sample(&header, "SAMPLE1") {
68//! for (tag, value) in fields {
69//! match value {
70//! FormatValue::Int(v) => println!(" {}: {}", tag, v),
71//! FormatValue::Float(v) => println!(" {}: {}", tag, v),
72//! FormatValue::String(v) => println!(" {}: {}", tag, v),
73//! FormatValue::Array(vals) => println!(" {}: {:?}", tag, vals),
74//! _ => {}
75//! }
76//! }
77//! }
78//!
79//! // Get data for all samples at once (more efficient)
80//! for sample_data in variant.samples(&header, None) {
81//! // sample_data is Vec<(String, FormatValue)> with a "sample_name" key
82//! println!("{:?}", sample_data);
83//! }
84//!
85//! // Get data for a subset of samples
86//! let subset = variant.samples(&header, Some(&["SAMPLE1", "SAMPLE3"]));
87//! }
88//! ```
89//!
90//! # Example: Modifying variant data
91//!
92//! ```no_run
93//! use htsvcf_core::{open_reader, Header, Variant};
94//!
95//! let mut reader = open_reader("input.vcf.gz").unwrap();
96//! let header = unsafe { Header::new(reader.header_ptr()) };
97//!
98//! if let Ok(Some(record)) = reader.next_record() {
99//! let mut variant = Variant::from_record(record);
100//!
101//! // Modify basic fields
102//! variant.set_id("rs12345").unwrap();
103//! variant.set_qual(Some(30.0));
104//! variant.set_filters(&["PASS".to_string()]).unwrap();
105//!
106//! // Modify INFO fields (must match header type)
107//! variant.set_info_integer(&header, "DP", &[42]).unwrap();
108//! variant.set_info_float(&header, "AF", &[0.25, 0.75]).unwrap();
109//! variant.set_info_flag(&header, "SOMATIC", true).unwrap();
110//!
111//! // Clear an INFO field
112//! variant.clear_info(&header, "DP").unwrap();
113//!
114//! // Modify genotypes (one per sample)
115//! use htsvcf_core::Genotype;
116//! variant.set_genotypes(&[
117//! Genotype { alleles: vec![Some(0), Some(1)], phase: vec![false] }, // 0/1
118//! Genotype { alleles: vec![Some(1), Some(1)], phase: vec![true] }, // 1|1
119//! ]).unwrap();
120//!
121//! // Output as VCF line
122//! if let Some(line) = variant.to_string(&header) {
123//! println!("{}", line);
124//! }
125//! }
126//! ```
127//!
128//! # Value types
129//!
130//! [`InfoValue`] represents INFO field values:
131//! - `Absent` - Tag not present in record
132//! - `Missing` - Tag present but value is `.`
133//! - `Bool(bool)` - Flag type
134//! - `Int(i32)` - Single integer
135//! - `Float(f32)` - Single float
136//! - `String(String)` - Single string
137//! - `Array(Vec<InfoValue>)` - Multiple values
138//!
139//! [`FormatValue`] represents FORMAT field values:
140//! - `Absent` - Tag not present
141//! - `Missing` - Value is `.`
142//! - `Int(i32)`, `Float(f32)`, `String(String)` - Scalar values
143//! - `Array(Vec<FormatValue>)` - Multi-value field for one sample
144//! - `PerSample(Vec<FormatValue>)` - Array of values, one per sample
145
146pub mod genotype;
147pub mod header;
148pub mod reader;
149pub mod region;
150pub mod variant;
151pub mod writer;
152
153pub use genotype::{record_genotypes, record_set_genotypes, Genotype};
154pub use header::Header;
155pub use reader::{open_reader, InnerReader, Reader};
156pub use variant::{
157 format_float_missing, format_int_missing, get_format_tag_names, record_clear_format,
158 record_clear_info, record_format, record_info, record_sample, record_samples,
159 record_set_format_float, record_set_format_integer, record_set_format_string,
160 record_set_info_flag, record_set_info_float, record_set_info_integer, record_set_info_string,
161 record_to_string, FormatValue, InfoValue, Variant,
162};
163pub use writer::{open_writer, OutputFormat, Writer, WriterOptions};