htsvcf_core/
lib.rs

1//! Core VCF/BCF parsing library built on HTSlib.
2//!
3//! This crate provides the shared implementation for reading VCF/BCF files
4//! and accessing variant data. It is used by both the V8 binding (`htsvcf`)
5//! and the Node-API binding (`htsvcf-napi`). It adds some conveniences on top of
6//! `rust-htslib`, but you are unlikely to need to use it directly.
7//!
8//! # Overview
9//!
10//! The main types are:
11//! - [`Reader`] - Opens and iterates over VCF/BCF files (with optional index-based queries)
12//! - [`Header`] - Access VCF header metadata (INFO/FORMAT definitions, samples)
13//! - [`Variant`] - A single VCF record with typed accessors for all fields
14//!
15//! # Example: Reading, modifying, and writing a VCF
16//!
17//! ```no_run
18//! use htsvcf_core::{open_reader, open_writer, Header, Variant, WriterOptions};
19//!
20//! // Open input VCF and get a copy of its header
21//! let mut reader = open_reader("input.vcf.gz").expect("failed to open");
22//! let header = unsafe { Header::new(reader.header_ptr()) };
23//!
24//! // Add a new INFO field to the header
25//! header.add_info("VARIANT_LENGTH", "1", "Integer", "Length of variant (REF - ALT)");
26//!
27//! // Open writer with the modified header
28//! let mut writer = open_writer("output.vcf.gz", &header, WriterOptions::default())
29//!     .expect("failed to create writer");
30//!
31//! while let Ok(Some(record)) = reader.next_record() {
32//!     let mut variant = Variant::from_record(record);
33//!
34//!     // Translate the record to the new header (required after adding INFO fields)
35//!     variant.translate(&header).expect("translate failed");
36//!
37//!     // Access basic fields
38//!     println!("{}:{} {} -> {:?}",
39//!         variant.chrom(),
40//!         variant.pos(),      // 1-based position
41//!         variant.reference(),
42//!         variant.alts()
43//!     );
44//!
45//!     // Compute and set the new INFO field
46//!     let ref_len = variant.reference().len() as i32;
47//!     let alt_len = variant.alts().first().map(|a| a.len() as i32).unwrap_or(0);
48//!     variant.set_info_integer(&header, "VARIANT_LENGTH", &[ref_len - alt_len]).unwrap();
49//!
50//!     // Write the modified record
51//!     writer.write_record(variant.record_mut()).expect("write failed");
52//! }
53//! ```
54//!
55//! # Example: Accessing sample data
56//!
57//! ```no_run
58//! use htsvcf_core::{open_reader, Header, Variant, FormatValue};
59//!
60//! let mut reader = open_reader("input.vcf.gz").unwrap();
61//! let header = unsafe { Header::new(reader.header_ptr()) };
62//!
63//! if let Ok(Some(record)) = reader.next_record() {
64//!     let variant = Variant::from_record(record);
65//!
66//!     // Get data for a specific sample by name
67//!     if let Some(fields) = variant.sample(&header, "SAMPLE1") {
68//!         for (tag, value) in fields {
69//!             match value {
70//!                 FormatValue::Int(v) => println!("  {}: {}", tag, v),
71//!                 FormatValue::Float(v) => println!("  {}: {}", tag, v),
72//!                 FormatValue::String(v) => println!("  {}: {}", tag, v),
73//!                 FormatValue::Array(vals) => println!("  {}: {:?}", tag, vals),
74//!                 _ => {}
75//!             }
76//!         }
77//!     }
78//!
79//!     // Get data for all samples at once (more efficient)
80//!     for sample_data in variant.samples(&header, None) {
81//!         // each sample_data is a Vec<(String, FormatValue)> that includes a "sample_name" entry
82//!         println!("{:?}", sample_data);
83//!     }
84//!
85//!     // Get data for a subset of samples
86//!     let subset = variant.samples(&header, Some(&["SAMPLE1", "SAMPLE3"]));
87//! }
88//! ```
89//!
90//! # Example: Modifying variant data
91//!
92//! ```no_run
93//! use htsvcf_core::{open_reader, Header, Variant};
94//!
95//! let mut reader = open_reader("input.vcf.gz").unwrap();
96//! let header = unsafe { Header::new(reader.header_ptr()) };
97//!
98//! if let Ok(Some(record)) = reader.next_record() {
99//!     let mut variant = Variant::from_record(record);
100//!
101//!     // Modify basic fields
102//!     variant.set_id("rs12345").unwrap();
103//!     variant.set_qual(Some(30.0));
104//!     variant.set_filters(&["PASS".to_string()]).unwrap();
105//!
106//!     // Modify INFO fields (must match header type)
107//!     variant.set_info_integer(&header, "DP", &[42]).unwrap();
108//!     variant.set_info_float(&header, "AF", &[0.25, 0.75]).unwrap();
109//!     variant.set_info_flag(&header, "SOMATIC", true).unwrap();
110//!
111//!     // Clear an INFO field
112//!     variant.clear_info(&header, "DP").unwrap();
113//!
114//!     // Modify genotypes (one per sample)
115//!     use htsvcf_core::Genotype;
116//!     variant.set_genotypes(&[
117//!         Genotype { alleles: vec![Some(0), Some(1)], phase: vec![false] }, // 0/1
118//!         Genotype { alleles: vec![Some(1), Some(1)], phase: vec![true] },  // 1|1
119//!     ]).unwrap();
120//!
121//!     // Output as VCF line
122//!     if let Some(line) = variant.to_string(&header) {
123//!         println!("{}", line);
124//!     }
125//! }
126//! ```
127//!
128//! # Value types
129//!
130//! [`InfoValue`] represents INFO field values:
131//! - `Absent` - Tag not present in record
132//! - `Missing` - Tag present but value is `.`
133//! - `Bool(bool)` - Flag type
134//! - `Int(i32)` - Single integer
135//! - `Float(f32)` - Single float
136//! - `String(String)` - Single string
137//! - `Array(Vec<InfoValue>)` - Multiple values
138//!
139//! [`FormatValue`] represents FORMAT field values:
140//! - `Absent` - Tag not present
141//! - `Missing` - Value is `.`
142//! - `Int(i32)`, `Float(f32)`, `String(String)` - Scalar values
143//! - `Array(Vec<FormatValue>)` - Multi-value field for one sample
144//! - `PerSample(Vec<FormatValue>)` - Array of values, one per sample
145
146pub mod genotype;
147pub mod header;
148pub mod reader;
149pub mod region;
150pub mod variant;
151pub mod writer;
152
153pub use genotype::{record_genotypes, record_set_genotypes, Genotype};
154pub use header::Header;
155pub use reader::{open_reader, InnerReader, Reader};
156pub use variant::{
157    format_float_missing, format_int_missing, get_format_tag_names, record_clear_format,
158    record_clear_info, record_format, record_info, record_sample, record_samples,
159    record_set_format_float, record_set_format_integer, record_set_format_string,
160    record_set_info_flag, record_set_info_float, record_set_info_integer, record_set_info_string,
161    record_to_string, FormatValue, InfoValue, Variant,
162};
163pub use writer::{open_writer, OutputFormat, Writer, WriterOptions};