htsvcf_core/reader.rs
//! VCF/BCF file reader with optional index support.
//!
//! This module provides [`Reader`], a unified interface for reading VCF/BCF files
//! that automatically detects and uses tabix (.tbi) or CSI (.csi) indices when
//! available.
//!
//! # Index Detection
//!
//! When opening a file, the reader checks for index files at `{path}.tbi` and
//! `{path}.csi`. If found, region queries are enabled via [`Reader::query`].
//!
//! # Example
//!
//! ```no_run
//! use htsvcf_core::reader::open_reader;
//!
//! let mut reader = open_reader("input.vcf.gz").unwrap();
//!
//! // Check if indexed
//! if reader.has_index() {
//!     // Query a specific region
//!     reader.query("chr1:1000-2000", None, None).unwrap();
//! }
//!
//! // Iterate over records
//! while let Ok(Some(record)) = reader.next_record() {
//!     println!("pos = {}", record.pos());
//! }
//! ```
//!
//! # Types
//!
//! - [`Reader`]: Main reader interface
//! - [`InnerReader`]: Enum wrapping indexed or unindexed htslib readers
//! - [`open_reader`]: Constructor that auto-detects index availability
36
37use rust_htslib::bcf;
38use rust_htslib::bcf::Read;
39
40use crate::region::parse_region_1based;
41
42/// Internal enum wrapping indexed or unindexed htslib readers.
43///
44/// This allows the `Reader` to work with both indexed and unindexed
45/// VCF/BCF files through a unified interface.
46#[derive(Debug)]
47pub enum InnerReader {
48 /// An unindexed reader (sequential access only).
49 Unindexed(bcf::Reader),
50 /// An indexed reader (supports region queries).
51 Indexed(bcf::IndexedReader),
52}
53
54impl InnerReader {
55 /// Get the raw header pointer.
56 pub fn header_ptr(&self) -> *mut rust_htslib::htslib::bcf_hdr_t {
57 match self {
58 InnerReader::Unindexed(r) => r.header().inner,
59 InnerReader::Indexed(r) => r.header().inner,
60 }
61 }
62
63 /// Create an empty record for reading into.
64 pub fn empty_record(&self) -> bcf::Record {
65 match self {
66 InnerReader::Unindexed(r) => r.empty_record(),
67 InnerReader::Indexed(r) => r.empty_record(),
68 }
69 }
70
71 /// Read the next record into the provided buffer.
72 ///
73 /// Returns `None` at EOF, `Some(Ok(()))` on success, or `Some(Err(...))` on error.
74 pub fn read_record(
75 &mut self,
76 record: &mut bcf::Record,
77 ) -> Option<Result<(), rust_htslib::errors::Error>> {
78 match self {
79 InnerReader::Unindexed(r) => r.read(record),
80 InnerReader::Indexed(r) => r.read(record),
81 }
82 }
83
84 /// Fetch records from a genomic region (indexed readers only).
85 ///
86 /// After calling this, subsequent `read_record` calls will return
87 /// only records overlapping the specified region.
88 pub fn fetch(
89 &mut self,
90 chrom: &str,
91 start0: u64,
92 end0: Option<u64>,
93 ) -> Result<(), rust_htslib::errors::Error> {
94 let InnerReader::Indexed(r) = self else {
95 return Err(rust_htslib::errors::Error::Fetch);
96 };
97
98 let rid = r.header().name2rid(chrom.as_bytes())?;
99 r.fetch(rid, start0, end0)
100 }
101}
102
103/// A VCF/BCF file reader with automatic index detection.
104///
105/// `Reader` provides a unified interface for reading VCF/BCF files.
106/// When opening a file, it automatically checks for index files
107/// (`.tbi` or `.csi`) and enables region queries if found.
108#[derive(Debug)]
109pub struct Reader {
110 inner: InnerReader,
111 has_index: bool,
112}
113
/// Report whether an index file for `path` exists on disk.
///
/// Two candidate locations are probed: `{path}.csi` and `{path}.tbi`.
/// A candidate only counts when it is a regular file (symlinks are
/// followed); a directory or other non-file entry that merely carries an
/// index-like name is ignored, as is any candidate that cannot be inspected.
fn has_index_on_disk(path: &str) -> bool {
    ["csi", "tbi"]
        .iter()
        .any(|ext| std::path::Path::new(&format!("{path}.{ext}")).is_file())
}
119
120/// Open a VCF/BCF file for reading.
121///
122/// Automatically detects and uses tabix (`.tbi`) or CSI (`.csi`) indices
123/// when available. If an index is found, region queries are enabled.
124///
125/// # Example
126///
127/// ```no_run
128/// use htsvcf_core::reader::open_reader;
129///
130/// let mut reader = open_reader("input.vcf.gz").unwrap();
131/// while let Ok(Some(record)) = reader.next_record() {
132/// // process record
133/// }
134/// ```
135pub fn open_reader(path: &str) -> Result<Reader, rust_htslib::errors::Error> {
136 let has_index = has_index_on_disk(path);
137
138 let inner = if has_index {
139 match bcf::IndexedReader::from_path(path) {
140 Ok(r) => InnerReader::Indexed(r),
141 Err(_) => InnerReader::Unindexed(bcf::Reader::from_path(path)?),
142 }
143 } else {
144 InnerReader::Unindexed(bcf::Reader::from_path(path)?)
145 };
146
147 let has_index = matches!(inner, InnerReader::Indexed(_)) && has_index;
148 Ok(Reader { inner, has_index })
149}
150
151impl Reader {
152 /// Check if this reader has an index available.
153 ///
154 /// Returns `true` if region queries are supported.
155 pub fn has_index(&self) -> bool {
156 self.has_index
157 }
158
159 /// Get the raw header pointer.
160 pub fn header_ptr(&self) -> *mut rust_htslib::htslib::bcf_hdr_t {
161 self.inner.header_ptr()
162 }
163
164 /// Query by a 1-based region string (e.g., "chr1:1000-2000").
165 ///
166 /// After calling this, subsequent `next_record` calls will return
167 /// only records overlapping the specified region.
168 pub fn query_region_1based(&mut self, region: &str) -> Result<(), rust_htslib::errors::Error> {
169 let (chrom, start0, end0) =
170 parse_region_1based(region).ok_or(rust_htslib::errors::Error::Fetch)?;
171 self.query_chrom_start_end(&chrom, start0, end0)
172 }
173
174 /// Query by chromosome and 0-based coordinates.
175 ///
176 /// After calling this, subsequent `next_record` calls will return
177 /// only records overlapping the specified region.
178 pub fn query_chrom_start_end(
179 &mut self,
180 chrom: &str,
181 start0: u64,
182 end0: Option<u64>,
183 ) -> Result<(), rust_htslib::errors::Error> {
184 self.inner.fetch(chrom, start0, end0)
185 }
186
187 /// Query by region string ("chr1:1000-2000") or by coordinates.
188 /// If `start0` is None, treat `region_or_chrom` as a region string.
189 /// Otherwise, treat it as a chromosome name with numeric coordinates.
190 pub fn query(
191 &mut self,
192 region_or_chrom: &str,
193 start0: Option<u64>,
194 end0: Option<u64>,
195 ) -> Result<(), rust_htslib::errors::Error> {
196 match start0 {
197 None => self.query_region_1based(region_or_chrom),
198 Some(s) => self.query_chrom_start_end(region_or_chrom, s, end0),
199 }
200 }
201
202 /// Read the next record.
203 ///
204 /// Returns `Ok(Some(record))` if a record was read, `Ok(None)` at EOF,
205 /// or `Err(...)` on error.
206 pub fn next_record(&mut self) -> Result<Option<bcf::Record>, rust_htslib::errors::Error> {
207 let mut record = self.inner.empty_record();
208 match self.inner.read_record(&mut record) {
209 None => Ok(None),
210 Some(Ok(())) => Ok(Some(record)),
211 Some(Err(e)) => Err(e),
212 }
213 }
214}