htsvcf_core/
reader.rs

1//! VCF/BCF file reader with optional index support.
2//!
3//! This module provides [`Reader`], a unified interface for reading VCF/BCF files
4//! that automatically detects and uses tabix (.tbi) or CSI (.csi) indices when
5//! available.
6//!
7//! # Index Detection
8//!
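//! When opening a file, the reader checks for index files at `{path}.tbi` and
//! `{path}.csi`. If one exists and the indexed reader opens successfully, region
//! queries are enabled via [`Reader::query`]; otherwise the file is read
//! sequentially without an index.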
11//!
12//! # Example
13//!
14//! ```no_run
15//! use htsvcf_core::reader::open_reader;
16//!
17//! let mut reader = open_reader("input.vcf.gz").unwrap();
18//!
19//! // Check if indexed
20//! if reader.has_index() {
21//!     // Query a specific region
22//!     reader.query("chr1:1000-2000", None, None).unwrap();
23//! }
24//!
25//! // Iterate over records
26//! while let Ok(Some(record)) = reader.next_record() {
27//!     println!("pos = {}", record.pos());
28//! }
29//! ```
30//!
//! # Public items
//!
//! - [`Reader`]: Main reader interface
//! - [`InnerReader`]: Enum wrapping indexed or unindexed htslib readers
//! - [`open_reader`]: Free function that opens a file and auto-detects index availability
36
37use rust_htslib::bcf;
38use rust_htslib::bcf::Read;
39
40use crate::region::parse_region_1based;
41
42/// Internal enum wrapping indexed or unindexed htslib readers.
43///
44/// This allows the `Reader` to work with both indexed and unindexed
45/// VCF/BCF files through a unified interface.
46#[derive(Debug)]
47pub enum InnerReader {
48    /// An unindexed reader (sequential access only).
49    Unindexed(bcf::Reader),
50    /// An indexed reader (supports region queries).
51    Indexed(bcf::IndexedReader),
52}
53
54impl InnerReader {
55    /// Get the raw header pointer.
56    pub fn header_ptr(&self) -> *mut rust_htslib::htslib::bcf_hdr_t {
57        match self {
58            InnerReader::Unindexed(r) => r.header().inner,
59            InnerReader::Indexed(r) => r.header().inner,
60        }
61    }
62
63    /// Create an empty record for reading into.
64    pub fn empty_record(&self) -> bcf::Record {
65        match self {
66            InnerReader::Unindexed(r) => r.empty_record(),
67            InnerReader::Indexed(r) => r.empty_record(),
68        }
69    }
70
71    /// Read the next record into the provided buffer.
72    ///
73    /// Returns `None` at EOF, `Some(Ok(()))` on success, or `Some(Err(...))` on error.
74    pub fn read_record(
75        &mut self,
76        record: &mut bcf::Record,
77    ) -> Option<Result<(), rust_htslib::errors::Error>> {
78        match self {
79            InnerReader::Unindexed(r) => r.read(record),
80            InnerReader::Indexed(r) => r.read(record),
81        }
82    }
83
84    /// Fetch records from a genomic region (indexed readers only).
85    ///
86    /// After calling this, subsequent `read_record` calls will return
87    /// only records overlapping the specified region.
88    pub fn fetch(
89        &mut self,
90        chrom: &str,
91        start0: u64,
92        end0: Option<u64>,
93    ) -> Result<(), rust_htslib::errors::Error> {
94        let InnerReader::Indexed(r) = self else {
95            return Err(rust_htslib::errors::Error::Fetch);
96        };
97
98        let rid = r.header().name2rid(chrom.as_bytes())?;
99        r.fetch(rid, start0, end0)
100    }
101}
102
103/// A VCF/BCF file reader with automatic index detection.
104///
105/// `Reader` provides a unified interface for reading VCF/BCF files.
106/// When opening a file, it automatically checks for index files
107/// (`.tbi` or `.csi`) and enables region queries if found.
108#[derive(Debug)]
109pub struct Reader {
110    inner: InnerReader,
111    has_index: bool,
112}
113
/// Report whether an index file for `path` is present on the local filesystem.
///
/// Probes `{path}.csi` and then `{path}.tbi`, stopping at the first hit.
/// Only regular files (after following symlinks) count: a directory or other
/// non-file entry that happens to carry an index name cannot be loaded as an
/// index, so it is not reported as one.
fn has_index_on_disk(path: &str) -> bool {
    [".csi", ".tbi"]
        .iter()
        .any(|ext| std::path::Path::new(&format!("{path}{ext}")).is_file())
}
119
120/// Open a VCF/BCF file for reading.
121///
122/// Automatically detects and uses tabix (`.tbi`) or CSI (`.csi`) indices
123/// when available. If an index is found, region queries are enabled.
124///
125/// # Example
126///
127/// ```no_run
128/// use htsvcf_core::reader::open_reader;
129///
130/// let mut reader = open_reader("input.vcf.gz").unwrap();
131/// while let Ok(Some(record)) = reader.next_record() {
132///     // process record
133/// }
134/// ```
135pub fn open_reader(path: &str) -> Result<Reader, rust_htslib::errors::Error> {
136    let has_index = has_index_on_disk(path);
137
138    let inner = if has_index {
139        match bcf::IndexedReader::from_path(path) {
140            Ok(r) => InnerReader::Indexed(r),
141            Err(_) => InnerReader::Unindexed(bcf::Reader::from_path(path)?),
142        }
143    } else {
144        InnerReader::Unindexed(bcf::Reader::from_path(path)?)
145    };
146
147    let has_index = matches!(inner, InnerReader::Indexed(_)) && has_index;
148    Ok(Reader { inner, has_index })
149}
150
151impl Reader {
152    /// Check if this reader has an index available.
153    ///
154    /// Returns `true` if region queries are supported.
155    pub fn has_index(&self) -> bool {
156        self.has_index
157    }
158
159    /// Get the raw header pointer.
160    pub fn header_ptr(&self) -> *mut rust_htslib::htslib::bcf_hdr_t {
161        self.inner.header_ptr()
162    }
163
164    /// Query by a 1-based region string (e.g., "chr1:1000-2000").
165    ///
166    /// After calling this, subsequent `next_record` calls will return
167    /// only records overlapping the specified region.
168    pub fn query_region_1based(&mut self, region: &str) -> Result<(), rust_htslib::errors::Error> {
169        let (chrom, start0, end0) =
170            parse_region_1based(region).ok_or(rust_htslib::errors::Error::Fetch)?;
171        self.query_chrom_start_end(&chrom, start0, end0)
172    }
173
174    /// Query by chromosome and 0-based coordinates.
175    ///
176    /// After calling this, subsequent `next_record` calls will return
177    /// only records overlapping the specified region.
178    pub fn query_chrom_start_end(
179        &mut self,
180        chrom: &str,
181        start0: u64,
182        end0: Option<u64>,
183    ) -> Result<(), rust_htslib::errors::Error> {
184        self.inner.fetch(chrom, start0, end0)
185    }
186
187    /// Query by region string ("chr1:1000-2000") or by coordinates.
188    /// If `start0` is None, treat `region_or_chrom` as a region string.
189    /// Otherwise, treat it as a chromosome name with numeric coordinates.
190    pub fn query(
191        &mut self,
192        region_or_chrom: &str,
193        start0: Option<u64>,
194        end0: Option<u64>,
195    ) -> Result<(), rust_htslib::errors::Error> {
196        match start0 {
197            None => self.query_region_1based(region_or_chrom),
198            Some(s) => self.query_chrom_start_end(region_or_chrom, s, end0),
199        }
200    }
201
202    /// Read the next record.
203    ///
204    /// Returns `Ok(Some(record))` if a record was read, `Ok(None)` at EOF,
205    /// or `Err(...)` on error.
206    pub fn next_record(&mut self) -> Result<Option<bcf::Record>, rust_htslib::errors::Error> {
207        let mut record = self.inner.empty_record();
208        match self.inner.read_record(&mut record) {
209            None => Ok(None),
210            Some(Ok(())) => Ok(Some(record)),
211            Some(Err(e)) => Err(e),
212        }
213    }
214}