htsvcf_core/region.rs
1//! Region string parsing utilities.
2//!
3//! This module provides functions for parsing genomic region strings in the
4//! standard format used by samtools, bcftools, and other htslib-based tools.
5//!
6//! # Supported Formats
7//!
8//! - `chr` - entire chromosome
9//! - `chr:start` - from start position to end of chromosome
10//! - `chr:start-end` - specific range
11//!
12//! # Coordinate Systems
13//!
14//! Input coordinates are **1-based inclusive** (standard VCF/genomics convention).
15//! Output coordinates are **0-based** for direct use with htslib APIs.
16//!
17//! # Example
18//!
19//! ```
20//! use htsvcf_core::region::parse_region_1based;
21//!
22//! // Parse "chr1:1000-2000" (1-based input)
23//! let (chrom, start0, end0) = parse_region_1based("chr1:1000-2000").unwrap();
24//! assert_eq!(chrom, "chr1");
25//! assert_eq!(start0, 999); // 0-based
26//! assert_eq!(end0, Some(1999)); // 0-based
27//!
28//! // Commas in numbers are stripped
29//! let (_, start0, _) = parse_region_1based("chr1:1,000,000").unwrap();
30//! assert_eq!(start0, 999_999);
31//! ```
32
/// Split a samtools-style region string into a contig name and coordinates.
///
/// Accepted shapes are `chr`, `chr:start`, and `chr:start-end`. Any `,`
/// characters inside the numeric parts are removed before parsing, so
/// `chr1:1,000-2,000` is read the same as `chr1:1000-2000`.
///
/// # Returns
///
/// `Some((chrom, start0, end0))`, where:
///
/// - `chrom` is everything before the first `:` (or the whole input when
///   there is no `:`),
/// - `start0` is the 1-based start minus one; it is `0` when no start is given,
/// - `end0` is the 1-based inclusive end minus one, or `None` when the end is
///   absent (including a trailing `-` with nothing after it).
///
/// `None` is returned when the contig name is empty or when a coordinate
/// that is present does not parse as a `u64`.
pub fn parse_region_1based(region: &str) -> Option<(String, u64, Option<u64>)> {
    // Converts one 1-based coordinate to 0-based. Commas are dropped first,
    // and the subtraction saturates so an input of `0` stays `0`.
    let to_zero_based = |text: &str| -> Option<u64> {
        let digits = text.replace(',', "");
        digits.parse::<u64>().ok().map(|one_based| one_based.saturating_sub(1))
    };

    // Only the first ':' separates the contig name from the coordinates.
    let (chrom, coords) = match region.split_once(':') {
        Some((name, tail)) => (name, tail),
        None => (region, ""),
    };
    if chrom.is_empty() {
        return None;
    }

    // No coordinates at all (`chr` or `chr:`): whole contig.
    if coords.is_empty() {
        return Some((chrom.to_string(), 0, None));
    }

    let (start_text, end_text) = match coords.split_once('-') {
        Some((lo, hi)) => (lo, hi),
        None => (coords, ""),
    };

    // The start must parse before the end is even considered.
    let start0 = to_zero_based(start_text)?;
    let end0 = match end_text {
        "" => None,
        text => Some(to_zero_based(text)?),
    };

    Some((chrom.to_string(), start0, end0))
}