1use rust_htslib::bcf;
37use rust_htslib::bcf::header::{HeaderRecord, TagLength, TagType};
38use std::collections::HashMap;
39use std::ffi::CString;
40use std::mem::ManuallyDrop;
41use std::sync::atomic::{AtomicBool, Ordering};
42use std::sync::{Arc, OnceLock};
43
44impl Drop for Header {
45 fn drop(&mut self) {
46 unsafe {
47 rust_htslib::htslib::bcf_hdr_destroy(self.inner);
48 }
49 }
50}
51
/// Owning wrapper around a raw htslib `bcf_hdr_t`.
///
/// The pointer is released in this type's `Drop` implementation. Sample names and
/// INFO/FORMAT tag names are snapshotted when the header is built with `new`.
#[derive(Debug)]
pub struct Header {
    /// Raw htslib header pointer owned by this value.
    inner: *mut rust_htslib::htslib::bcf_hdr_t,
    /// Set by `push_record` after appending a line; cleared by `sync`.
    dirty: AtomicBool,
    /// Sample names captured at construction (lossily decoded as UTF-8).
    sample_names: Vec<String>,
    /// Maps each sample name to its position in `sample_names`.
    sample_name_to_idx: HashMap<String, usize>,
    /// Maps a numeric tag id to the tag name, both as `String` and as raw bytes.
    /// Populated in `new` from the INFO and FORMAT records.
    id_to_name_cache: HashMap<u32, (String, Vec<u8>)>,

    /// Lazily created shared `HeaderView` over `inner`; see `translate_view`.
    translate_view: OnceLock<Arc<rust_htslib::bcf::header::HeaderView>>,
}
82
/// Plain-string description of one header field, as produced by
/// `Header::get_field` and `Header::all_fields`.
#[derive(Debug, Clone)]
pub struct HeaderField {
    /// Value of the record's `ID` key.
    pub id: String,
    /// Value type name: `"Flag"`, `"Integer"`, `"Float"` or `"String"`.
    pub r#type: String,
    /// Cardinality: a decimal count, or one of `"A"`, `"R"`, `"G"`, `"."`.
    pub number: String,
    /// `Description` value with one pair of surrounding double quotes removed;
    /// empty when the record has no description.
    pub description: String,
}
95
// The raw pointer field prevents the compiler from deriving these automatically.
// NOTE(review): `push_record` and `sync` mutate the underlying C header through
// `&self`, so the `Sync` claim presumably relies on callers not invoking those
// methods concurrently with other accesses — confirm against the call sites.
unsafe impl Send for Header {}
unsafe impl Sync for Header {}
98
99impl Header {
100 pub unsafe fn new(inner: *mut rust_htslib::htslib::bcf_hdr_t) -> Self {
108 let inner = rust_htslib::htslib::bcf_hdr_dup(inner);
109 let view = ManuallyDrop::new(bcf::header::HeaderView::new(inner));
110 let sample_count = view.sample_count();
111 let (sample_names, sample_name_to_idx) = if sample_count > 0 {
112 let names: Vec<String> = view
113 .samples()
114 .iter()
115 .map(|s| String::from_utf8_lossy(s).into_owned())
116 .collect();
117 let name_to_idx: HashMap<String, usize> = names
118 .iter()
119 .enumerate()
120 .map(|(i, name)| (name.clone(), i))
121 .collect();
122 (names, name_to_idx)
123 } else {
124 (Vec::new(), HashMap::new())
125 };
126
127 let mut id_to_name_cache: HashMap<u32, (String, Vec<u8>)> = HashMap::new();
129 for record in view.header_records() {
130 let tag_id = match &record {
131 HeaderRecord::Info { values, .. } | HeaderRecord::Format { values, .. } => values
132 .iter()
133 .find(|(k, _)| k.as_str() == "ID")
134 .map(|(_, v)| v.as_str()),
135 _ => None,
136 };
137 if let Some(tag_name) = tag_id {
138 let tag_bytes = tag_name.as_bytes();
139 if let Ok(id) = view.name_to_id(tag_bytes) {
140 let name = tag_name.to_string();
141 let bytes = tag_bytes.to_vec();
142 id_to_name_cache.insert(id.0, (name, bytes));
143 }
144 }
145 }
146
147 Self {
148 inner,
149 dirty: AtomicBool::new(false),
150 sample_names,
151 sample_name_to_idx,
152 id_to_name_cache,
153 translate_view: OnceLock::new(),
154 }
155 }
156
157 pub fn empty() -> Self {
159 let c_str = CString::new(&b"w"[..]).unwrap();
160 let inner = unsafe { rust_htslib::htslib::bcf_hdr_init(c_str.as_ptr()) };
161 Self {
162 inner,
163 dirty: AtomicBool::new(false),
164 sample_names: Vec::new(),
165 sample_name_to_idx: HashMap::new(),
166 id_to_name_cache: HashMap::new(),
167 translate_view: OnceLock::new(),
168 }
169 }
170
171 pub fn inner_ptr(&self) -> *mut rust_htslib::htslib::bcf_hdr_t {
177 self.inner
178 }
179
180 pub fn translate_view(&self) -> Arc<rust_htslib::bcf::header::HeaderView> {
181 self.translate_view
189 .get_or_init(|| {
190 let view = Arc::new(rust_htslib::bcf::header::HeaderView::new(self.inner));
191 std::mem::forget(Arc::clone(&view)); view
193 })
194 .clone()
195 }
196
197 fn view(&self) -> ManuallyDrop<bcf::header::HeaderView> {
199 ManuallyDrop::new(bcf::header::HeaderView::new(self.inner))
200 }
201
202 pub fn header_records(&self) -> Vec<HeaderRecord> {
204 self.view().header_records()
205 }
206
207 pub fn sample_id(&self, sample: &[u8]) -> Option<usize> {
211 match self.view().sample_to_id(sample) {
212 Ok(id) => Some(id.0 as usize),
213 Err(_) => None,
214 }
215 }
216
217 pub fn id_to_name(&self, id: u32) -> Vec<u8> {
222 self.view().id_to_name(bcf::header::Id(id))
223 }
224
225 pub fn id_to_name_cached(&self, id: u32) -> (String, Vec<u8>) {
230 if let Some(cached) = self.id_to_name_cache.get(&id) {
231 return cached.clone();
232 }
233 let bytes = self.view().id_to_name(bcf::header::Id(id));
235 let name = String::from_utf8_lossy(&bytes).into_owned();
236 (name, bytes)
237 }
238
239 pub fn sample_count(&self) -> usize {
241 self.sample_names.len()
242 }
243
244 pub fn sample_names(&self) -> &[String] {
246 &self.sample_names
247 }
248
249 pub fn sample_idx(&self, name: &str) -> Option<usize> {
253 self.sample_name_to_idx.get(name).copied()
254 }
255
256 pub fn sample_name_to_idx(&self) -> &HashMap<String, usize> {
258 &self.sample_name_to_idx
259 }
260
261 pub fn info_type(&self, tag: &[u8]) -> Option<(TagType, TagLength)> {
265 self.view().info_type(tag).ok()
266 }
267
268 pub fn format_type(&self, tag: &[u8]) -> Option<(TagType, TagLength)> {
272 self.view().format_type(tag).ok()
273 }
274
275 pub fn sync(&self) {
279 if !self.dirty.swap(false, Ordering::AcqRel) {
280 return;
281 }
282 unsafe {
283 rust_htslib::htslib::bcf_hdr_sync(self.inner);
284 }
285 }
286
287 pub fn push_record(&self, record: &[u8]) -> bool {
291 let Ok(c_str) = CString::new(record) else {
292 return false;
293 };
294 let r = unsafe { rust_htslib::htslib::bcf_hdr_append(self.inner, c_str.as_ptr()) };
295 self.dirty.store(true, Ordering::Release);
296 self.sync();
297 r == 0
298 }
299
300 pub fn add_info(&self, id: &str, number: &str, ty: &str, description: &str) -> bool {
311 let record =
312 format!("##INFO=<ID={id},Number={number},Type={ty},Description=\"{description}\">",);
313 self.push_record(record.as_bytes())
314 }
315
316 pub fn add_format(&self, id: &str, number: &str, ty: &str, description: &str) -> bool {
327 let record =
328 format!("##FORMAT=<ID={id},Number={number},Type={ty},Description=\"{description}\">",);
329 self.push_record(record.as_bytes())
330 }
331
332 pub fn to_string(&self) -> Option<String> {
336 self.sync();
337
338 let mut s = rust_htslib::htslib::kstring_t {
339 l: 0,
340 m: 0,
341 s: std::ptr::null_mut(),
342 };
343
344 let ret = unsafe { rust_htslib::htslib::bcf_hdr_format(self.inner_ptr(), 0, &mut s) };
345 if ret != 0 {
346 if !s.s.is_null() {
347 unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
348 }
349 return None;
350 }
351
352 let bytes = unsafe { std::slice::from_raw_parts(s.s as *const u8, s.l as usize) };
353 let text = String::from_utf8_lossy(bytes).into_owned();
354
355 if !s.s.is_null() {
356 unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
357 }
358
359 Some(text)
360 }
361
362 pub fn get_field(&self, section: &str, id: &str) -> Option<HeaderField> {
371 let tag_info = match section {
372 "INFO" => self.info_type(id.as_bytes()),
373 "FORMAT" => self.format_type(id.as_bytes()),
374 _ => return None,
375 };
376
377 let (tag_type, tag_length) = tag_info?;
378
379 let mut description = String::new();
380 for record in self.header_records() {
381 match record {
382 HeaderRecord::Info { values, .. } if section == "INFO" => {
383 if values.iter().any(|(k, v)| k.as_str() == "ID" && v == id) {
384 description = values
385 .iter()
386 .find(|(k, _)| k.as_str() == "Description")
387 .map(|(_, v)| unquote(v))
388 .unwrap_or_default();
389 break;
390 }
391 }
392 HeaderRecord::Format { values, .. } if section == "FORMAT" => {
393 if values.iter().any(|(k, v)| k.as_str() == "ID" && v == id) {
394 description = values
395 .iter()
396 .find(|(k, _)| k.as_str() == "Description")
397 .map(|(_, v)| unquote(v))
398 .unwrap_or_default();
399 break;
400 }
401 }
402 _ => {}
403 }
404 }
405
406 Some(HeaderField {
407 id: id.to_string(),
408 r#type: tag_type_to_str(tag_type).to_string(),
409 number: tag_length_to_str(tag_length),
410 description,
411 })
412 }
413
414 pub fn all_fields(&self) -> Vec<(String, HeaderField)> {
418 let mut fields = Vec::new();
419 for record in self.header_records() {
420 match record {
421 HeaderRecord::Info { values, .. } => {
422 if let Some(field) =
423 self.parse_record_to_field("INFO", values.into_iter().collect())
424 {
425 fields.push(("INFO".to_string(), field));
426 }
427 }
428 HeaderRecord::Format { values, .. } => {
429 if let Some(field) =
430 self.parse_record_to_field("FORMAT", values.into_iter().collect())
431 {
432 fields.push(("FORMAT".to_string(), field));
433 }
434 }
435 HeaderRecord::Filter { values, .. } => {
436 if let Some(field) =
437 self.parse_record_to_field("FILTER", values.into_iter().collect())
438 {
439 fields.push(("FILTER".to_string(), field));
440 }
441 }
442 _ => {}
443 }
444 }
445 fields
446 }
447
448 fn parse_record_to_field(
450 &self,
451 section: &str,
452 values: Vec<(String, String)>,
453 ) -> Option<HeaderField> {
454 let id = values
455 .iter()
456 .find(|(k, _)| k.as_str() == "ID")
457 .map(|(_, v)| v.as_str())?;
458
459 let (tag_type, tag_length) = match section {
460 "INFO" => self.info_type(id.as_bytes())?,
461 "FORMAT" => self.format_type(id.as_bytes())?,
462 "FILTER" => (TagType::Flag, TagLength::Fixed(0)), _ => return None,
464 };
465
466 let description = values
467 .iter()
468 .find(|(k, _)| k.as_str() == "Description")
469 .map(|(_, v)| unquote(v))
470 .unwrap_or_default();
471
472 Some(HeaderField {
473 id: id.to_string(),
474 r#type: tag_type_to_str(tag_type).to_string(),
475 number: tag_length_to_str(tag_length),
476 description,
477 })
478 }
479}
480
/// Removes one pair of surrounding double quotes from `s`, if present.
///
/// Input that is not both prefixed and suffixed by a distinct `"` (including a
/// lone `"`) is returned unchanged. Quotes inside the value are left as they are.
fn unquote(s: &str) -> String {
    let inner = s
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    inner.unwrap_or(s).to_string()
}
488
489fn tag_type_to_str(t: TagType) -> &'static str {
490 match t {
491 TagType::Flag => "Flag",
492 TagType::Integer => "Integer",
493 TagType::Float => "Float",
494 TagType::String => "String",
495 }
496}
497
498fn tag_length_to_str(l: TagLength) -> String {
499 match l {
500 TagLength::Fixed(n) => n.to_string(),
501 TagLength::AltAlleles => "A".to_string(),
502 TagLength::Alleles => "R".to_string(),
503 TagLength::Genotypes => "G".to_string(),
504 TagLength::Variable => ".".to_string(),
505 }
506}