htsvcf_core/
variant.rs

1//! VCF variant record representation and field access.
2//!
3//! This module provides types and functions for working with VCF/BCF variant
4//! records. The [`Variant`] struct wraps a `bcf::Record` and provides convenient
5//! access to standard VCF fields (CHROM, POS, REF, ALT, etc.) as well as INFO
6//! and FORMAT data.
7//!
8//! # Value Types
9//!
10//! - [`InfoValue`]: Represents INFO field values (scalar, array, flag, or absent)
11//! - [`FormatValue`]: Represents FORMAT field values (per-sample data)
12//!
13//! # Standalone Functions
14//!
15//! For cases where you have a borrowed `bcf::Record` reference (e.g., from a
16//! GcCell in V8 bindings), standalone helper functions are provided:
17//!
18//! - [`record_info`]: Get INFO field from a borrowed record
19//! - [`record_format`]: Get FORMAT field from a borrowed record
20//! - [`record_sample`]: Get all FORMAT fields for a single sample
21//! - [`record_samples`]: Get all FORMAT fields for multiple samples
22//! - [`record_to_string`]: Format record as VCF line
23//!
24//! # Example
25//!
26//! ```no_run
27//! use htsvcf_core::variant::{Variant, InfoValue};
28//! use htsvcf_core::header::Header;
29//! use rust_htslib::bcf::{self, Read};
30//!
31//! let mut reader = bcf::Reader::from_path("input.vcf.gz").unwrap();
32//! let header = unsafe { Header::new(reader.header().inner) };
33//!
34//! for result in reader.records() {
35//!     let record = result.unwrap();
36//!     let variant = Variant::from_record(record);
37//!
38//!     println!("{}:{}", variant.chrom(), variant.pos());
39//!
40//!     // Access INFO fields
41//!     match variant.info(&header, "DP") {
42//!         InfoValue::Int(dp) => println!("DP = {}", dp),
43//!         InfoValue::Absent => println!("No DP"),
44//!         _ => {}
45//!     }
46//! }
47//! ```
48
49use crate::genotype::{
50    parse_genotype, parse_genotype_for_sample, record_genotypes, record_set_genotypes, Genotype,
51};
52use crate::header::Header;
53use rust_htslib::bcf;
54use rust_htslib::bcf::header::{TagLength, TagType};
55use rust_htslib::bcf::record::Numeric;
56use std::ffi::CString;
57
/// Represents a value from an INFO field in a VCF record.
///
/// INFO fields can hold various types of data (flags, integers, floats, strings)
/// and can be scalar or array-valued. This enum captures all possible states:
///
/// - `Absent`: The tag is not present in the record
/// - `Missing`: The tag is present but has no value (`.` in VCF)
/// - `Bool`: A flag (presence/absence)
/// - `Int`: A single integer value
/// - `Float`: A single float value
/// - `String`: A single string value
/// - `Array`: Multiple values of any type
///
/// `Absent` is also what [`record_info`] returns when the tag is not declared
/// in the header or when reading the field fails.
///
/// Only `PartialEq` (not `Eq`) is derived because the `Float` variant holds an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub enum InfoValue {
    /// The INFO tag is not present in the record.
    Absent,
    /// The INFO tag is present but has a missing value (`.`).
    Missing,
    /// A boolean flag (true if present).
    Bool(bool),
    /// A single integer value.
    Int(i32),
    /// A single float value.
    Float(f32),
    /// A single string value.
    String(String),
    /// An array of values (for Number != 1 fields).
    Array(Vec<InfoValue>),
}
87
/// Represents a value from a FORMAT field in a VCF record.
///
/// FORMAT fields contain per-sample data and can hold integers, floats, or strings.
/// Values can be scalar, array-valued, or organized per-sample.
///
/// - `Absent`: The tag is not present in the record
/// - `Missing`: The tag is present but has no value (`.` in VCF)
/// - `Int`: A single integer value
/// - `Float`: A single float value
/// - `String`: A single string value
/// - `Array`: Multiple values (for Number != 1 fields)
/// - `PerSample`: A vector of values, one per sample in the VCF
/// - `Genotype`: A parsed genotype, emitted under the `"genotype"` key by
///   [`record_sample`] and [`record_samples`]
///
/// Only `PartialEq` (not `Eq`) is derived because the `Float` variant holds an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub enum FormatValue {
    /// The FORMAT tag is not present in the record.
    Absent,
    /// The FORMAT tag is present but has a missing value (`.`).
    Missing,
    /// A single integer value.
    Int(i32),
    /// A single float value.
    Float(f32),
    /// A single string value.
    String(String),
    /// An array of values (for Number != 1 fields).
    Array(Vec<FormatValue>),
    /// Per-sample values, one entry per sample in the VCF.
    PerSample(Vec<FormatValue>),
    /// A parsed genotype value (for the GT field).
    Genotype(Genotype),
}
119
120// ============================================================================
121// Public helper functions for working with bcf::Record references directly.
122// These allow bindings (like v8) that can't own the record to still use the
123// core logic.
124// ============================================================================
125
126/// Get an INFO field value from a record.
127///
128/// This is the standalone version of `Variant::info()` that can be used when
129/// you have a borrowed reference to a record (e.g., from a GcCell).
130pub fn record_info(record: &bcf::Record, header: &Header, tag: &str) -> InfoValue {
131    let (tag_type, tag_length) = match header.info_type(tag.as_bytes()) {
132        Some(v) => v,
133        None => return InfoValue::Absent,
134    };
135
136    match tag_type {
137        TagType::Flag => match header_info_flag(header, record, tag.as_bytes()) {
138            Ok(v) => InfoValue::Bool(v),
139            Err(InfoError::Absent) => InfoValue::Absent,
140            Err(InfoError::Other) => InfoValue::Absent,
141        },
142        TagType::Integer => match header_info_values_i32(header, record, tag.as_bytes()) {
143            Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Int),
144            Err(InfoError::Absent) => InfoValue::Absent,
145            Err(InfoError::Other) => InfoValue::Absent,
146        },
147        TagType::Float => match header_info_values_f32(header, record, tag.as_bytes()) {
148            Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Float),
149            Err(InfoError::Absent) => InfoValue::Absent,
150            Err(InfoError::Other) => InfoValue::Absent,
151        },
152        TagType::String => match header_info_values_string(header, record, tag.as_bytes()) {
153            Ok(v) => string_to_infovalue(v, tag_length),
154            Err(InfoError::Absent) => InfoValue::Absent,
155            Err(InfoError::Other) => InfoValue::Absent,
156        },
157    }
158}
159
160/// Get a FORMAT field value from a record (per-sample).
161///
162/// This is the standalone version of `Variant::format()` that can be used when
163/// you have a borrowed reference to a record (e.g., from a GcCell).
164pub fn record_format(record: &bcf::Record, header: &Header, tag: &str) -> FormatValue {
165    let (tag_type, tag_length) = match header.format_type(tag.as_bytes()) {
166        Some(v) => v,
167        None => return FormatValue::Absent,
168    };
169
170    let sample_count = record.sample_count() as usize;
171
172    match tag_type {
173        TagType::Integer => match record.format(tag.as_bytes()).integer() {
174            Ok(values) => FormatValue::PerSample(
175                values
176                    .iter()
177                    .take(sample_count)
178                    .map(|per_sample| {
179                        format_numeric_to_value(per_sample, tag_length, FormatValue::Int)
180                    })
181                    .collect(),
182            ),
183            Err(_) => FormatValue::Absent,
184        },
185        TagType::Float => match record.format(tag.as_bytes()).float() {
186            Ok(values) => FormatValue::PerSample(
187                values
188                    .iter()
189                    .take(sample_count)
190                    .map(|per_sample| {
191                        format_numeric_to_value(per_sample, tag_length, FormatValue::Float)
192                    })
193                    .collect(),
194            ),
195            Err(_) => FormatValue::Absent,
196        },
197        TagType::String => match record.format(tag.as_bytes()).string() {
198            Ok(values) => FormatValue::PerSample(
199                values
200                    .iter()
201                    .take(sample_count)
202                    .map(|per_sample| format_string_to_value(per_sample, tag_length))
203                    .collect(),
204            ),
205            Err(_) => FormatValue::Absent,
206        },
207        TagType::Flag => FormatValue::Absent,
208    }
209}
210
211/// Get sample data from a record for a single sample by name.
212///
213/// This is the standalone version of `Variant::sample()` that can be used when
214/// you have a borrowed reference to a record (e.g., from a GcCell).
215pub fn record_sample(
216    record: &bcf::Record,
217    header: &Header,
218    sample: &str,
219) -> Option<Vec<(String, FormatValue)>> {
220    let sample_id = header.sample_id(sample.as_bytes())?;
221    let sample_count = record.sample_count() as usize;
222    if sample_id >= sample_count {
223        return None;
224    }
225
226    let format_tags = get_format_tag_names(header, record);
227    let mut out: Vec<(String, FormatValue)> = Vec::with_capacity(format_tags.len() + 2);
228
229    for (tag_name, tag_bytes) in format_tags {
230        let Some(value) = format_value_for_sample(header, record, &tag_bytes, sample_id) else {
231            continue;
232        };
233        out.push((tag_name, value));
234    }
235
236    // Add parsed genotype if GT field exists
237    if let Some(gt) = parse_genotype_for_sample(record, sample_id) {
238        out.push(("genotype".to_string(), FormatValue::Genotype(gt)));
239    }
240
241    // Include the sample name so JS bindings can expose it.
242    // Set it last so it can't be overwritten by a FORMAT tag named "sample_name".
243    out.push((
244        "sample_name".to_string(),
245        FormatValue::String(sample.to_string()),
246    ));
247
248    Some(out)
249}
250
251/// Get sample data from a record for all samples or a subset.
252///
253/// This is the standalone version of `Variant::samples()` that can be used when
254/// you have a borrowed reference to a record (e.g., from a GcCell).
255pub fn record_samples(
256    record: &bcf::Record,
257    header: &Header,
258    subset: Option<&[&str]>,
259) -> Vec<Vec<(String, FormatValue)>> {
260    let sample_count = record.sample_count() as usize;
261    if sample_count == 0 {
262        return Vec::new();
263    }
264
265    let sample_names = header.sample_names();
266    let format_tags = get_format_tag_names(header, record);
267
268    // Determine which sample indices to include and in what order
269    let sample_indices: Vec<usize> = match subset {
270        None => (0..sample_count).collect(),
271        Some(names) => {
272            let name_to_idx = header.sample_name_to_idx();
273            names
274                .iter()
275                .filter_map(|name| name_to_idx.get(*name).copied())
276                .collect()
277        }
278    };
279
280    if sample_indices.is_empty() {
281        return Vec::new();
282    }
283
284    // Pre-allocate result vectors for each requested sample
285    let mut results: Vec<Vec<(String, FormatValue)>> = sample_indices
286        .iter()
287        .map(|_| Vec::with_capacity(format_tags.len() + 1))
288        .collect();
289
290    // For each FORMAT tag, fetch values for ALL samples at once and distribute to requested ones
291    for (tag_name, tag_bytes) in &format_tags {
292        let Some((tag_type, tag_length)) = header.format_type(tag_bytes) else {
293            continue;
294        };
295
296        match tag_type {
297            bcf::header::TagType::Integer => {
298                let Ok(all_values) = record.format(tag_bytes).integer() else {
299                    continue;
300                };
301                for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
302                    if let Some(per_sample) = all_values.get(sample_idx) {
303                        let value =
304                            format_numeric_to_value(per_sample, tag_length, FormatValue::Int);
305                        results[result_idx].push((tag_name.clone(), value));
306                    }
307                }
308            }
309            bcf::header::TagType::Float => {
310                let Ok(all_values) = record.format(tag_bytes).float() else {
311                    continue;
312                };
313                for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
314                    if let Some(per_sample) = all_values.get(sample_idx) {
315                        let value =
316                            format_numeric_to_value(per_sample, tag_length, FormatValue::Float);
317                        results[result_idx].push((tag_name.clone(), value));
318                    }
319                }
320            }
321            bcf::header::TagType::String => {
322                let Ok(all_values) = record.format(tag_bytes).string() else {
323                    continue;
324                };
325                for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
326                    if let Some(per_sample) = all_values.get(sample_idx) {
327                        let value = format_string_to_value(per_sample, tag_length);
328                        results[result_idx].push((tag_name.clone(), value));
329                    }
330                }
331            }
332            bcf::header::TagType::Flag => {
333                // Flags are not valid for FORMAT
334            }
335        }
336    }
337
338    // Add parsed genotypes if GT field exists
339    if let Ok(gts) = record.genotypes() {
340        for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
341            let gt = parse_genotype(&gts.get(sample_idx));
342            results[result_idx].push(("genotype".to_string(), FormatValue::Genotype(gt)));
343        }
344    }
345
346    // Add sample_name to each result (last, so it can't be overwritten by a FORMAT tag)
347    for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
348        let name = sample_names
349            .get(sample_idx)
350            .cloned()
351            .unwrap_or_else(|| format!("sample_{sample_idx}"));
352        results[result_idx].push(("sample_name".to_string(), FormatValue::String(name)));
353    }
354
355    results
356}
357
358/// Get the list of FORMAT tag names present in a record.
359pub fn get_format_tag_names(header: &Header, record: &bcf::Record) -> Vec<(String, Vec<u8>)> {
360    let record_ptr =
361        record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
362
363    let n_fmt = unsafe { (*record_ptr).n_fmt() as usize };
364    let fmt_ptr = unsafe { (*record_ptr).d.fmt };
365
366    if fmt_ptr.is_null() || n_fmt == 0 {
367        return Vec::new();
368    }
369
370    let mut tags = Vec::with_capacity(n_fmt);
371    for i in 0..n_fmt {
372        let fmt = unsafe { *fmt_ptr.add(i) };
373        let (tag_name, tag_bytes) = header.id_to_name_cached(fmt.id as u32);
374        tags.push((tag_name, tag_bytes));
375    }
376    tags
377}
378
379/// Format a record as a VCF line string.
380pub fn record_to_string(record: &bcf::Record, header: &Header) -> Option<String> {
381    let mut s = rust_htslib::htslib::kstring_t {
382        l: 0,
383        m: 0,
384        s: std::ptr::null_mut(),
385    };
386
387    let record_ptr =
388        record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
389
390    let _ = unsafe {
391        rust_htslib::htslib::bcf_unpack(record_ptr, rust_htslib::htslib::BCF_UN_ALL as i32)
392    };
393
394    let ret = unsafe {
395        rust_htslib::htslib::vcf_format(
396            header.inner_ptr() as *const rust_htslib::htslib::bcf_hdr_t,
397            record_ptr as *const rust_htslib::htslib::bcf1_t,
398            &mut s,
399        )
400    };
401    if ret != 0 {
402        if !s.s.is_null() {
403            unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
404        }
405        return None;
406    }
407
408    let bytes = unsafe { std::slice::from_raw_parts(s.s as *const u8, s.l as usize) };
409    let text = String::from_utf8_lossy(bytes).into_owned();
410
411    if !s.s.is_null() {
412        unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
413    }
414
415    Some(text.trim_end_matches('\n').to_string())
416}
417
418/// Set an INFO flag value on a record.
419pub fn record_set_info_flag(
420    record: &mut bcf::Record,
421    header: &Header,
422    tag: &str,
423    is_set: bool,
424) -> Result<(), rust_htslib::errors::Error> {
425    let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
426        rust_htslib::errors::Error::BcfUndefinedTag {
427            tag: tag.to_string(),
428        }
429    })?;
430
431    if tag_type != TagType::Flag {
432        return Err(rust_htslib::errors::Error::BcfSetTag {
433            tag: tag.to_string(),
434        });
435    }
436
437    if is_set {
438        record.push_info_flag(tag.as_bytes())?;
439    } else {
440        record.clear_info_flag(tag.as_bytes())?;
441    }
442
443    record.unpack();
444    Ok(())
445}
446
447/// Set an INFO integer value on a record.
448pub fn record_set_info_integer(
449    record: &mut bcf::Record,
450    header: &Header,
451    tag: &str,
452    values: &[i32],
453) -> Result<(), rust_htslib::errors::Error> {
454    let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
455        rust_htslib::errors::Error::BcfUndefinedTag {
456            tag: tag.to_string(),
457        }
458    })?;
459
460    if tag_type != TagType::Integer {
461        return Err(rust_htslib::errors::Error::BcfSetTag {
462            tag: tag.to_string(),
463        });
464    }
465
466    record.push_info_integer(tag.as_bytes(), values)?;
467    record.unpack();
468    Ok(())
469}
470
471/// Set an INFO float value on a record.
472pub fn record_set_info_float(
473    record: &mut bcf::Record,
474    header: &Header,
475    tag: &str,
476    values: &[f32],
477) -> Result<(), rust_htslib::errors::Error> {
478    let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
479        rust_htslib::errors::Error::BcfUndefinedTag {
480            tag: tag.to_string(),
481        }
482    })?;
483
484    if tag_type != TagType::Float {
485        return Err(rust_htslib::errors::Error::BcfSetTag {
486            tag: tag.to_string(),
487        });
488    }
489
490    record.push_info_float(tag.as_bytes(), values)?;
491    record.unpack();
492    Ok(())
493}
494
495/// Set an INFO string value on a record.
496pub fn record_set_info_string(
497    record: &mut bcf::Record,
498    header: &Header,
499    tag: &str,
500    values: &[String],
501) -> Result<(), rust_htslib::errors::Error> {
502    let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
503        rust_htslib::errors::Error::BcfUndefinedTag {
504            tag: tag.to_string(),
505        }
506    })?;
507
508    if tag_type != TagType::String {
509        return Err(rust_htslib::errors::Error::BcfSetTag {
510            tag: tag.to_string(),
511        });
512    }
513
514    let refs: Vec<&[u8]> = values.iter().map(|s| s.as_bytes()).collect();
515    record.push_info_string(tag.as_bytes(), &refs)?;
516    record.unpack();
517    Ok(())
518}
519
520/// Clear an INFO field from a record.
521pub fn record_clear_info(
522    record: &mut bcf::Record,
523    header: &Header,
524    tag: &str,
525) -> Result<(), rust_htslib::errors::Error> {
526    let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
527        rust_htslib::errors::Error::BcfUndefinedTag {
528            tag: tag.to_string(),
529        }
530    })?;
531
532    match tag_type {
533        TagType::Flag => record.clear_info_flag(tag.as_bytes())?,
534        TagType::Integer => record.clear_info_integer(tag.as_bytes())?,
535        TagType::Float => record.clear_info_float(tag.as_bytes())?,
536        TagType::String => record.clear_info_string(tag.as_bytes())?,
537    }
538
539    record.unpack();
540    Ok(())
541}
542
543// ============================================================================
544// FORMAT field setters
545// ============================================================================
546
/// Missing value sentinel for i32 FORMAT fields.
/// This matches htslib's bcf_int32_missing.
///
/// Private to this module; callers obtain it through `format_int_missing()`.
const FORMAT_MISSING_INT: i32 = i32::MIN;
550
/// Build the missing-value sentinel for f32 FORMAT fields.
///
/// The value is a NaN with the exact bit pattern `0x7F80_0001`, which is the
/// pattern htslib uses for `bcf_float_missing`. Because it is a NaN it never
/// compares equal to anything; compare via `to_bits()` instead.
fn format_missing_float() -> f32 {
    const MISSING_BITS: u32 = 0x7F80_0001;
    f32::from_bits(MISSING_BITS)
}
556
557/// Set a FORMAT integer field on a record.
558///
559/// The `values` slice should be flattened: for a field with `n` values per sample
560/// and `s` samples, provide `s * n` values in sample-major order:
561/// `[sample0_val0, sample0_val1, ..., sample1_val0, sample1_val1, ...]`
562///
563/// Use `FORMAT_MISSING_INT` (`i32::MIN`) to represent missing values.
564///
565/// # Errors
566///
567/// Returns an error if:
568/// - The tag is not defined in the header
569/// - The tag is not an Integer type
570/// - The tag is "GT" (use dedicated genotype methods instead)
571pub fn record_set_format_integer(
572    record: &mut bcf::Record,
573    header: &Header,
574    tag: &str,
575    values: &[i32],
576) -> Result<(), rust_htslib::errors::Error> {
577    if tag == "GT" {
578        return Err(rust_htslib::errors::Error::BcfSetTag {
579            tag: "GT cannot be set via set_format; use dedicated genotype methods".to_string(),
580        });
581    }
582
583    let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
584        rust_htslib::errors::Error::BcfUndefinedTag {
585            tag: tag.to_string(),
586        }
587    })?;
588
589    if tag_type != TagType::Integer {
590        return Err(rust_htslib::errors::Error::BcfSetTag {
591            tag: tag.to_string(),
592        });
593    }
594
595    record.push_format_integer(tag.as_bytes(), values)?;
596    record.unpack();
597    Ok(())
598}
599
600/// Set a FORMAT float field on a record.
601///
602/// The `values` slice should be flattened: for a field with `n` values per sample
603/// and `s` samples, provide `s * n` values in sample-major order:
604/// `[sample0_val0, sample0_val1, ..., sample1_val0, sample1_val1, ...]`
605///
606/// Use `format_missing_float()` to represent missing values.
607///
608/// # Errors
609///
610/// Returns an error if:
611/// - The tag is not defined in the header
612/// - The tag is not a Float type
613pub fn record_set_format_float(
614    record: &mut bcf::Record,
615    header: &Header,
616    tag: &str,
617    values: &[f32],
618) -> Result<(), rust_htslib::errors::Error> {
619    let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
620        rust_htslib::errors::Error::BcfUndefinedTag {
621            tag: tag.to_string(),
622        }
623    })?;
624
625    if tag_type != TagType::Float {
626        return Err(rust_htslib::errors::Error::BcfSetTag {
627            tag: tag.to_string(),
628        });
629    }
630
631    record.push_format_float(tag.as_bytes(), values)?;
632    record.unpack();
633    Ok(())
634}
635
636/// Set a FORMAT string field on a record.
637///
638/// Provide one string per sample. For multi-value string fields, concatenate
639/// values with commas within each sample's string.
640///
641/// # Errors
642///
643/// Returns an error if:
644/// - The tag is not defined in the header
645/// - The tag is not a String type
646/// - The tag is "GT" (use dedicated genotype methods instead)
647pub fn record_set_format_string(
648    record: &mut bcf::Record,
649    header: &Header,
650    tag: &str,
651    values: &[String],
652) -> Result<(), rust_htslib::errors::Error> {
653    if tag == "GT" {
654        return Err(rust_htslib::errors::Error::BcfSetTag {
655            tag: "GT cannot be set via set_format; use dedicated genotype methods".to_string(),
656        });
657    }
658
659    let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
660        rust_htslib::errors::Error::BcfUndefinedTag {
661            tag: tag.to_string(),
662        }
663    })?;
664
665    if tag_type != TagType::String {
666        return Err(rust_htslib::errors::Error::BcfSetTag {
667            tag: tag.to_string(),
668        });
669    }
670
671    let refs: Vec<&[u8]> = values.iter().map(|s| s.as_bytes()).collect();
672    record.push_format_string(tag.as_bytes(), &refs)?;
673    record.unpack();
674    Ok(())
675}
676
677/// Clear (remove) a FORMAT field from a record.
678///
679/// # Errors
680///
681/// Returns an error if the tag is not defined in the header.
682pub fn record_clear_format(
683    record: &mut bcf::Record,
684    header: &Header,
685    tag: &str,
686) -> Result<(), rust_htslib::errors::Error> {
687    if tag == "GT" {
688        return Err(rust_htslib::errors::Error::BcfSetTag {
689            tag: "GT cannot be cleared via clear_format".to_string(),
690        });
691    }
692
693    let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
694        rust_htslib::errors::Error::BcfUndefinedTag {
695            tag: tag.to_string(),
696        }
697    })?;
698
699    // To clear a FORMAT field, we call the appropriate push method with an empty slice.
700    // This is how htslib handles clearing FORMAT fields.
701    match tag_type {
702        TagType::Integer => record.push_format_integer(tag.as_bytes(), &[])?,
703        TagType::Float => record.push_format_float(tag.as_bytes(), &[])?,
704        TagType::String => record.push_format_string::<&[u8]>(tag.as_bytes(), &[])?,
705        TagType::Flag => {
706            // FORMAT flags are rare but handle them
707            return Err(rust_htslib::errors::Error::BcfSetTag {
708                tag: format!("FORMAT/{tag} is a Flag type which is not supported"),
709            });
710        }
711    }
712
713    record.unpack();
714    Ok(())
715}
716
/// Get the missing value sentinel for FORMAT integer fields.
///
/// Returns the module's `FORMAT_MISSING_INT` constant (`i32::MIN`); pass it in
/// the `values` of `record_set_format_integer` to mark an entry as missing.
pub fn format_int_missing() -> i32 {
    FORMAT_MISSING_INT
}
721
/// Get the missing value sentinel for FORMAT float fields.
///
/// Public wrapper around the private `format_missing_float()` helper; pass the
/// result in the `values` of `record_set_format_float` to mark an entry as
/// missing. The sentinel is a NaN, so it cannot be detected with `==`.
pub fn format_float_missing() -> f32 {
    format_missing_float()
}
726
/// A VCF/BCF variant record with convenient field accessors.
///
/// `Variant` wraps a `rust_htslib::bcf::Record` and provides methods for
/// accessing standard VCF fields (CHROM, POS, REF, ALT, etc.) as well as
/// INFO and FORMAT data.
///
/// # Example
///
/// ```no_run
/// use htsvcf_core::variant::Variant;
/// use rust_htslib::bcf::{self, Read};
///
/// let mut reader = bcf::Reader::from_path("input.vcf.gz").unwrap();
/// for result in reader.records() {
///     let record = result.unwrap();
///     let variant = Variant::from_record(record);
///     println!("{}:{} {}", variant.chrom(), variant.pos(), variant.reference());
/// }
/// ```
#[derive(Debug)]
pub struct Variant {
    // The wrapped htslib record; unpacked once in `from_record`.
    record: bcf::Record,
    // Contig name resolved from the record's rid in `from_record`
    // ("." when it cannot be resolved).
    // NOTE(review): presumably this can go stale if the record is changed
    // through `record_mut()` — confirm whether any caller does that.
    chrom: String,
}
751
752impl Variant {
753    /// Create a `Variant` from a `bcf::Record`.
754    ///
755    /// The record is unpacked and the chromosome name is cached for efficient access.
756    pub fn from_record(mut record: bcf::Record) -> Self {
757        record.unpack();
758        let chrom = match record.rid() {
759            Some(rid) => record
760                .header()
761                .rid2name(rid)
762                .ok()
763                .map(|name| String::from_utf8_lossy(name).into_owned())
764                .unwrap_or_else(|| ".".to_string()),
765            None => ".".to_string(),
766        };
767        Self { record, chrom }
768    }
769
    /// Consume this Variant and return the underlying `bcf::Record`.
    ///
    /// This is primarily used by writer bindings so `write(variant)` can consume
    /// a JS `Variant` without cloning. The cached chromosome name is dropped
    /// along with the wrapper.
    pub fn into_record(self) -> bcf::Record {
        self.record
    }
777
    /// Get a mutable reference to the underlying `bcf::Record`.
    ///
    /// This is useful for passing the record to functions that need `&mut bcf::Record`,
    /// such as [`Writer::write_record`](crate::Writer::write_record).
    ///
    /// Only the record is exposed; the chromosome name cached by
    /// [`from_record`](Self::from_record) is not touched by this accessor.
    pub fn record_mut(&mut self) -> &mut bcf::Record {
        &mut self.record
    }
785
    /// Get the chromosome/contig name (CHROM column).
    ///
    /// Returns the name cached by [`from_record`](Self::from_record), which is
    /// `"."` when the contig could not be resolved at construction time.
    pub fn chrom(&self) -> &str {
        &self.chrom
    }
790
    /// Get the reference sequence ID (rid) from the header, if present.
    ///
    /// This is the numeric id that [`from_record`](Self::from_record) maps to
    /// the contig name; the value is forwarded unchanged from the record.
    pub fn rid(&self) -> Option<u32> {
        self.record.rid()
    }
795
    /// Get the zero-based start position.
    ///
    /// This is the internal representation used by htslib. For 1-based VCF
    /// coordinates, use [`pos()`](Self::pos), which is always `start() + 1`.
    pub fn start(&self) -> i64 {
        self.record.pos()
    }
803
    /// Get the 1-based position (POS column).
    ///
    /// This matches the coordinate shown in VCF files and is computed as the
    /// record's zero-based position plus one (see [`start()`](Self::start)).
    pub fn pos(&self) -> i64 {
        self.record.pos() + 1
    }
810
    /// Get the end coordinate (htslib semantics).
    ///
    /// For SNPs this equals `start + 1`. For indels and other variants,
    /// this reflects the span of the reference allele.
    ///
    /// The value is forwarded unchanged from the underlying record's `end()`.
    pub fn end(&self) -> i64 {
        self.record.end()
    }
818
819    /// Get the variant ID (ID column).
820    ///
821    /// Returns "." if no ID is set.
822    pub fn id(&self) -> String {
823        String::from_utf8_lossy(&self.record.id()).into_owned()
824    }
825
826    /// Set the variant ID (ID column).
827    ///
828    /// Pass an empty string or "." to clear the ID.
829    pub fn set_id(&mut self, id: &str) -> Result<(), rust_htslib::errors::Error> {
830        let id = if id.is_empty() { "." } else { id };
831        self.record.set_id(id.as_bytes())?;
832        Ok(())
833    }
834
835    /// Get the reference allele (REF column).
836    pub fn reference(&self) -> String {
837        self.record
838            .alleles()
839            .first()
840            .map(|a| String::from_utf8_lossy(a).into_owned())
841            .unwrap_or_else(|| ".".to_string())
842    }
843
844    /// Get the alternate alleles (ALT column).
845    ///
846    /// Returns a vector of alternate allele strings. May be empty if there
847    /// are no alternates.
848    pub fn alts(&self) -> Vec<String> {
849        self.record
850            .alleles()
851            .into_iter()
852            .skip(1)
853            .map(|a| String::from_utf8_lossy(a).into_owned())
854            .collect()
855    }
856
857    /// Get the quality score (QUAL column).
858    ///
859    /// Returns `None` if QUAL is missing (`.` in VCF).
860    pub fn qual(&self) -> Option<f32> {
861        let qual = self.record.qual();
862        if qual.is_missing() {
863            None
864        } else {
865            Some(qual)
866        }
867    }
868
869    /// Set the quality score (QUAL column).
870    ///
871    /// Pass `None` to set QUAL to missing (`.`).
872    pub fn set_qual(&mut self, qual: Option<f32>) {
873        match qual {
874            Some(v) => self.record.set_qual(v),
875            None => self.record.set_qual(<f32 as Numeric>::missing()),
876        }
877    }
878
879    /// Return the FILTER column as a list of filter IDs.
880    ///
881    /// Records that are '.' return an empty list.
882    pub fn filters(&self) -> Vec<String> {
883        let header = self.record.header();
884        let mut out = Vec::new();
885        for id in self.record.filters() {
886            let name = String::from_utf8_lossy(&header.id_to_name(id)).into_owned();
887            out.push(name);
888        }
889        out
890    }
891
892    /// Set the FILTER column.
893    ///
894    /// Pass an empty slice, `[""]`, or `["."]` to clear all filters.
895    /// Otherwise, provide an array of filter names to set.
896    pub fn set_filters(&mut self, filters: &[String]) -> Result<(), rust_htslib::errors::Error> {
897        let want_clear = filters.is_empty()
898            || (filters.len() == 1 && (filters[0].is_empty() || filters[0] == "."));
899
900        if want_clear {
901            let refs: Vec<&[u8]> = Vec::new();
902            self.record.set_filters(&refs)?;
903            return Ok(());
904        }
905
906        let refs: Vec<&[u8]> = filters.iter().map(|s| s.as_bytes()).collect();
907        self.record.set_filters(&refs)?;
908        Ok(())
909    }
910
911    /// Set an INFO flag value.
912    ///
913    /// Pass `true` to set the flag, `false` to clear it.
914    /// Returns an error if the tag is not defined in the header or is not a Flag type.
915    pub fn set_info_flag(
916        &mut self,
917        header: &Header,
918        tag: &str,
919        is_set: bool,
920    ) -> Result<(), rust_htslib::errors::Error> {
921        let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
922            rust_htslib::errors::Error::BcfUndefinedTag {
923                tag: tag.to_string(),
924            }
925        })?;
926
927        if tag_type != TagType::Flag {
928            return Err(rust_htslib::errors::Error::BcfSetTag {
929                tag: tag.to_string(),
930            });
931        }
932
933        if is_set {
934            self.record.push_info_flag(tag.as_bytes())?;
935        } else {
936            self.record.clear_info_flag(tag.as_bytes())?;
937        }
938
939        self.record.unpack();
940        Ok(())
941    }
942
943    /// Set an INFO integer value.
944    ///
945    /// Pass a slice of integers to set. For scalar fields (Number=1), pass a single-element slice.
946    /// Returns an error if the tag is not defined in the header or is not an Integer type.
947    pub fn set_info_integer(
948        &mut self,
949        header: &Header,
950        tag: &str,
951        values: &[i32],
952    ) -> Result<(), rust_htslib::errors::Error> {
953        let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
954            rust_htslib::errors::Error::BcfUndefinedTag {
955                tag: tag.to_string(),
956            }
957        })?;
958
959        if tag_type != TagType::Integer {
960            return Err(rust_htslib::errors::Error::BcfSetTag {
961                tag: tag.to_string(),
962            });
963        }
964
965        self.record.push_info_integer(tag.as_bytes(), values)?;
966        self.record.unpack();
967        Ok(())
968    }
969
970    /// Set an INFO float value.
971    ///
972    /// Pass a slice of floats to set. For scalar fields (Number=1), pass a single-element slice.
973    /// Returns an error if the tag is not defined in the header or is not a Float type.
974    pub fn set_info_float(
975        &mut self,
976        header: &Header,
977        tag: &str,
978        values: &[f32],
979    ) -> Result<(), rust_htslib::errors::Error> {
980        let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
981            rust_htslib::errors::Error::BcfUndefinedTag {
982                tag: tag.to_string(),
983            }
984        })?;
985
986        if tag_type != TagType::Float {
987            return Err(rust_htslib::errors::Error::BcfSetTag {
988                tag: tag.to_string(),
989            });
990        }
991
992        self.record.push_info_float(tag.as_bytes(), values)?;
993        self.record.unpack();
994        Ok(())
995    }
996
997    /// Set an INFO string value.
998    ///
999    /// Pass a slice of strings to set. For scalar fields (Number=1), pass a single-element slice.
1000    /// Returns an error if the tag is not defined in the header or is not a String type.
1001    pub fn set_info_string(
1002        &mut self,
1003        header: &Header,
1004        tag: &str,
1005        values: &[String],
1006    ) -> Result<(), rust_htslib::errors::Error> {
1007        let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
1008            rust_htslib::errors::Error::BcfUndefinedTag {
1009                tag: tag.to_string(),
1010            }
1011        })?;
1012
1013        if tag_type != TagType::String {
1014            return Err(rust_htslib::errors::Error::BcfSetTag {
1015                tag: tag.to_string(),
1016            });
1017        }
1018
1019        let refs: Vec<&[u8]> = values.iter().map(|s| s.as_bytes()).collect();
1020        self.record.push_info_string(tag.as_bytes(), &refs)?;
1021        self.record.unpack();
1022        Ok(())
1023    }
1024
1025    /// Translate this record to a new header.
1026    ///
1027    /// This is required when you mutate the header (e.g. add a new INFO field)
1028    /// and then want to set values for those new tags.
1029    ///
1030    /// IMPORTANT: this does not duplicate/copy the header.
1031    pub fn translate(&mut self, header: &Header) -> Result<(), rust_htslib::errors::Error> {
1032        let mut view = header.translate_view();
1033        self.record.translate(&mut view)
1034    }
1035
1036    /// Clear (remove) an INFO field from the record.
1037    ///
1038    /// Returns an error if the tag is not defined in the header.
1039    pub fn clear_info(
1040        &mut self,
1041        header: &Header,
1042        tag: &str,
1043    ) -> Result<(), rust_htslib::errors::Error> {
1044        let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
1045            rust_htslib::errors::Error::BcfUndefinedTag {
1046                tag: tag.to_string(),
1047            }
1048        })?;
1049
1050        match tag_type {
1051            TagType::Flag => self.record.clear_info_flag(tag.as_bytes())?,
1052            TagType::Integer => self.record.clear_info_integer(tag.as_bytes())?,
1053            TagType::Float => self.record.clear_info_float(tag.as_bytes())?,
1054            TagType::String => self.record.clear_info_string(tag.as_bytes())?,
1055        }
1056
1057        self.record.unpack();
1058        Ok(())
1059    }
1060
1061    /// Set a FORMAT integer field.
1062    ///
1063    /// The `values` slice should be flattened: for a field with `n` values per sample
1064    /// and `s` samples, provide `s * n` values in sample-major order.
1065    ///
1066    /// Use [`format_int_missing()`] to represent missing values.
1067    ///
1068    /// # Errors
1069    ///
1070    /// Returns an error if the tag is not defined, is not Integer type, or is "GT".
1071    pub fn set_format_integer(
1072        &mut self,
1073        header: &Header,
1074        tag: &str,
1075        values: &[i32],
1076    ) -> Result<(), rust_htslib::errors::Error> {
1077        record_set_format_integer(&mut self.record, header, tag, values)
1078    }
1079
1080    /// Set a FORMAT float field.
1081    ///
1082    /// The `values` slice should be flattened: for a field with `n` values per sample
1083    /// and `s` samples, provide `s * n` values in sample-major order.
1084    ///
1085    /// Use [`format_float_missing()`] to represent missing values.
1086    ///
1087    /// # Errors
1088    ///
1089    /// Returns an error if the tag is not defined or is not Float type.
1090    pub fn set_format_float(
1091        &mut self,
1092        header: &Header,
1093        tag: &str,
1094        values: &[f32],
1095    ) -> Result<(), rust_htslib::errors::Error> {
1096        record_set_format_float(&mut self.record, header, tag, values)
1097    }
1098
1099    /// Set a FORMAT string field.
1100    ///
1101    /// Provide one string per sample.
1102    ///
1103    /// # Errors
1104    ///
1105    /// Returns an error if the tag is not defined, is not String type, or is "GT".
1106    pub fn set_format_string(
1107        &mut self,
1108        header: &Header,
1109        tag: &str,
1110        values: &[String],
1111    ) -> Result<(), rust_htslib::errors::Error> {
1112        record_set_format_string(&mut self.record, header, tag, values)
1113    }
1114
1115    /// Clear (remove) a FORMAT field from this record.
1116    ///
1117    /// # Errors
1118    ///
1119    /// Returns an error if the tag is not defined in the header or is "GT".
1120    pub fn clear_format(
1121        &mut self,
1122        header: &Header,
1123        tag: &str,
1124    ) -> Result<(), rust_htslib::errors::Error> {
1125        record_clear_format(&mut self.record, header, tag)
1126    }
1127
1128    /// Get an INFO field value by tag name.
1129    ///
1130    /// Returns the appropriate [`InfoValue`] variant based on the tag's type
1131    /// as defined in the header. Returns [`InfoValue::Absent`] if the tag
1132    /// is not present in this record.
1133    pub fn info(&self, header: &Header, tag: &str) -> InfoValue {
1134        let (tag_type, tag_length) = match header.info_type(tag.as_bytes()) {
1135            Some(v) => v,
1136            None => return InfoValue::Absent,
1137        };
1138
1139        match tag_type {
1140            TagType::Flag => match header_info_flag(header, &self.record, tag.as_bytes()) {
1141                Ok(v) => InfoValue::Bool(v),
1142                Err(InfoError::Absent) => InfoValue::Absent,
1143                Err(InfoError::Other) => InfoValue::Absent,
1144            },
1145            TagType::Integer => {
1146                match header_info_values_i32(header, &self.record, tag.as_bytes()) {
1147                    Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Int),
1148                    Err(InfoError::Absent) => InfoValue::Absent,
1149                    Err(InfoError::Other) => InfoValue::Absent,
1150                }
1151            }
1152            TagType::Float => match header_info_values_f32(header, &self.record, tag.as_bytes()) {
1153                Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Float),
1154                Err(InfoError::Absent) => InfoValue::Absent,
1155                Err(InfoError::Other) => InfoValue::Absent,
1156            },
1157            TagType::String => {
1158                match header_info_values_string(header, &self.record, tag.as_bytes()) {
1159                    Ok(v) => string_to_infovalue(v, tag_length),
1160                    Err(InfoError::Absent) => InfoValue::Absent,
1161                    Err(InfoError::Other) => InfoValue::Absent,
1162                }
1163            }
1164        }
1165    }
1166
1167    /// Get a FORMAT field value by tag name.
1168    ///
1169    /// Returns a [`FormatValue::PerSample`] containing values for all samples,
1170    /// or [`FormatValue::Absent`] if the tag is not present in this record.
1171    pub fn format(&self, header: &Header, tag: &str) -> FormatValue {
1172        let (tag_type, tag_length) = match header.format_type(tag.as_bytes()) {
1173            Some(v) => v,
1174            None => return FormatValue::Absent,
1175        };
1176
1177        let sample_count = self.record.sample_count() as usize;
1178
1179        match tag_type {
1180            TagType::Integer => match self.record.format(tag.as_bytes()).integer() {
1181                Ok(values) => FormatValue::PerSample(
1182                    values
1183                        .iter()
1184                        .take(sample_count)
1185                        .map(|per_sample| {
1186                            format_numeric_to_value(per_sample, tag_length, FormatValue::Int)
1187                        })
1188                        .collect(),
1189                ),
1190                Err(_) => FormatValue::Absent,
1191            },
1192            TagType::Float => match self.record.format(tag.as_bytes()).float() {
1193                Ok(values) => FormatValue::PerSample(
1194                    values
1195                        .iter()
1196                        .take(sample_count)
1197                        .map(|per_sample| {
1198                            format_numeric_to_value(per_sample, tag_length, FormatValue::Float)
1199                        })
1200                        .collect(),
1201                ),
1202                Err(_) => FormatValue::Absent,
1203            },
1204            TagType::String => match self.record.format(tag.as_bytes()).string() {
1205                Ok(values) => FormatValue::PerSample(
1206                    values
1207                        .iter()
1208                        .take(sample_count)
1209                        .map(|per_sample| format_string_to_value(per_sample, tag_length))
1210                        .collect(),
1211                ),
1212                Err(_) => FormatValue::Absent,
1213            },
1214            TagType::Flag => FormatValue::Absent,
1215        }
1216    }
1217
1218    /// Get all FORMAT field values for a single sample by name.
1219    ///
1220    /// Returns a vector of (tag_name, value) pairs for all FORMAT fields present
1221    /// in this record, plus a `genotype` entry with the parsed GT and a
1222    /// `sample_name` entry with the sample's name.
1223    /// Returns `None` if the sample is not found.
1224    pub fn sample(&self, header: &Header, sample: &str) -> Option<Vec<(String, FormatValue)>> {
1225        let sample_id = header.sample_id(sample.as_bytes())?;
1226        let sample_count = self.record.sample_count() as usize;
1227        if sample_id >= sample_count {
1228            return None;
1229        }
1230
1231        let format_tags = self.get_format_tag_names(header);
1232        let mut out: Vec<(String, FormatValue)> = Vec::with_capacity(format_tags.len() + 2);
1233
1234        for (tag_name, tag_bytes) in format_tags {
1235            let Some(value) = format_value_for_sample(header, &self.record, &tag_bytes, sample_id)
1236            else {
1237                continue;
1238            };
1239            out.push((tag_name, value));
1240        }
1241
1242        // Add parsed genotype if GT field exists
1243        if let Some(gt) = parse_genotype_for_sample(&self.record, sample_id) {
1244            out.push(("genotype".to_string(), FormatValue::Genotype(gt)));
1245        }
1246
1247        // Include the sample name so JS bindings can expose it.
1248        // Set it last so it can't be overwritten by a FORMAT tag named "sample_name".
1249        out.push((
1250            "sample_name".to_string(),
1251            FormatValue::String(sample.to_string()),
1252        ));
1253
1254        Some(out)
1255    }
1256
1257    /// Returns samples' FORMAT data as an array of objects.
1258    ///
1259    /// If `subset` is `None`, returns all samples in header order.
1260    /// If `subset` is `Some(names)`, returns only the specified samples in the
1261    /// order given. Unknown sample names are silently skipped.
1262    ///
1263    /// Each element contains all FORMAT fields plus a `sample_name` key.
1264    /// Returns an empty Vec if the VCF has no samples or no requested samples exist.
1265    pub fn samples(
1266        &self,
1267        header: &Header,
1268        subset: Option<&[&str]>,
1269    ) -> Vec<Vec<(String, FormatValue)>> {
1270        let sample_count = self.record.sample_count() as usize;
1271        if sample_count == 0 {
1272            return Vec::new();
1273        }
1274
1275        let sample_names = header.sample_names();
1276        let format_tags = self.get_format_tag_names(header);
1277
1278        // Determine which sample indices to include and in what order
1279        let sample_indices: Vec<usize> = match subset {
1280            None => (0..sample_count).collect(),
1281            Some(names) => {
1282                let name_to_idx = header.sample_name_to_idx();
1283                names
1284                    .iter()
1285                    .filter_map(|name| name_to_idx.get(*name).copied())
1286                    .collect()
1287            }
1288        };
1289
1290        if sample_indices.is_empty() {
1291            return Vec::new();
1292        }
1293
1294        // Pre-allocate result vectors for each requested sample
1295        let mut results: Vec<Vec<(String, FormatValue)>> = sample_indices
1296            .iter()
1297            .map(|_| Vec::with_capacity(format_tags.len() + 1))
1298            .collect();
1299
1300        // For each FORMAT tag, fetch values for ALL samples at once and distribute to requested ones
1301        for (tag_name, tag_bytes) in &format_tags {
1302            let Some((tag_type, tag_length)) = header.format_type(tag_bytes) else {
1303                continue;
1304            };
1305
1306            match tag_type {
1307                bcf::header::TagType::Integer => {
1308                    let Ok(all_values) = self.record.format(tag_bytes).integer() else {
1309                        continue;
1310                    };
1311                    for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1312                        if let Some(per_sample) = all_values.get(sample_idx) {
1313                            let value =
1314                                format_numeric_to_value(per_sample, tag_length, FormatValue::Int);
1315                            results[result_idx].push((tag_name.clone(), value));
1316                        }
1317                    }
1318                }
1319                bcf::header::TagType::Float => {
1320                    let Ok(all_values) = self.record.format(tag_bytes).float() else {
1321                        continue;
1322                    };
1323                    for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1324                        if let Some(per_sample) = all_values.get(sample_idx) {
1325                            let value =
1326                                format_numeric_to_value(per_sample, tag_length, FormatValue::Float);
1327                            results[result_idx].push((tag_name.clone(), value));
1328                        }
1329                    }
1330                }
1331                bcf::header::TagType::String => {
1332                    let Ok(all_values) = self.record.format(tag_bytes).string() else {
1333                        continue;
1334                    };
1335                    for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1336                        if let Some(per_sample) = all_values.get(sample_idx) {
1337                            let value = format_string_to_value(per_sample, tag_length);
1338                            results[result_idx].push((tag_name.clone(), value));
1339                        }
1340                    }
1341                }
1342                bcf::header::TagType::Flag => {
1343                    // Flags are not valid for FORMAT
1344                }
1345            }
1346        }
1347
1348        // Add parsed genotypes if GT field exists
1349        if let Ok(gts) = self.record.genotypes() {
1350            for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1351                let gt = parse_genotype(&gts.get(sample_idx));
1352                results[result_idx].push(("genotype".to_string(), FormatValue::Genotype(gt)));
1353            }
1354        }
1355
1356        // Add sample_name to each result (last, so it can't be overwritten by a FORMAT tag)
1357        for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1358            let name = sample_names
1359                .get(sample_idx)
1360                .cloned()
1361                .unwrap_or_else(|| format!("sample_{sample_idx}"));
1362            results[result_idx].push(("sample_name".to_string(), FormatValue::String(name)));
1363        }
1364
1365        results
1366    }
1367
1368    /// Get parsed genotypes for all samples or a subset.
1369    ///
1370    /// Returns a vector of [`Genotype`] structs, one per requested sample.
1371    /// If `subset` is `None`, returns genotypes for all samples in header order.
1372    /// If `subset` is `Some(names)`, returns genotypes only for those samples
1373    /// in the order specified (unknown sample names are skipped).
1374    ///
1375    /// Returns an empty vector if the record has no GT field or no samples.
1376    pub fn genotypes(&self, header: &Header, subset: Option<&[&str]>) -> Vec<Genotype> {
1377        record_genotypes(&self.record, header, subset)
1378    }
1379
1380    /// Set genotypes for all samples.
1381    ///
1382    /// Takes a slice of [`Genotype`] structs (same format returned by [`Variant::genotypes()`]).
1383    /// The length should match the sample count.
1384    ///
1385    /// # Errors
1386    ///
1387    /// Returns an error if the GT field cannot be set (e.g., not defined in header).
1388    pub fn set_genotypes(
1389        &mut self,
1390        genotypes: &[Genotype],
1391    ) -> Result<(), rust_htslib::errors::Error> {
1392        record_set_genotypes(&mut self.record, genotypes)
1393    }
1394
1395    /// Get the list of FORMAT tag names present in this record.
1396    ///
1397    /// Returns a vector of (name_string, name_bytes) tuples for efficient
1398    /// subsequent lookups.
1399    fn get_format_tag_names(&self, header: &Header) -> Vec<(String, Vec<u8>)> {
1400        let record_ptr = self.record.inner() as *const rust_htslib::htslib::bcf1_t
1401            as *mut rust_htslib::htslib::bcf1_t;
1402
1403        let n_fmt = unsafe { (*record_ptr).n_fmt() as usize };
1404        let fmt_ptr = unsafe { (*record_ptr).d.fmt };
1405
1406        if fmt_ptr.is_null() || n_fmt == 0 {
1407            return Vec::new();
1408        }
1409
1410        let mut tags = Vec::with_capacity(n_fmt);
1411        for i in 0..n_fmt {
1412            let fmt = unsafe { *fmt_ptr.add(i) };
1413            let (tag_name, tag_bytes) = header.id_to_name_cached(fmt.id as u32);
1414            tags.push((tag_name, tag_bytes));
1415        }
1416        tags
1417    }
1418
1419    /// Format the record as a VCF line string.
1420    ///
1421    /// Returns the record formatted as a tab-separated VCF line (without newline),
1422    /// or `None` if formatting fails.
1423    pub fn to_string(&self, header: &Header) -> Option<String> {
1424        let mut s = rust_htslib::htslib::kstring_t {
1425            l: 0,
1426            m: 0,
1427            s: std::ptr::null_mut(),
1428        };
1429
1430        let record_ptr = self.record.inner() as *const rust_htslib::htslib::bcf1_t
1431            as *mut rust_htslib::htslib::bcf1_t;
1432
1433        let _ = unsafe {
1434            rust_htslib::htslib::bcf_unpack(record_ptr, rust_htslib::htslib::BCF_UN_ALL as i32)
1435        };
1436
1437        let ret = unsafe {
1438            rust_htslib::htslib::vcf_format(
1439                header.inner_ptr() as *const rust_htslib::htslib::bcf_hdr_t,
1440                record_ptr as *const rust_htslib::htslib::bcf1_t,
1441                &mut s,
1442            )
1443        };
1444        if ret != 0 {
1445            if !s.s.is_null() {
1446                unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
1447            }
1448            return None;
1449        }
1450
1451        let bytes = unsafe { std::slice::from_raw_parts(s.s as *const u8, s.l as usize) };
1452        let text = String::from_utf8_lossy(bytes).into_owned();
1453
1454        if !s.s.is_null() {
1455            unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
1456        }
1457
1458        Some(text.trim_end_matches('\n').to_string())
1459    }
1460}
1461
/// Reasons an INFO lookup through the raw htslib API can fail.
///
/// The type is a plain fieldless enum, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InfoError {
    /// The tag is not present on the record.
    Absent,
    /// Any other failure, such as a tag name that cannot be turned into a
    /// C string or an unexpected status code from htslib.
    Other,
}
1467
1468fn header_info_flag(header: &Header, record: &bcf::Record, tag: &[u8]) -> Result<bool, InfoError> {
1469    let Ok(c_str) = CString::new(tag) else {
1470        return Err(InfoError::Other);
1471    };
1472
1473    let record_ptr =
1474        record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
1475
1476    let mut dst: *mut std::os::raw::c_void = std::ptr::null_mut();
1477    let mut ndst: i32 = 0;
1478
1479    let ret = unsafe {
1480        rust_htslib::htslib::bcf_get_info_values(
1481            header.inner_ptr(),
1482            record_ptr,
1483            c_str.as_ptr() as *mut std::os::raw::c_char,
1484            &mut dst,
1485            &mut ndst,
1486            rust_htslib::htslib::BCF_HT_FLAG as i32,
1487        )
1488    };
1489
1490    if !dst.is_null() {
1491        unsafe { rust_htslib::htslib::free(dst) };
1492    }
1493
1494    match ret {
1495        -3 => Err(InfoError::Absent),
1496        1 => Ok(true),
1497        0 => Ok(false),
1498        _ => Err(InfoError::Other),
1499    }
1500}
1501
1502fn header_info_values_i32(
1503    header: &Header,
1504    record: &bcf::Record,
1505    tag: &[u8],
1506) -> Result<Option<Vec<i32>>, InfoError> {
1507    header_info_values_numeric::<i32>(header, record, tag, rust_htslib::htslib::BCF_HT_INT as i32)
1508}
1509
1510fn header_info_values_f32(
1511    header: &Header,
1512    record: &bcf::Record,
1513    tag: &[u8],
1514) -> Result<Option<Vec<f32>>, InfoError> {
1515    header_info_values_numeric::<f32>(header, record, tag, rust_htslib::htslib::BCF_HT_REAL as i32)
1516}
1517
1518fn header_info_values_numeric<T: Copy + Numeric>(
1519    header: &Header,
1520    record: &bcf::Record,
1521    tag: &[u8],
1522    data_type: i32,
1523) -> Result<Option<Vec<T>>, InfoError> {
1524    let Ok(c_str) = CString::new(tag) else {
1525        return Err(InfoError::Other);
1526    };
1527
1528    let record_ptr =
1529        record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
1530
1531    let mut dst: *mut std::os::raw::c_void = std::ptr::null_mut();
1532    let mut ndst: i32 = 0;
1533
1534    let ret = unsafe {
1535        rust_htslib::htslib::bcf_get_info_values(
1536            header.inner_ptr(),
1537            record_ptr,
1538            c_str.as_ptr() as *mut std::os::raw::c_char,
1539            &mut dst,
1540            &mut ndst,
1541            data_type,
1542        )
1543    };
1544
1545    match ret {
1546        -3 => Ok(None),
1547        0 => {
1548            if !dst.is_null() {
1549                unsafe { rust_htslib::htslib::free(dst) };
1550            }
1551            Ok(Some(Vec::new()))
1552        }
1553        ret if ret > 0 => {
1554            let slice = unsafe { std::slice::from_raw_parts(dst as *const T, ret as usize) };
1555            let vec = slice.to_vec();
1556            if !dst.is_null() {
1557                unsafe { rust_htslib::htslib::free(dst) };
1558            }
1559            Ok(Some(vec))
1560        }
1561        _ => {
1562            if !dst.is_null() {
1563                unsafe { rust_htslib::htslib::free(dst) };
1564            }
1565            Err(InfoError::Other)
1566        }
1567    }
1568}
1569
1570fn header_info_values_string(
1571    header: &Header,
1572    record: &bcf::Record,
1573    tag: &[u8],
1574) -> Result<Option<Vec<Vec<u8>>>, InfoError> {
1575    let Ok(c_str) = CString::new(tag) else {
1576        return Err(InfoError::Other);
1577    };
1578
1579    let record_ptr =
1580        record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
1581
1582    let mut dst: *mut std::os::raw::c_void = std::ptr::null_mut();
1583    let mut ndst: i32 = 0;
1584
1585    let ret = unsafe {
1586        rust_htslib::htslib::bcf_get_info_values(
1587            header.inner_ptr(),
1588            record_ptr,
1589            c_str.as_ptr() as *mut std::os::raw::c_char,
1590            &mut dst,
1591            &mut ndst,
1592            rust_htslib::htslib::BCF_HT_STR as i32,
1593        )
1594    };
1595
1596    match ret {
1597        -3 => Ok(None),
1598        0 => {
1599            if !dst.is_null() {
1600                unsafe { rust_htslib::htslib::free(dst) };
1601            }
1602            Ok(Some(Vec::new()))
1603        }
1604        ret if ret > 0 => {
1605            let bytes = unsafe { std::slice::from_raw_parts(dst as *const u8, ret as usize) };
1606            let mut out = Vec::new();
1607            for part in bytes.split(|c| *c == b',') {
1608                let part = part.split(|c| *c == 0u8).next().ok_or(InfoError::Other)?;
1609                out.push(part.to_vec());
1610            }
1611            if !dst.is_null() {
1612                unsafe { rust_htslib::htslib::free(dst) };
1613            }
1614            Ok(Some(out))
1615        }
1616        _ => {
1617            if !dst.is_null() {
1618                unsafe { rust_htslib::htslib::free(dst) };
1619            }
1620            Err(InfoError::Other)
1621        }
1622    }
1623}
1624
1625fn numeric_to_infovalue<T: Numeric + Copy>(
1626    values: Option<Vec<T>>,
1627    tag_length: TagLength,
1628    scalar: impl FnOnce(T) -> InfoValue + Copy,
1629) -> InfoValue {
1630    let Some(values) = values else {
1631        return InfoValue::Absent;
1632    };
1633
1634    match tag_length {
1635        TagLength::Fixed(1) => {
1636            let v = values.first().copied();
1637            match v {
1638                Some(v) if v.is_missing() => InfoValue::Missing,
1639                Some(v) => scalar(v),
1640                None => InfoValue::Missing,
1641            }
1642        }
1643        _ => InfoValue::Array(
1644            values
1645                .into_iter()
1646                .map(|v| {
1647                    if v.is_missing() {
1648                        InfoValue::Missing
1649                    } else {
1650                        scalar(v)
1651                    }
1652                })
1653                .collect(),
1654        ),
1655    }
1656}
1657
1658fn string_to_infovalue(values: Option<Vec<Vec<u8>>>, tag_length: TagLength) -> InfoValue {
1659    let Some(values) = values else {
1660        return InfoValue::Absent;
1661    };
1662
1663    match tag_length {
1664        TagLength::Fixed(1) => {
1665            let v = values
1666                .first()
1667                .map(|s| String::from_utf8_lossy(s).into_owned());
1668            match v {
1669                Some(v) if v.is_empty() => InfoValue::Missing,
1670                Some(v) => InfoValue::String(v),
1671                None => InfoValue::Missing,
1672            }
1673        }
1674        _ => InfoValue::Array(
1675            values
1676                .into_iter()
1677                .map(|v| {
1678                    let s = String::from_utf8_lossy(&v).into_owned();
1679                    if s.is_empty() {
1680                        InfoValue::Missing
1681                    } else {
1682                        InfoValue::String(s)
1683                    }
1684                })
1685                .collect(),
1686        ),
1687    }
1688}
1689
1690fn format_numeric_to_value<T: Numeric + Copy>(
1691    values: &[T],
1692    tag_length: TagLength,
1693    scalar: impl FnOnce(T) -> FormatValue + Copy,
1694) -> FormatValue {
1695    match tag_length {
1696        TagLength::Fixed(1) => {
1697            let v = values.first().copied();
1698            match v {
1699                Some(v) if v.is_missing() => FormatValue::Missing,
1700                Some(v) => scalar(v),
1701                None => FormatValue::Missing,
1702            }
1703        }
1704        _ => FormatValue::Array(
1705            values
1706                .iter()
1707                .copied()
1708                .map(|v| {
1709                    if v.is_missing() {
1710                        FormatValue::Missing
1711                    } else {
1712                        scalar(v)
1713                    }
1714                })
1715                .collect(),
1716        ),
1717    }
1718}
1719
1720fn format_string_to_value(value: &[u8], tag_length: TagLength) -> FormatValue {
1721    match tag_length {
1722        TagLength::Fixed(1) => {
1723            let out = String::from_utf8_lossy(value).into_owned();
1724            if out.is_empty() || out == "." {
1725                FormatValue::Missing
1726            } else {
1727                FormatValue::String(out)
1728            }
1729        }
1730        _ => {
1731            let mut parts = Vec::new();
1732            for part in value.split(|c| *c == b',') {
1733                let out = String::from_utf8_lossy(part).into_owned();
1734                if out.is_empty() || out == "." {
1735                    parts.push(FormatValue::Missing);
1736                } else {
1737                    parts.push(FormatValue::String(out));
1738                }
1739            }
1740            FormatValue::Array(parts)
1741        }
1742    }
1743}
1744
1745fn format_value_for_sample(
1746    header: &Header,
1747    record: &bcf::Record,
1748    tag: &[u8],
1749    sample_id: usize,
1750) -> Option<FormatValue> {
1751    let (tag_type, tag_length) = header.format_type(tag)?;
1752
1753    match tag_type {
1754        TagType::Integer => {
1755            let values = record.format(tag).integer().ok()?;
1756            let per_sample = values.get(sample_id)?;
1757            Some(format_numeric_to_value(
1758                per_sample,
1759                tag_length,
1760                FormatValue::Int,
1761            ))
1762        }
1763        TagType::Float => {
1764            let values = record.format(tag).float().ok()?;
1765            let per_sample = values.get(sample_id)?;
1766            Some(format_numeric_to_value(
1767                per_sample,
1768                tag_length,
1769                FormatValue::Float,
1770            ))
1771        }
1772        TagType::String => {
1773            let values = record.format(tag).string().ok()?;
1774            let per_sample = values.get(sample_id)?;
1775            Some(format_string_to_value(per_sample, tag_length))
1776        }
1777        TagType::Flag => None,
1778    }
1779}
1780
#[cfg(test)]
mod tests {
    use super::*;
    use rust_htslib::bcf::Read;

    /// Writes `vcf` to `<temp dir>/htsvcf-core-test/<file_name>`, opens it and
    /// returns the reader, its header, and the first record as a [`Variant`].
    ///
    /// The reader is handed back so the caller can keep it alive for as long
    /// as `header` is in use. The temporary file is removed (best effort)
    /// before returning, so a failing assertion in the calling test does not
    /// skip the cleanup. Each test must pass a distinct `file_name` because
    /// tests may run in parallel and share the same directory.
    fn first_variant(file_name: &str, vcf: &str) -> (bcf::Reader, Header, Variant) {
        let tmp_dir = std::env::temp_dir().join("htsvcf-core-test");
        let _ = std::fs::create_dir_all(&tmp_dir);
        let vcf_path = tmp_dir.join(file_name);
        std::fs::write(&vcf_path, vcf).unwrap();

        let mut reader = bcf::Reader::from_path(&vcf_path).unwrap();
        // SAFETY: the pointer comes from `reader`'s live header, and `reader`
        // is returned together with `header` so it is not dropped first.
        // NOTE(review): confirm this matches the contract of `Header::new`.
        let header = unsafe { Header::new(reader.header().inner) };

        let mut rec = reader.empty_record();
        let _ = reader.read(&mut rec).unwrap();

        // The record is already in memory; the file is no longer needed.
        let _ = std::fs::remove_file(&vcf_path);

        (reader, header, Variant::from_record(rec))
    }

    #[test]
    fn sample_includes_sample_name_and_overrides_format_tag() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##FORMAT=<ID=sample_name,Number=1,Type=String,Description=\"Should not override\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tDP:sample_name\t7:EVIL\n";

        let (_reader, header, variant) = first_variant("sample-name.vcf", vcf);

        let fields = variant.sample(&header, "S1").expect("sample exists");
        let map: std::collections::HashMap<_, _> = fields.into_iter().collect();

        assert_eq!(map.get("DP"), Some(&FormatValue::Int(7)));
        // The synthetic `sample_name` entry must win over the FORMAT tag of
        // the same name ("EVIL" in the record).
        assert_eq!(
            map.get("sample_name"),
            Some(&FormatValue::String("S1".to_string()))
        );
    }

    #[test]
    fn test_set_format_integer() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\tS3\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT:DP\t0/1:10\t1/1:20\t0/0:30\n";

        let (_reader, header, mut variant) = first_variant("set-format-int.vcf", vcf);

        // Set new DP values
        variant
            .set_format_integer(&header, "DP", &[100, 200, 300])
            .unwrap();

        // Read back and verify
        let dp = variant.format(&header, "DP");
        match dp {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 3);
                assert_eq!(vals[0], FormatValue::Int(100));
                assert_eq!(vals[1], FormatValue::Int(200));
                assert_eq!(vals[2], FormatValue::Int(300));
            }
            _ => panic!("Expected PerSample, got {:?}", dp),
        }
    }

    #[test]
    fn test_set_format_integer_with_missing() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tDP\t10\t20\n";

        let (_reader, header, mut variant) = first_variant("set-format-int-missing.vcf", vcf);

        // Set DP with a missing value (using sentinel)
        let missing = format_int_missing();
        variant
            .set_format_integer(&header, "DP", &[100, missing])
            .unwrap();

        let dp = variant.format(&header, "DP");
        match dp {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 2);
                assert_eq!(vals[0], FormatValue::Int(100));
                assert_eq!(vals[1], FormatValue::Missing);
            }
            _ => panic!("Expected PerSample, got {:?}", dp),
        }
    }

    #[test]
    fn test_set_format_float() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=AF,Number=1,Type=Float,Description=\"Allele Freq\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tAF\t0.1\t0.2\n";

        let (_reader, header, mut variant) = first_variant("set-format-float.vcf", vcf);

        variant
            .set_format_float(&header, "AF", &[0.5, 0.75])
            .unwrap();

        let af = variant.format(&header, "AF");
        match af {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 2);
                match &vals[0] {
                    FormatValue::Float(f) => assert!((f - 0.5).abs() < 0.001),
                    _ => panic!("Expected Float"),
                }
                match &vals[1] {
                    FormatValue::Float(f) => assert!((f - 0.75).abs() < 0.001),
                    _ => panic!("Expected Float"),
                }
            }
            _ => panic!("Expected PerSample, got {:?}", af),
        }
    }

    #[test]
    fn test_set_format_string() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=NOTE,Number=1,Type=String,Description=\"Note\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tNOTE\ta\tb\n";

        let (_reader, header, mut variant) = first_variant("set-format-string.vcf", vcf);

        variant
            .set_format_string(&header, "NOTE", &["hello".to_string(), "world".to_string()])
            .unwrap();

        let note = variant.format(&header, "NOTE");
        match note {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 2);
                assert_eq!(vals[0], FormatValue::String("hello".to_string()));
                assert_eq!(vals[1], FormatValue::String("world".to_string()));
            }
            _ => panic!("Expected PerSample, got {:?}", note),
        }
    }

    #[test]
    fn test_set_format_rejects_gt() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0/1\n";

        let (_reader, header, mut variant) = first_variant("set-format-gt.vcf", vcf);

        // Should fail when trying to set GT
        let result = variant.set_format_string(&header, "GT", &["0/1".to_string()]);
        assert!(result.is_err());
    }

    #[test]
    fn test_clear_format() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tDP\t10\t20\n";

        let (_reader, header, mut variant) = first_variant("clear-format.vcf", vcf);

        // Verify DP exists
        assert!(!matches!(
            variant.format(&header, "DP"),
            FormatValue::Absent
        ));

        // Clear it
        variant.clear_format(&header, "DP").unwrap();

        // Should now be absent
        assert!(matches!(variant.format(&header, "DP"), FormatValue::Absent));
    }

    #[test]
    fn test_set_genotypes() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\tS3\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0/1\t1|1\t./.\n";

        let (_reader, header, mut variant) = first_variant("set-genotypes.vcf", vcf);

        // Original genotypes
        let orig = variant.genotypes(&header, None);
        assert_eq!(orig.len(), 3);
        assert_eq!(orig[0].alleles, vec![Some(0), Some(1)]);
        assert_eq!(orig[0].phase, vec![false]);
        assert_eq!(orig[1].alleles, vec![Some(1), Some(1)]);
        assert_eq!(orig[1].phase, vec![true]);
        assert_eq!(orig[2].alleles, vec![None, None]);

        // Set new genotypes: flip S1 to 1/0, S2 to 0/0, S3 to 1|1
        let new_gts = vec![
            Genotype {
                alleles: vec![Some(1), Some(0)],
                phase: vec![false],
            },
            Genotype {
                alleles: vec![Some(0), Some(0)],
                phase: vec![false],
            },
            Genotype {
                alleles: vec![Some(1), Some(1)],
                phase: vec![true],
            },
        ];
        variant.set_genotypes(&new_gts).unwrap();

        // Verify
        let updated = variant.genotypes(&header, None);
        assert_eq!(updated.len(), 3);
        assert_eq!(updated[0].alleles, vec![Some(1), Some(0)]);
        assert_eq!(updated[0].phase, vec![false]);
        assert_eq!(updated[1].alleles, vec![Some(0), Some(0)]);
        assert_eq!(updated[1].phase, vec![false]);
        assert_eq!(updated[2].alleles, vec![Some(1), Some(1)]);
        assert_eq!(updated[2].phase, vec![true]);
    }

    #[test]
    fn test_set_genotypes_with_missing() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0/1\t1/1\n";

        let (_reader, header, mut variant) = first_variant("set-genotypes-missing.vcf", vcf);

        // Set genotypes with missing alleles: ./1 and .|0
        let new_gts = vec![
            Genotype {
                alleles: vec![None, Some(1)],
                phase: vec![false],
            },
            Genotype {
                alleles: vec![None, Some(0)],
                phase: vec![true],
            },
        ];
        variant.set_genotypes(&new_gts).unwrap();

        let updated = variant.genotypes(&header, None);
        assert_eq!(updated[0].alleles, vec![None, Some(1)]);
        assert_eq!(updated[0].phase, vec![false]);
        assert_eq!(updated[1].alleles, vec![None, Some(0)]);
        assert_eq!(updated[1].phase, vec![true]);
    }

    #[test]
    fn test_set_genotypes_haploid() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0\t1\n";

        let (_reader, header, mut variant) = first_variant("set-genotypes-haploid.vcf", vcf);

        // Set haploid genotypes
        let new_gts = vec![
            Genotype {
                alleles: vec![Some(1)],
                phase: vec![],
            },
            Genotype {
                alleles: vec![Some(0)],
                phase: vec![],
            },
        ];
        variant.set_genotypes(&new_gts).unwrap();

        let updated = variant.genotypes(&header, None);
        assert_eq!(updated[0].alleles, vec![Some(1)]);
        assert_eq!(updated[0].phase.len(), 0);
        assert_eq!(updated[1].alleles, vec![Some(0)]);
        assert_eq!(updated[1].phase.len(), 0);
    }
}