1use crate::genotype::{
50 parse_genotype, parse_genotype_for_sample, record_genotypes, record_set_genotypes, Genotype,
51};
52use crate::header::Header;
53use rust_htslib::bcf;
54use rust_htslib::bcf::header::{TagLength, TagType};
55use rust_htslib::bcf::record::Numeric;
56use std::ffi::CString;
57
/// Dynamically typed value of a single INFO field.
///
/// Returned by `record_info` and `Variant::info`; the variant reflects the type the
/// header declares for the tag and whether a value could be read from the record.
#[derive(Clone, Debug, PartialEq)]
pub enum InfoValue {
    /// The tag is not declared in the header, or reading it from the record failed.
    Absent,
    /// Placeholder for a missing value.
    // NOTE(review): presumably the VCF `.` marker; it is produced by conversion helpers
    // outside this part of the file — confirm there.
    Missing,
    /// Value of a tag declared as `Flag`.
    Bool(bool),
    /// A single integer value.
    Int(i32),
    /// A single floating-point value.
    Float(f32),
    /// A single string value.
    String(String),
    /// Several values belonging to one tag.
    Array(Vec<InfoValue>),
}
87
88#[derive(Debug, Clone, PartialEq)]
101pub enum FormatValue {
102 Absent,
104 Missing,
106 Int(i32),
108 Float(f32),
110 String(String),
112 Array(Vec<FormatValue>),
114 PerSample(Vec<FormatValue>),
116 Genotype(Genotype),
118}
119
120pub fn record_info(record: &bcf::Record, header: &Header, tag: &str) -> InfoValue {
131 let (tag_type, tag_length) = match header.info_type(tag.as_bytes()) {
132 Some(v) => v,
133 None => return InfoValue::Absent,
134 };
135
136 match tag_type {
137 TagType::Flag => match header_info_flag(header, record, tag.as_bytes()) {
138 Ok(v) => InfoValue::Bool(v),
139 Err(InfoError::Absent) => InfoValue::Absent,
140 Err(InfoError::Other) => InfoValue::Absent,
141 },
142 TagType::Integer => match header_info_values_i32(header, record, tag.as_bytes()) {
143 Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Int),
144 Err(InfoError::Absent) => InfoValue::Absent,
145 Err(InfoError::Other) => InfoValue::Absent,
146 },
147 TagType::Float => match header_info_values_f32(header, record, tag.as_bytes()) {
148 Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Float),
149 Err(InfoError::Absent) => InfoValue::Absent,
150 Err(InfoError::Other) => InfoValue::Absent,
151 },
152 TagType::String => match header_info_values_string(header, record, tag.as_bytes()) {
153 Ok(v) => string_to_infovalue(v, tag_length),
154 Err(InfoError::Absent) => InfoValue::Absent,
155 Err(InfoError::Other) => InfoValue::Absent,
156 },
157 }
158}
159
160pub fn record_format(record: &bcf::Record, header: &Header, tag: &str) -> FormatValue {
165 let (tag_type, tag_length) = match header.format_type(tag.as_bytes()) {
166 Some(v) => v,
167 None => return FormatValue::Absent,
168 };
169
170 let sample_count = record.sample_count() as usize;
171
172 match tag_type {
173 TagType::Integer => match record.format(tag.as_bytes()).integer() {
174 Ok(values) => FormatValue::PerSample(
175 values
176 .iter()
177 .take(sample_count)
178 .map(|per_sample| {
179 format_numeric_to_value(per_sample, tag_length, FormatValue::Int)
180 })
181 .collect(),
182 ),
183 Err(_) => FormatValue::Absent,
184 },
185 TagType::Float => match record.format(tag.as_bytes()).float() {
186 Ok(values) => FormatValue::PerSample(
187 values
188 .iter()
189 .take(sample_count)
190 .map(|per_sample| {
191 format_numeric_to_value(per_sample, tag_length, FormatValue::Float)
192 })
193 .collect(),
194 ),
195 Err(_) => FormatValue::Absent,
196 },
197 TagType::String => match record.format(tag.as_bytes()).string() {
198 Ok(values) => FormatValue::PerSample(
199 values
200 .iter()
201 .take(sample_count)
202 .map(|per_sample| format_string_to_value(per_sample, tag_length))
203 .collect(),
204 ),
205 Err(_) => FormatValue::Absent,
206 },
207 TagType::Flag => FormatValue::Absent,
208 }
209}
210
211pub fn record_sample(
216 record: &bcf::Record,
217 header: &Header,
218 sample: &str,
219) -> Option<Vec<(String, FormatValue)>> {
220 let sample_id = header.sample_id(sample.as_bytes())?;
221 let sample_count = record.sample_count() as usize;
222 if sample_id >= sample_count {
223 return None;
224 }
225
226 let format_tags = get_format_tag_names(header, record);
227 let mut out: Vec<(String, FormatValue)> = Vec::with_capacity(format_tags.len() + 2);
228
229 for (tag_name, tag_bytes) in format_tags {
230 let Some(value) = format_value_for_sample(header, record, &tag_bytes, sample_id) else {
231 continue;
232 };
233 out.push((tag_name, value));
234 }
235
236 if let Some(gt) = parse_genotype_for_sample(record, sample_id) {
238 out.push(("genotype".to_string(), FormatValue::Genotype(gt)));
239 }
240
241 out.push((
244 "sample_name".to_string(),
245 FormatValue::String(sample.to_string()),
246 ));
247
248 Some(out)
249}
250
251pub fn record_samples(
256 record: &bcf::Record,
257 header: &Header,
258 subset: Option<&[&str]>,
259) -> Vec<Vec<(String, FormatValue)>> {
260 let sample_count = record.sample_count() as usize;
261 if sample_count == 0 {
262 return Vec::new();
263 }
264
265 let sample_names = header.sample_names();
266 let format_tags = get_format_tag_names(header, record);
267
268 let sample_indices: Vec<usize> = match subset {
270 None => (0..sample_count).collect(),
271 Some(names) => {
272 let name_to_idx = header.sample_name_to_idx();
273 names
274 .iter()
275 .filter_map(|name| name_to_idx.get(*name).copied())
276 .collect()
277 }
278 };
279
280 if sample_indices.is_empty() {
281 return Vec::new();
282 }
283
284 let mut results: Vec<Vec<(String, FormatValue)>> = sample_indices
286 .iter()
287 .map(|_| Vec::with_capacity(format_tags.len() + 1))
288 .collect();
289
290 for (tag_name, tag_bytes) in &format_tags {
292 let Some((tag_type, tag_length)) = header.format_type(tag_bytes) else {
293 continue;
294 };
295
296 match tag_type {
297 bcf::header::TagType::Integer => {
298 let Ok(all_values) = record.format(tag_bytes).integer() else {
299 continue;
300 };
301 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
302 if let Some(per_sample) = all_values.get(sample_idx) {
303 let value =
304 format_numeric_to_value(per_sample, tag_length, FormatValue::Int);
305 results[result_idx].push((tag_name.clone(), value));
306 }
307 }
308 }
309 bcf::header::TagType::Float => {
310 let Ok(all_values) = record.format(tag_bytes).float() else {
311 continue;
312 };
313 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
314 if let Some(per_sample) = all_values.get(sample_idx) {
315 let value =
316 format_numeric_to_value(per_sample, tag_length, FormatValue::Float);
317 results[result_idx].push((tag_name.clone(), value));
318 }
319 }
320 }
321 bcf::header::TagType::String => {
322 let Ok(all_values) = record.format(tag_bytes).string() else {
323 continue;
324 };
325 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
326 if let Some(per_sample) = all_values.get(sample_idx) {
327 let value = format_string_to_value(per_sample, tag_length);
328 results[result_idx].push((tag_name.clone(), value));
329 }
330 }
331 }
332 bcf::header::TagType::Flag => {
333 }
335 }
336 }
337
338 if let Ok(gts) = record.genotypes() {
340 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
341 let gt = parse_genotype(>s.get(sample_idx));
342 results[result_idx].push(("genotype".to_string(), FormatValue::Genotype(gt)));
343 }
344 }
345
346 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
348 let name = sample_names
349 .get(sample_idx)
350 .cloned()
351 .unwrap_or_else(|| format!("sample_{sample_idx}"));
352 results[result_idx].push(("sample_name".to_string(), FormatValue::String(name)));
353 }
354
355 results
356}
357
358pub fn get_format_tag_names(header: &Header, record: &bcf::Record) -> Vec<(String, Vec<u8>)> {
360 let record_ptr =
361 record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
362
363 let n_fmt = unsafe { (*record_ptr).n_fmt() as usize };
364 let fmt_ptr = unsafe { (*record_ptr).d.fmt };
365
366 if fmt_ptr.is_null() || n_fmt == 0 {
367 return Vec::new();
368 }
369
370 let mut tags = Vec::with_capacity(n_fmt);
371 for i in 0..n_fmt {
372 let fmt = unsafe { *fmt_ptr.add(i) };
373 let (tag_name, tag_bytes) = header.id_to_name_cached(fmt.id as u32);
374 tags.push((tag_name, tag_bytes));
375 }
376 tags
377}
378
379pub fn record_to_string(record: &bcf::Record, header: &Header) -> Option<String> {
381 let mut s = rust_htslib::htslib::kstring_t {
382 l: 0,
383 m: 0,
384 s: std::ptr::null_mut(),
385 };
386
387 let record_ptr =
388 record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
389
390 let _ = unsafe {
391 rust_htslib::htslib::bcf_unpack(record_ptr, rust_htslib::htslib::BCF_UN_ALL as i32)
392 };
393
394 let ret = unsafe {
395 rust_htslib::htslib::vcf_format(
396 header.inner_ptr() as *const rust_htslib::htslib::bcf_hdr_t,
397 record_ptr as *const rust_htslib::htslib::bcf1_t,
398 &mut s,
399 )
400 };
401 if ret != 0 {
402 if !s.s.is_null() {
403 unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
404 }
405 return None;
406 }
407
408 let bytes = unsafe { std::slice::from_raw_parts(s.s as *const u8, s.l as usize) };
409 let text = String::from_utf8_lossy(bytes).into_owned();
410
411 if !s.s.is_null() {
412 unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
413 }
414
415 Some(text.trim_end_matches('\n').to_string())
416}
417
418pub fn record_set_info_flag(
420 record: &mut bcf::Record,
421 header: &Header,
422 tag: &str,
423 is_set: bool,
424) -> Result<(), rust_htslib::errors::Error> {
425 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
426 rust_htslib::errors::Error::BcfUndefinedTag {
427 tag: tag.to_string(),
428 }
429 })?;
430
431 if tag_type != TagType::Flag {
432 return Err(rust_htslib::errors::Error::BcfSetTag {
433 tag: tag.to_string(),
434 });
435 }
436
437 if is_set {
438 record.push_info_flag(tag.as_bytes())?;
439 } else {
440 record.clear_info_flag(tag.as_bytes())?;
441 }
442
443 record.unpack();
444 Ok(())
445}
446
447pub fn record_set_info_integer(
449 record: &mut bcf::Record,
450 header: &Header,
451 tag: &str,
452 values: &[i32],
453) -> Result<(), rust_htslib::errors::Error> {
454 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
455 rust_htslib::errors::Error::BcfUndefinedTag {
456 tag: tag.to_string(),
457 }
458 })?;
459
460 if tag_type != TagType::Integer {
461 return Err(rust_htslib::errors::Error::BcfSetTag {
462 tag: tag.to_string(),
463 });
464 }
465
466 record.push_info_integer(tag.as_bytes(), values)?;
467 record.unpack();
468 Ok(())
469}
470
471pub fn record_set_info_float(
473 record: &mut bcf::Record,
474 header: &Header,
475 tag: &str,
476 values: &[f32],
477) -> Result<(), rust_htslib::errors::Error> {
478 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
479 rust_htslib::errors::Error::BcfUndefinedTag {
480 tag: tag.to_string(),
481 }
482 })?;
483
484 if tag_type != TagType::Float {
485 return Err(rust_htslib::errors::Error::BcfSetTag {
486 tag: tag.to_string(),
487 });
488 }
489
490 record.push_info_float(tag.as_bytes(), values)?;
491 record.unpack();
492 Ok(())
493}
494
495pub fn record_set_info_string(
497 record: &mut bcf::Record,
498 header: &Header,
499 tag: &str,
500 values: &[String],
501) -> Result<(), rust_htslib::errors::Error> {
502 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
503 rust_htslib::errors::Error::BcfUndefinedTag {
504 tag: tag.to_string(),
505 }
506 })?;
507
508 if tag_type != TagType::String {
509 return Err(rust_htslib::errors::Error::BcfSetTag {
510 tag: tag.to_string(),
511 });
512 }
513
514 let refs: Vec<&[u8]> = values.iter().map(|s| s.as_bytes()).collect();
515 record.push_info_string(tag.as_bytes(), &refs)?;
516 record.unpack();
517 Ok(())
518}
519
520pub fn record_clear_info(
522 record: &mut bcf::Record,
523 header: &Header,
524 tag: &str,
525) -> Result<(), rust_htslib::errors::Error> {
526 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
527 rust_htslib::errors::Error::BcfUndefinedTag {
528 tag: tag.to_string(),
529 }
530 })?;
531
532 match tag_type {
533 TagType::Flag => record.clear_info_flag(tag.as_bytes())?,
534 TagType::Integer => record.clear_info_integer(tag.as_bytes())?,
535 TagType::Float => record.clear_info_float(tag.as_bytes())?,
536 TagType::String => record.clear_info_string(tag.as_bytes())?,
537 }
538
539 record.unpack();
540 Ok(())
541}
542
/// Sentinel stored for a missing integer FORMAT value (`i32::MIN`).
// NOTE(review): presumably mirrors htslib's `bcf_int32_missing` — confirm.
const FORMAT_MISSING_INT: i32 = i32::MIN;

/// Sentinel stored for a missing float FORMAT value.
///
/// The value is a NaN with the exact bit pattern `0x7F80_0001`, so it has to be
/// recognised by its bits rather than by `==`.
// NOTE(review): presumably mirrors htslib's `bcf_float_missing` — confirm.
fn format_missing_float() -> f32 {
    const MISSING_BITS: u32 = 0x7F80_0001;
    f32::from_bits(MISSING_BITS)
}
556
557pub fn record_set_format_integer(
572 record: &mut bcf::Record,
573 header: &Header,
574 tag: &str,
575 values: &[i32],
576) -> Result<(), rust_htslib::errors::Error> {
577 if tag == "GT" {
578 return Err(rust_htslib::errors::Error::BcfSetTag {
579 tag: "GT cannot be set via set_format; use dedicated genotype methods".to_string(),
580 });
581 }
582
583 let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
584 rust_htslib::errors::Error::BcfUndefinedTag {
585 tag: tag.to_string(),
586 }
587 })?;
588
589 if tag_type != TagType::Integer {
590 return Err(rust_htslib::errors::Error::BcfSetTag {
591 tag: tag.to_string(),
592 });
593 }
594
595 record.push_format_integer(tag.as_bytes(), values)?;
596 record.unpack();
597 Ok(())
598}
599
600pub fn record_set_format_float(
614 record: &mut bcf::Record,
615 header: &Header,
616 tag: &str,
617 values: &[f32],
618) -> Result<(), rust_htslib::errors::Error> {
619 let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
620 rust_htslib::errors::Error::BcfUndefinedTag {
621 tag: tag.to_string(),
622 }
623 })?;
624
625 if tag_type != TagType::Float {
626 return Err(rust_htslib::errors::Error::BcfSetTag {
627 tag: tag.to_string(),
628 });
629 }
630
631 record.push_format_float(tag.as_bytes(), values)?;
632 record.unpack();
633 Ok(())
634}
635
636pub fn record_set_format_string(
648 record: &mut bcf::Record,
649 header: &Header,
650 tag: &str,
651 values: &[String],
652) -> Result<(), rust_htslib::errors::Error> {
653 if tag == "GT" {
654 return Err(rust_htslib::errors::Error::BcfSetTag {
655 tag: "GT cannot be set via set_format; use dedicated genotype methods".to_string(),
656 });
657 }
658
659 let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
660 rust_htslib::errors::Error::BcfUndefinedTag {
661 tag: tag.to_string(),
662 }
663 })?;
664
665 if tag_type != TagType::String {
666 return Err(rust_htslib::errors::Error::BcfSetTag {
667 tag: tag.to_string(),
668 });
669 }
670
671 let refs: Vec<&[u8]> = values.iter().map(|s| s.as_bytes()).collect();
672 record.push_format_string(tag.as_bytes(), &refs)?;
673 record.unpack();
674 Ok(())
675}
676
677pub fn record_clear_format(
683 record: &mut bcf::Record,
684 header: &Header,
685 tag: &str,
686) -> Result<(), rust_htslib::errors::Error> {
687 if tag == "GT" {
688 return Err(rust_htslib::errors::Error::BcfSetTag {
689 tag: "GT cannot be cleared via clear_format".to_string(),
690 });
691 }
692
693 let (tag_type, _) = header.format_type(tag.as_bytes()).ok_or_else(|| {
694 rust_htslib::errors::Error::BcfUndefinedTag {
695 tag: tag.to_string(),
696 }
697 })?;
698
699 match tag_type {
702 TagType::Integer => record.push_format_integer(tag.as_bytes(), &[])?,
703 TagType::Float => record.push_format_float(tag.as_bytes(), &[])?,
704 TagType::String => record.push_format_string::<&[u8]>(tag.as_bytes(), &[])?,
705 TagType::Flag => {
706 return Err(rust_htslib::errors::Error::BcfSetTag {
708 tag: format!("FORMAT/{tag} is a Flag type which is not supported"),
709 });
710 }
711 }
712
713 record.unpack();
714 Ok(())
715}
716
717pub fn format_int_missing() -> i32 {
719 FORMAT_MISSING_INT
720}
721
722pub fn format_float_missing() -> f32 {
724 format_missing_float()
725}
726
727#[derive(Debug)]
747pub struct Variant {
748 record: bcf::Record,
749 chrom: String,
750}
751
752impl Variant {
753 pub fn from_record(mut record: bcf::Record) -> Self {
757 record.unpack();
758 let chrom = match record.rid() {
759 Some(rid) => record
760 .header()
761 .rid2name(rid)
762 .ok()
763 .map(|name| String::from_utf8_lossy(name).into_owned())
764 .unwrap_or_else(|| ".".to_string()),
765 None => ".".to_string(),
766 };
767 Self { record, chrom }
768 }
769
770 pub fn into_record(self) -> bcf::Record {
775 self.record
776 }
777
778 pub fn record_mut(&mut self) -> &mut bcf::Record {
783 &mut self.record
784 }
785
786 pub fn chrom(&self) -> &str {
788 &self.chrom
789 }
790
791 pub fn rid(&self) -> Option<u32> {
793 self.record.rid()
794 }
795
796 pub fn start(&self) -> i64 {
801 self.record.pos()
802 }
803
804 pub fn pos(&self) -> i64 {
808 self.record.pos() + 1
809 }
810
811 pub fn end(&self) -> i64 {
816 self.record.end()
817 }
818
819 pub fn id(&self) -> String {
823 String::from_utf8_lossy(&self.record.id()).into_owned()
824 }
825
826 pub fn set_id(&mut self, id: &str) -> Result<(), rust_htslib::errors::Error> {
830 let id = if id.is_empty() { "." } else { id };
831 self.record.set_id(id.as_bytes())?;
832 Ok(())
833 }
834
835 pub fn reference(&self) -> String {
837 self.record
838 .alleles()
839 .first()
840 .map(|a| String::from_utf8_lossy(a).into_owned())
841 .unwrap_or_else(|| ".".to_string())
842 }
843
844 pub fn alts(&self) -> Vec<String> {
849 self.record
850 .alleles()
851 .into_iter()
852 .skip(1)
853 .map(|a| String::from_utf8_lossy(a).into_owned())
854 .collect()
855 }
856
857 pub fn qual(&self) -> Option<f32> {
861 let qual = self.record.qual();
862 if qual.is_missing() {
863 None
864 } else {
865 Some(qual)
866 }
867 }
868
869 pub fn set_qual(&mut self, qual: Option<f32>) {
873 match qual {
874 Some(v) => self.record.set_qual(v),
875 None => self.record.set_qual(<f32 as Numeric>::missing()),
876 }
877 }
878
879 pub fn filters(&self) -> Vec<String> {
883 let header = self.record.header();
884 let mut out = Vec::new();
885 for id in self.record.filters() {
886 let name = String::from_utf8_lossy(&header.id_to_name(id)).into_owned();
887 out.push(name);
888 }
889 out
890 }
891
892 pub fn set_filters(&mut self, filters: &[String]) -> Result<(), rust_htslib::errors::Error> {
897 let want_clear = filters.is_empty()
898 || (filters.len() == 1 && (filters[0].is_empty() || filters[0] == "."));
899
900 if want_clear {
901 let refs: Vec<&[u8]> = Vec::new();
902 self.record.set_filters(&refs)?;
903 return Ok(());
904 }
905
906 let refs: Vec<&[u8]> = filters.iter().map(|s| s.as_bytes()).collect();
907 self.record.set_filters(&refs)?;
908 Ok(())
909 }
910
911 pub fn set_info_flag(
916 &mut self,
917 header: &Header,
918 tag: &str,
919 is_set: bool,
920 ) -> Result<(), rust_htslib::errors::Error> {
921 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
922 rust_htslib::errors::Error::BcfUndefinedTag {
923 tag: tag.to_string(),
924 }
925 })?;
926
927 if tag_type != TagType::Flag {
928 return Err(rust_htslib::errors::Error::BcfSetTag {
929 tag: tag.to_string(),
930 });
931 }
932
933 if is_set {
934 self.record.push_info_flag(tag.as_bytes())?;
935 } else {
936 self.record.clear_info_flag(tag.as_bytes())?;
937 }
938
939 self.record.unpack();
940 Ok(())
941 }
942
943 pub fn set_info_integer(
948 &mut self,
949 header: &Header,
950 tag: &str,
951 values: &[i32],
952 ) -> Result<(), rust_htslib::errors::Error> {
953 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
954 rust_htslib::errors::Error::BcfUndefinedTag {
955 tag: tag.to_string(),
956 }
957 })?;
958
959 if tag_type != TagType::Integer {
960 return Err(rust_htslib::errors::Error::BcfSetTag {
961 tag: tag.to_string(),
962 });
963 }
964
965 self.record.push_info_integer(tag.as_bytes(), values)?;
966 self.record.unpack();
967 Ok(())
968 }
969
970 pub fn set_info_float(
975 &mut self,
976 header: &Header,
977 tag: &str,
978 values: &[f32],
979 ) -> Result<(), rust_htslib::errors::Error> {
980 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
981 rust_htslib::errors::Error::BcfUndefinedTag {
982 tag: tag.to_string(),
983 }
984 })?;
985
986 if tag_type != TagType::Float {
987 return Err(rust_htslib::errors::Error::BcfSetTag {
988 tag: tag.to_string(),
989 });
990 }
991
992 self.record.push_info_float(tag.as_bytes(), values)?;
993 self.record.unpack();
994 Ok(())
995 }
996
997 pub fn set_info_string(
1002 &mut self,
1003 header: &Header,
1004 tag: &str,
1005 values: &[String],
1006 ) -> Result<(), rust_htslib::errors::Error> {
1007 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
1008 rust_htslib::errors::Error::BcfUndefinedTag {
1009 tag: tag.to_string(),
1010 }
1011 })?;
1012
1013 if tag_type != TagType::String {
1014 return Err(rust_htslib::errors::Error::BcfSetTag {
1015 tag: tag.to_string(),
1016 });
1017 }
1018
1019 let refs: Vec<&[u8]> = values.iter().map(|s| s.as_bytes()).collect();
1020 self.record.push_info_string(tag.as_bytes(), &refs)?;
1021 self.record.unpack();
1022 Ok(())
1023 }
1024
1025 pub fn translate(&mut self, header: &Header) -> Result<(), rust_htslib::errors::Error> {
1032 let mut view = header.translate_view();
1033 self.record.translate(&mut view)
1034 }
1035
1036 pub fn clear_info(
1040 &mut self,
1041 header: &Header,
1042 tag: &str,
1043 ) -> Result<(), rust_htslib::errors::Error> {
1044 let (tag_type, _) = header.info_type(tag.as_bytes()).ok_or_else(|| {
1045 rust_htslib::errors::Error::BcfUndefinedTag {
1046 tag: tag.to_string(),
1047 }
1048 })?;
1049
1050 match tag_type {
1051 TagType::Flag => self.record.clear_info_flag(tag.as_bytes())?,
1052 TagType::Integer => self.record.clear_info_integer(tag.as_bytes())?,
1053 TagType::Float => self.record.clear_info_float(tag.as_bytes())?,
1054 TagType::String => self.record.clear_info_string(tag.as_bytes())?,
1055 }
1056
1057 self.record.unpack();
1058 Ok(())
1059 }
1060
1061 pub fn set_format_integer(
1072 &mut self,
1073 header: &Header,
1074 tag: &str,
1075 values: &[i32],
1076 ) -> Result<(), rust_htslib::errors::Error> {
1077 record_set_format_integer(&mut self.record, header, tag, values)
1078 }
1079
1080 pub fn set_format_float(
1091 &mut self,
1092 header: &Header,
1093 tag: &str,
1094 values: &[f32],
1095 ) -> Result<(), rust_htslib::errors::Error> {
1096 record_set_format_float(&mut self.record, header, tag, values)
1097 }
1098
1099 pub fn set_format_string(
1107 &mut self,
1108 header: &Header,
1109 tag: &str,
1110 values: &[String],
1111 ) -> Result<(), rust_htslib::errors::Error> {
1112 record_set_format_string(&mut self.record, header, tag, values)
1113 }
1114
1115 pub fn clear_format(
1121 &mut self,
1122 header: &Header,
1123 tag: &str,
1124 ) -> Result<(), rust_htslib::errors::Error> {
1125 record_clear_format(&mut self.record, header, tag)
1126 }
1127
1128 pub fn info(&self, header: &Header, tag: &str) -> InfoValue {
1134 let (tag_type, tag_length) = match header.info_type(tag.as_bytes()) {
1135 Some(v) => v,
1136 None => return InfoValue::Absent,
1137 };
1138
1139 match tag_type {
1140 TagType::Flag => match header_info_flag(header, &self.record, tag.as_bytes()) {
1141 Ok(v) => InfoValue::Bool(v),
1142 Err(InfoError::Absent) => InfoValue::Absent,
1143 Err(InfoError::Other) => InfoValue::Absent,
1144 },
1145 TagType::Integer => {
1146 match header_info_values_i32(header, &self.record, tag.as_bytes()) {
1147 Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Int),
1148 Err(InfoError::Absent) => InfoValue::Absent,
1149 Err(InfoError::Other) => InfoValue::Absent,
1150 }
1151 }
1152 TagType::Float => match header_info_values_f32(header, &self.record, tag.as_bytes()) {
1153 Ok(v) => numeric_to_infovalue(v, tag_length, InfoValue::Float),
1154 Err(InfoError::Absent) => InfoValue::Absent,
1155 Err(InfoError::Other) => InfoValue::Absent,
1156 },
1157 TagType::String => {
1158 match header_info_values_string(header, &self.record, tag.as_bytes()) {
1159 Ok(v) => string_to_infovalue(v, tag_length),
1160 Err(InfoError::Absent) => InfoValue::Absent,
1161 Err(InfoError::Other) => InfoValue::Absent,
1162 }
1163 }
1164 }
1165 }
1166
1167 pub fn format(&self, header: &Header, tag: &str) -> FormatValue {
1172 let (tag_type, tag_length) = match header.format_type(tag.as_bytes()) {
1173 Some(v) => v,
1174 None => return FormatValue::Absent,
1175 };
1176
1177 let sample_count = self.record.sample_count() as usize;
1178
1179 match tag_type {
1180 TagType::Integer => match self.record.format(tag.as_bytes()).integer() {
1181 Ok(values) => FormatValue::PerSample(
1182 values
1183 .iter()
1184 .take(sample_count)
1185 .map(|per_sample| {
1186 format_numeric_to_value(per_sample, tag_length, FormatValue::Int)
1187 })
1188 .collect(),
1189 ),
1190 Err(_) => FormatValue::Absent,
1191 },
1192 TagType::Float => match self.record.format(tag.as_bytes()).float() {
1193 Ok(values) => FormatValue::PerSample(
1194 values
1195 .iter()
1196 .take(sample_count)
1197 .map(|per_sample| {
1198 format_numeric_to_value(per_sample, tag_length, FormatValue::Float)
1199 })
1200 .collect(),
1201 ),
1202 Err(_) => FormatValue::Absent,
1203 },
1204 TagType::String => match self.record.format(tag.as_bytes()).string() {
1205 Ok(values) => FormatValue::PerSample(
1206 values
1207 .iter()
1208 .take(sample_count)
1209 .map(|per_sample| format_string_to_value(per_sample, tag_length))
1210 .collect(),
1211 ),
1212 Err(_) => FormatValue::Absent,
1213 },
1214 TagType::Flag => FormatValue::Absent,
1215 }
1216 }
1217
1218 pub fn sample(&self, header: &Header, sample: &str) -> Option<Vec<(String, FormatValue)>> {
1225 let sample_id = header.sample_id(sample.as_bytes())?;
1226 let sample_count = self.record.sample_count() as usize;
1227 if sample_id >= sample_count {
1228 return None;
1229 }
1230
1231 let format_tags = self.get_format_tag_names(header);
1232 let mut out: Vec<(String, FormatValue)> = Vec::with_capacity(format_tags.len() + 2);
1233
1234 for (tag_name, tag_bytes) in format_tags {
1235 let Some(value) = format_value_for_sample(header, &self.record, &tag_bytes, sample_id)
1236 else {
1237 continue;
1238 };
1239 out.push((tag_name, value));
1240 }
1241
1242 if let Some(gt) = parse_genotype_for_sample(&self.record, sample_id) {
1244 out.push(("genotype".to_string(), FormatValue::Genotype(gt)));
1245 }
1246
1247 out.push((
1250 "sample_name".to_string(),
1251 FormatValue::String(sample.to_string()),
1252 ));
1253
1254 Some(out)
1255 }
1256
1257 pub fn samples(
1266 &self,
1267 header: &Header,
1268 subset: Option<&[&str]>,
1269 ) -> Vec<Vec<(String, FormatValue)>> {
1270 let sample_count = self.record.sample_count() as usize;
1271 if sample_count == 0 {
1272 return Vec::new();
1273 }
1274
1275 let sample_names = header.sample_names();
1276 let format_tags = self.get_format_tag_names(header);
1277
1278 let sample_indices: Vec<usize> = match subset {
1280 None => (0..sample_count).collect(),
1281 Some(names) => {
1282 let name_to_idx = header.sample_name_to_idx();
1283 names
1284 .iter()
1285 .filter_map(|name| name_to_idx.get(*name).copied())
1286 .collect()
1287 }
1288 };
1289
1290 if sample_indices.is_empty() {
1291 return Vec::new();
1292 }
1293
1294 let mut results: Vec<Vec<(String, FormatValue)>> = sample_indices
1296 .iter()
1297 .map(|_| Vec::with_capacity(format_tags.len() + 1))
1298 .collect();
1299
1300 for (tag_name, tag_bytes) in &format_tags {
1302 let Some((tag_type, tag_length)) = header.format_type(tag_bytes) else {
1303 continue;
1304 };
1305
1306 match tag_type {
1307 bcf::header::TagType::Integer => {
1308 let Ok(all_values) = self.record.format(tag_bytes).integer() else {
1309 continue;
1310 };
1311 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1312 if let Some(per_sample) = all_values.get(sample_idx) {
1313 let value =
1314 format_numeric_to_value(per_sample, tag_length, FormatValue::Int);
1315 results[result_idx].push((tag_name.clone(), value));
1316 }
1317 }
1318 }
1319 bcf::header::TagType::Float => {
1320 let Ok(all_values) = self.record.format(tag_bytes).float() else {
1321 continue;
1322 };
1323 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1324 if let Some(per_sample) = all_values.get(sample_idx) {
1325 let value =
1326 format_numeric_to_value(per_sample, tag_length, FormatValue::Float);
1327 results[result_idx].push((tag_name.clone(), value));
1328 }
1329 }
1330 }
1331 bcf::header::TagType::String => {
1332 let Ok(all_values) = self.record.format(tag_bytes).string() else {
1333 continue;
1334 };
1335 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1336 if let Some(per_sample) = all_values.get(sample_idx) {
1337 let value = format_string_to_value(per_sample, tag_length);
1338 results[result_idx].push((tag_name.clone(), value));
1339 }
1340 }
1341 }
1342 bcf::header::TagType::Flag => {
1343 }
1345 }
1346 }
1347
1348 if let Ok(gts) = self.record.genotypes() {
1350 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1351 let gt = parse_genotype(>s.get(sample_idx));
1352 results[result_idx].push(("genotype".to_string(), FormatValue::Genotype(gt)));
1353 }
1354 }
1355
1356 for (result_idx, &sample_idx) in sample_indices.iter().enumerate() {
1358 let name = sample_names
1359 .get(sample_idx)
1360 .cloned()
1361 .unwrap_or_else(|| format!("sample_{sample_idx}"));
1362 results[result_idx].push(("sample_name".to_string(), FormatValue::String(name)));
1363 }
1364
1365 results
1366 }
1367
1368 pub fn genotypes(&self, header: &Header, subset: Option<&[&str]>) -> Vec<Genotype> {
1377 record_genotypes(&self.record, header, subset)
1378 }
1379
1380 pub fn set_genotypes(
1389 &mut self,
1390 genotypes: &[Genotype],
1391 ) -> Result<(), rust_htslib::errors::Error> {
1392 record_set_genotypes(&mut self.record, genotypes)
1393 }
1394
1395 fn get_format_tag_names(&self, header: &Header) -> Vec<(String, Vec<u8>)> {
1400 let record_ptr = self.record.inner() as *const rust_htslib::htslib::bcf1_t
1401 as *mut rust_htslib::htslib::bcf1_t;
1402
1403 let n_fmt = unsafe { (*record_ptr).n_fmt() as usize };
1404 let fmt_ptr = unsafe { (*record_ptr).d.fmt };
1405
1406 if fmt_ptr.is_null() || n_fmt == 0 {
1407 return Vec::new();
1408 }
1409
1410 let mut tags = Vec::with_capacity(n_fmt);
1411 for i in 0..n_fmt {
1412 let fmt = unsafe { *fmt_ptr.add(i) };
1413 let (tag_name, tag_bytes) = header.id_to_name_cached(fmt.id as u32);
1414 tags.push((tag_name, tag_bytes));
1415 }
1416 tags
1417 }
1418
1419 pub fn to_string(&self, header: &Header) -> Option<String> {
1424 let mut s = rust_htslib::htslib::kstring_t {
1425 l: 0,
1426 m: 0,
1427 s: std::ptr::null_mut(),
1428 };
1429
1430 let record_ptr = self.record.inner() as *const rust_htslib::htslib::bcf1_t
1431 as *mut rust_htslib::htslib::bcf1_t;
1432
1433 let _ = unsafe {
1434 rust_htslib::htslib::bcf_unpack(record_ptr, rust_htslib::htslib::BCF_UN_ALL as i32)
1435 };
1436
1437 let ret = unsafe {
1438 rust_htslib::htslib::vcf_format(
1439 header.inner_ptr() as *const rust_htslib::htslib::bcf_hdr_t,
1440 record_ptr as *const rust_htslib::htslib::bcf1_t,
1441 &mut s,
1442 )
1443 };
1444 if ret != 0 {
1445 if !s.s.is_null() {
1446 unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
1447 }
1448 return None;
1449 }
1450
1451 let bytes = unsafe { std::slice::from_raw_parts(s.s as *const u8, s.l as usize) };
1452 let text = String::from_utf8_lossy(bytes).into_owned();
1453
1454 if !s.s.is_null() {
1455 unsafe { rust_htslib::htslib::free(s.s as *mut std::os::raw::c_void) };
1456 }
1457
1458 Some(text.trim_end_matches('\n').to_string())
1459 }
1460}
1461
/// Failure modes of the low-level INFO lookup helpers in this module.
#[derive(Debug)]
enum InfoError {
    /// htslib reported status `-3` for the lookup.
    // NOTE(review): presumably "tag not present in this record" — confirm against htslib.
    Absent,
    /// Any other failure: a tag containing a NUL byte or an unexpected htslib status.
    Other,
}
1467
1468fn header_info_flag(header: &Header, record: &bcf::Record, tag: &[u8]) -> Result<bool, InfoError> {
1469 let Ok(c_str) = CString::new(tag) else {
1470 return Err(InfoError::Other);
1471 };
1472
1473 let record_ptr =
1474 record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
1475
1476 let mut dst: *mut std::os::raw::c_void = std::ptr::null_mut();
1477 let mut ndst: i32 = 0;
1478
1479 let ret = unsafe {
1480 rust_htslib::htslib::bcf_get_info_values(
1481 header.inner_ptr(),
1482 record_ptr,
1483 c_str.as_ptr() as *mut std::os::raw::c_char,
1484 &mut dst,
1485 &mut ndst,
1486 rust_htslib::htslib::BCF_HT_FLAG as i32,
1487 )
1488 };
1489
1490 if !dst.is_null() {
1491 unsafe { rust_htslib::htslib::free(dst) };
1492 }
1493
1494 match ret {
1495 -3 => Err(InfoError::Absent),
1496 1 => Ok(true),
1497 0 => Ok(false),
1498 _ => Err(InfoError::Other),
1499 }
1500}
1501
1502fn header_info_values_i32(
1503 header: &Header,
1504 record: &bcf::Record,
1505 tag: &[u8],
1506) -> Result<Option<Vec<i32>>, InfoError> {
1507 header_info_values_numeric::<i32>(header, record, tag, rust_htslib::htslib::BCF_HT_INT as i32)
1508}
1509
1510fn header_info_values_f32(
1511 header: &Header,
1512 record: &bcf::Record,
1513 tag: &[u8],
1514) -> Result<Option<Vec<f32>>, InfoError> {
1515 header_info_values_numeric::<f32>(header, record, tag, rust_htslib::htslib::BCF_HT_REAL as i32)
1516}
1517
1518fn header_info_values_numeric<T: Copy + Numeric>(
1519 header: &Header,
1520 record: &bcf::Record,
1521 tag: &[u8],
1522 data_type: i32,
1523) -> Result<Option<Vec<T>>, InfoError> {
1524 let Ok(c_str) = CString::new(tag) else {
1525 return Err(InfoError::Other);
1526 };
1527
1528 let record_ptr =
1529 record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
1530
1531 let mut dst: *mut std::os::raw::c_void = std::ptr::null_mut();
1532 let mut ndst: i32 = 0;
1533
1534 let ret = unsafe {
1535 rust_htslib::htslib::bcf_get_info_values(
1536 header.inner_ptr(),
1537 record_ptr,
1538 c_str.as_ptr() as *mut std::os::raw::c_char,
1539 &mut dst,
1540 &mut ndst,
1541 data_type,
1542 )
1543 };
1544
1545 match ret {
1546 -3 => Ok(None),
1547 0 => {
1548 if !dst.is_null() {
1549 unsafe { rust_htslib::htslib::free(dst) };
1550 }
1551 Ok(Some(Vec::new()))
1552 }
1553 ret if ret > 0 => {
1554 let slice = unsafe { std::slice::from_raw_parts(dst as *const T, ret as usize) };
1555 let vec = slice.to_vec();
1556 if !dst.is_null() {
1557 unsafe { rust_htslib::htslib::free(dst) };
1558 }
1559 Ok(Some(vec))
1560 }
1561 _ => {
1562 if !dst.is_null() {
1563 unsafe { rust_htslib::htslib::free(dst) };
1564 }
1565 Err(InfoError::Other)
1566 }
1567 }
1568}
1569
1570fn header_info_values_string(
1571 header: &Header,
1572 record: &bcf::Record,
1573 tag: &[u8],
1574) -> Result<Option<Vec<Vec<u8>>>, InfoError> {
1575 let Ok(c_str) = CString::new(tag) else {
1576 return Err(InfoError::Other);
1577 };
1578
1579 let record_ptr =
1580 record.inner() as *const rust_htslib::htslib::bcf1_t as *mut rust_htslib::htslib::bcf1_t;
1581
1582 let mut dst: *mut std::os::raw::c_void = std::ptr::null_mut();
1583 let mut ndst: i32 = 0;
1584
1585 let ret = unsafe {
1586 rust_htslib::htslib::bcf_get_info_values(
1587 header.inner_ptr(),
1588 record_ptr,
1589 c_str.as_ptr() as *mut std::os::raw::c_char,
1590 &mut dst,
1591 &mut ndst,
1592 rust_htslib::htslib::BCF_HT_STR as i32,
1593 )
1594 };
1595
1596 match ret {
1597 -3 => Ok(None),
1598 0 => {
1599 if !dst.is_null() {
1600 unsafe { rust_htslib::htslib::free(dst) };
1601 }
1602 Ok(Some(Vec::new()))
1603 }
1604 ret if ret > 0 => {
1605 let bytes = unsafe { std::slice::from_raw_parts(dst as *const u8, ret as usize) };
1606 let mut out = Vec::new();
1607 for part in bytes.split(|c| *c == b',') {
1608 let part = part.split(|c| *c == 0u8).next().ok_or(InfoError::Other)?;
1609 out.push(part.to_vec());
1610 }
1611 if !dst.is_null() {
1612 unsafe { rust_htslib::htslib::free(dst) };
1613 }
1614 Ok(Some(out))
1615 }
1616 _ => {
1617 if !dst.is_null() {
1618 unsafe { rust_htslib::htslib::free(dst) };
1619 }
1620 Err(InfoError::Other)
1621 }
1622 }
1623}
1624
1625fn numeric_to_infovalue<T: Numeric + Copy>(
1626 values: Option<Vec<T>>,
1627 tag_length: TagLength,
1628 scalar: impl FnOnce(T) -> InfoValue + Copy,
1629) -> InfoValue {
1630 let Some(values) = values else {
1631 return InfoValue::Absent;
1632 };
1633
1634 match tag_length {
1635 TagLength::Fixed(1) => {
1636 let v = values.first().copied();
1637 match v {
1638 Some(v) if v.is_missing() => InfoValue::Missing,
1639 Some(v) => scalar(v),
1640 None => InfoValue::Missing,
1641 }
1642 }
1643 _ => InfoValue::Array(
1644 values
1645 .into_iter()
1646 .map(|v| {
1647 if v.is_missing() {
1648 InfoValue::Missing
1649 } else {
1650 scalar(v)
1651 }
1652 })
1653 .collect(),
1654 ),
1655 }
1656}
1657
1658fn string_to_infovalue(values: Option<Vec<Vec<u8>>>, tag_length: TagLength) -> InfoValue {
1659 let Some(values) = values else {
1660 return InfoValue::Absent;
1661 };
1662
1663 match tag_length {
1664 TagLength::Fixed(1) => {
1665 let v = values
1666 .first()
1667 .map(|s| String::from_utf8_lossy(s).into_owned());
1668 match v {
1669 Some(v) if v.is_empty() => InfoValue::Missing,
1670 Some(v) => InfoValue::String(v),
1671 None => InfoValue::Missing,
1672 }
1673 }
1674 _ => InfoValue::Array(
1675 values
1676 .into_iter()
1677 .map(|v| {
1678 let s = String::from_utf8_lossy(&v).into_owned();
1679 if s.is_empty() {
1680 InfoValue::Missing
1681 } else {
1682 InfoValue::String(s)
1683 }
1684 })
1685 .collect(),
1686 ),
1687 }
1688}
1689
1690fn format_numeric_to_value<T: Numeric + Copy>(
1691 values: &[T],
1692 tag_length: TagLength,
1693 scalar: impl FnOnce(T) -> FormatValue + Copy,
1694) -> FormatValue {
1695 match tag_length {
1696 TagLength::Fixed(1) => {
1697 let v = values.first().copied();
1698 match v {
1699 Some(v) if v.is_missing() => FormatValue::Missing,
1700 Some(v) => scalar(v),
1701 None => FormatValue::Missing,
1702 }
1703 }
1704 _ => FormatValue::Array(
1705 values
1706 .iter()
1707 .copied()
1708 .map(|v| {
1709 if v.is_missing() {
1710 FormatValue::Missing
1711 } else {
1712 scalar(v)
1713 }
1714 })
1715 .collect(),
1716 ),
1717 }
1718}
1719
1720fn format_string_to_value(value: &[u8], tag_length: TagLength) -> FormatValue {
1721 match tag_length {
1722 TagLength::Fixed(1) => {
1723 let out = String::from_utf8_lossy(value).into_owned();
1724 if out.is_empty() || out == "." {
1725 FormatValue::Missing
1726 } else {
1727 FormatValue::String(out)
1728 }
1729 }
1730 _ => {
1731 let mut parts = Vec::new();
1732 for part in value.split(|c| *c == b',') {
1733 let out = String::from_utf8_lossy(part).into_owned();
1734 if out.is_empty() || out == "." {
1735 parts.push(FormatValue::Missing);
1736 } else {
1737 parts.push(FormatValue::String(out));
1738 }
1739 }
1740 FormatValue::Array(parts)
1741 }
1742 }
1743}
1744
1745fn format_value_for_sample(
1746 header: &Header,
1747 record: &bcf::Record,
1748 tag: &[u8],
1749 sample_id: usize,
1750) -> Option<FormatValue> {
1751 let (tag_type, tag_length) = header.format_type(tag)?;
1752
1753 match tag_type {
1754 TagType::Integer => {
1755 let values = record.format(tag).integer().ok()?;
1756 let per_sample = values.get(sample_id)?;
1757 Some(format_numeric_to_value(
1758 per_sample,
1759 tag_length,
1760 FormatValue::Int,
1761 ))
1762 }
1763 TagType::Float => {
1764 let values = record.format(tag).float().ok()?;
1765 let per_sample = values.get(sample_id)?;
1766 Some(format_numeric_to_value(
1767 per_sample,
1768 tag_length,
1769 FormatValue::Float,
1770 ))
1771 }
1772 TagType::String => {
1773 let values = record.format(tag).string().ok()?;
1774 let per_sample = values.get(sample_id)?;
1775 Some(format_string_to_value(per_sample, tag_length))
1776 }
1777 TagType::Flag => None,
1778 }
1779}
1780
#[cfg(test)]
mod tests {
    //! Round-trip tests: each one writes a tiny VCF to a temp file, loads its
    //! first record, and exercises the FORMAT / genotype accessors and setters.

    use super::*;
    use rust_htslib::bcf::Read;

    /// Writes `vcf` to `<temp>/htsvcf-core-test/<file_name>`, opens it, and
    /// reads the first record.
    ///
    /// Returns the reader together with the header wrapper and the variant.
    /// The reader is handed back so the caller can keep it alive for as long
    /// as the header wrapper (built from the reader's raw header pointer) is
    /// in use. Bind it to a named variable such as `_reader`, not `_`, which
    /// would drop it immediately.
    ///
    /// The fixture file is removed (best effort) before returning, so a
    /// failing assertion in the calling test does not leave it behind.
    fn load_first_variant(file_name: &str, vcf: &str) -> (bcf::Reader, Header, Variant) {
        let tmp_dir = std::env::temp_dir().join("htsvcf-core-test");
        let _ = std::fs::create_dir_all(&tmp_dir);
        let vcf_path = tmp_dir.join(file_name);
        std::fs::write(&vcf_path, vcf).unwrap();

        let mut reader = bcf::Reader::from_path(&vcf_path).unwrap();
        let header = unsafe { Header::new(reader.header().inner) };

        let mut rec = reader.empty_record();
        let _ = reader.read(&mut rec).unwrap();

        // Nothing is read from the file after this point.
        let _ = std::fs::remove_file(&vcf_path);

        (reader, header, Variant::from_record(rec))
    }

    #[test]
    fn sample_includes_sample_name_and_overrides_format_tag() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##FORMAT=<ID=sample_name,Number=1,Type=String,Description=\"Should not override\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tDP:sample_name\t7:EVIL\n";

        let (_reader, header, variant) = load_first_variant("sample-name.vcf", vcf);

        let fields = variant.sample(&header, "S1").expect("sample exists");
        let map: std::collections::HashMap<_, _> = fields.into_iter().collect();

        assert_eq!(map.get("DP"), Some(&FormatValue::Int(7)));
        // The real sample name must be reported, not the FORMAT field of the same name.
        assert_eq!(
            map.get("sample_name"),
            Some(&FormatValue::String("S1".to_string()))
        );
    }

    #[test]
    fn test_set_format_integer() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\tS3\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT:DP\t0/1:10\t1/1:20\t0/0:30\n";

        let (_reader, header, mut variant) = load_first_variant("set-format-int.vcf", vcf);

        variant
            .set_format_integer(&header, "DP", &[100, 200, 300])
            .unwrap();

        let dp = variant.format(&header, "DP");
        match dp {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 3);
                assert_eq!(vals[0], FormatValue::Int(100));
                assert_eq!(vals[1], FormatValue::Int(200));
                assert_eq!(vals[2], FormatValue::Int(300));
            }
            _ => panic!("Expected PerSample, got {:?}", dp),
        }
    }

    #[test]
    fn test_set_format_integer_with_missing() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tDP\t10\t20\n";

        let (_reader, header, mut variant) = load_first_variant("set-format-int-missing.vcf", vcf);

        let missing = format_int_missing();
        variant
            .set_format_integer(&header, "DP", &[100, missing])
            .unwrap();

        let dp = variant.format(&header, "DP");
        match dp {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 2);
                assert_eq!(vals[0], FormatValue::Int(100));
                assert_eq!(vals[1], FormatValue::Missing);
            }
            _ => panic!("Expected PerSample, got {:?}", dp),
        }
    }

    #[test]
    fn test_set_format_float() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=AF,Number=1,Type=Float,Description=\"Allele Freq\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tAF\t0.1\t0.2\n";

        let (_reader, header, mut variant) = load_first_variant("set-format-float.vcf", vcf);

        variant
            .set_format_float(&header, "AF", &[0.5, 0.75])
            .unwrap();

        let af = variant.format(&header, "AF");
        match af {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 2);
                match &vals[0] {
                    FormatValue::Float(f) => assert!((f - 0.5).abs() < 0.001),
                    _ => panic!("Expected Float"),
                }
                match &vals[1] {
                    FormatValue::Float(f) => assert!((f - 0.75).abs() < 0.001),
                    _ => panic!("Expected Float"),
                }
            }
            _ => panic!("Expected PerSample, got {:?}", af),
        }
    }

    #[test]
    fn test_set_format_string() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=NOTE,Number=1,Type=String,Description=\"Note\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tNOTE\ta\tb\n";

        let (_reader, header, mut variant) = load_first_variant("set-format-string.vcf", vcf);

        variant
            .set_format_string(&header, "NOTE", &["hello".to_string(), "world".to_string()])
            .unwrap();

        let note = variant.format(&header, "NOTE");
        match note {
            FormatValue::PerSample(vals) => {
                assert_eq!(vals.len(), 2);
                assert_eq!(vals[0], FormatValue::String("hello".to_string()));
                assert_eq!(vals[1], FormatValue::String("world".to_string()));
            }
            _ => panic!("Expected PerSample, got {:?}", note),
        }
    }

    #[test]
    fn test_set_format_rejects_gt() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0/1\n";

        let (_reader, header, mut variant) = load_first_variant("set-format-gt.vcf", vcf);

        // Writing GT through the generic string setter is expected to fail.
        let result = variant.set_format_string(&header, "GT", &["0/1".to_string()]);
        assert!(result.is_err());
    }

    #[test]
    fn test_clear_format() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Depth\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tDP\t10\t20\n";

        let (_reader, header, mut variant) = load_first_variant("clear-format.vcf", vcf);

        // DP is present before clearing...
        assert!(!matches!(
            variant.format(&header, "DP"),
            FormatValue::Absent
        ));

        variant.clear_format(&header, "DP").unwrap();

        // ...and reported as absent afterwards.
        assert!(matches!(variant.format(&header, "DP"), FormatValue::Absent));
    }

    #[test]
    fn test_set_genotypes() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\tS3\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0/1\t1|1\t./.\n";

        let (_reader, header, mut variant) = load_first_variant("set-genotypes.vcf", vcf);

        // Genotypes as parsed from the file.
        let orig = variant.genotypes(&header, None);
        assert_eq!(orig.len(), 3);
        assert_eq!(orig[0].alleles, vec![Some(0), Some(1)]);
        assert_eq!(orig[0].phase, vec![false]);
        assert_eq!(orig[1].alleles, vec![Some(1), Some(1)]);
        assert_eq!(orig[1].phase, vec![true]);
        assert_eq!(orig[2].alleles, vec![None, None]);

        let new_gts = vec![
            Genotype {
                alleles: vec![Some(1), Some(0)],
                phase: vec![false],
            },
            Genotype {
                alleles: vec![Some(0), Some(0)],
                phase: vec![false],
            },
            Genotype {
                alleles: vec![Some(1), Some(1)],
                phase: vec![true],
            },
        ];
        variant.set_genotypes(&new_gts).unwrap();

        // Genotypes read back after the overwrite.
        let updated = variant.genotypes(&header, None);
        assert_eq!(updated.len(), 3);
        assert_eq!(updated[0].alleles, vec![Some(1), Some(0)]);
        assert_eq!(updated[0].phase, vec![false]);
        assert_eq!(updated[1].alleles, vec![Some(0), Some(0)]);
        assert_eq!(updated[1].phase, vec![false]);
        assert_eq!(updated[2].alleles, vec![Some(1), Some(1)]);
        assert_eq!(updated[2].phase, vec![true]);
    }

    #[test]
    fn test_set_genotypes_with_missing() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0/1\t1/1\n";

        let (_reader, header, mut variant) = load_first_variant("set-genotypes-missing.vcf", vcf);

        let new_gts = vec![
            Genotype {
                alleles: vec![None, Some(1)],
                phase: vec![false],
            },
            Genotype {
                alleles: vec![None, Some(0)],
                phase: vec![true],
            },
        ];
        variant.set_genotypes(&new_gts).unwrap();

        let updated = variant.genotypes(&header, None);
        assert_eq!(updated[0].alleles, vec![None, Some(1)]);
        assert_eq!(updated[0].phase, vec![false]);
        assert_eq!(updated[1].alleles, vec![None, Some(0)]);
        assert_eq!(updated[1].phase, vec![true]);
    }

    #[test]
    fn test_set_genotypes_haploid() {
        let vcf = "##fileformat=VCFv4.2\n\
##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n\
##contig=<ID=chr1>\n\
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\n\
chr1\t1\t.\tA\tC\t.\t.\t.\tGT\t0\t1\n";

        let (_reader, header, mut variant) = load_first_variant("set-genotypes-haploid.vcf", vcf);

        let new_gts = vec![
            Genotype {
                alleles: vec![Some(1)],
                phase: vec![],
            },
            Genotype {
                alleles: vec![Some(0)],
                phase: vec![],
            },
        ];
        variant.set_genotypes(&new_gts).unwrap();

        let updated = variant.genotypes(&header, None);
        assert_eq!(updated[0].alleles, vec![Some(1)]);
        assert_eq!(updated[0].phase.len(), 0);
        assert_eq!(updated[1].alleles, vec![Some(0)]);
        assert_eq!(updated[1].phase.len(), 0);
    }
}
2186}