From 80610c7a24af6abe7aeedd03aa415fbbf02c163c Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 6 Jul 2023 20:40:26 +0000 Subject: [PATCH 01/18] WIP on docs for format bit --- vcf/src/vcf.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index dbea8d7..f5fac73 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -101,6 +101,41 @@ impl From for VCFError { /// _ => assert!(false), /// }; /// ``` +/// +/// Similarly, we can obtain the format information for a file via the `format` attribute. +/// +/// ``` +/// use vcf::vcf::parse_vcf; +/// let vcf_source = br#"##fileformat=VCFv4.4 +/// ###fileDate=20090805 +/// ###source=myImputationProgramV3.1 +/// ###reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +/// ###contig= +/// ###phasing=partial +/// ###INFO= +/// ###INFO= +/// ###INFO= +/// ###INFO= +/// ###INFO= +/// ###INFO= +/// ###FILTER= +/// ###FILTER= +/// ###FORMAT= +/// ###FORMAT= +/// ###FORMAT= +/// ###FORMAT= +/// ##CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +/// 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +/// 20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3 +/// 20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4 +/// 20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2 +/// 20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 +/// "#; +///# use vcf::vcf::VCFError; +/// let vcf = parse_vcf(&vcf_source[..])?; +/// assert_eq!(vcf.format, "VCFv4.4"); +///# Ok::<(), VCFError>(()) +/// ``` pub fn parse_vcf(source: impl BufRead) -> Result { let first_line = source.lines().next().ok_or(VCFError::ParseError)??; let parsed = Header::parse(&first_line)?; From 637ccd3e8c3a57237914b38156bad5ecb2ce0bae Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 13 Jul 2023 19:02:21 +0000 Subject: [PATCH 02/18] Better doc test --- vcf/src/vcf.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index f5fac73..90c5a1c 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -133,7 +133,12 @@ impl From for VCFError { /// "#; ///# use vcf::vcf::VCFError; /// let vcf = parse_vcf(&vcf_source[..])?; -/// assert_eq!(vcf.format, "VCFv4.4"); +/// let hq_description = vcf.format +/// .iter() +/// .find(|item| match item.get("ID") {Some("HQ") => true, _ => false}) +/// .and_then(|item| item.get("Description")) +/// .unwrap(); +/// assert_eq!(hq_description, "Haplotype Quality"); ///# Ok::<(), VCFError>(()) /// ``` pub fn parse_vcf(source: impl BufRead) -> Result { From 4695a8077bcfa8addd639728cb4034dcf27868c1 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 13 Jul 2023 19:23:18 +0000 Subject: [PATCH 03/18] Refactor previous code to get file_format separately --- vcf/src/vcf.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index 90c5a1c..bd04d3f 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -144,12 +144,12 @@ impl From for VCFError { pub fn parse_vcf(source: impl BufRead) -> Result { let first_line = source.lines().next().ok_or(VCFError::ParseError)??; let parsed = Header::parse(&first_line)?; - if is_valid_file_format(&parsed) { - match parsed.value { - Flat(s) => Ok(VCF {file_format: s.to_string()}), - _ => panic!(), - } - } else { - Err(VCFError::ParseError) + if !is_valid_file_format(&parsed) { + return Err(VCFError::ParseError) } + let file_format = match parsed.value { + Flat(s) => s.to_string(), + _ => panic!(), + }; + Ok(VCF {file_format: file_format.to_string()}) } From 8e266f4013f1ef0e8ee6986a7672fd3e303fd47d Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Fri, 14 Jul 2023 00:32:05 +0000 Subject: [PATCH 04/18] WIP on getting format bi to work --- vcf/src/vcf.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index bd04d3f..fe6acf3 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -151,5 +151,14 @@ pub fn parse_vcf(source: impl BufRead) -> Result { Flat(s) => s.to_string(), _ => panic!(), }; + let formats: Result, VCFError> = source + .lines() + .map( + |result| match result { + Ok(line) => Header::parse(&line).map_err(VCFError::from), + Err(e) => Err(VCFError::IoError(e)), + } + ) + .collect(); Ok(VCF {file_format: file_format.to_string()}) } From 75f5cefc2c86cffa98f457c0b18286401207ce5c Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 3 Aug 2023 16:08:19 +0000 Subject: [PATCH 05/18] Create lines variable so that I can use it later --- vcf/src/vcf.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index fe6acf3..699a2bc 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -142,7 +142,8 @@ impl From for VCFError { ///# Ok::<(), VCFError>(()) /// ``` pub fn parse_vcf(source: impl BufRead) -> Result { - let first_line = source.lines().next().ok_or(VCFError::ParseError)??; + let mut lines = source.lines(); + let first_line = lines.next().ok_or(VCFError::ParseError)??; let parsed = Header::parse(&first_line)?; if !is_valid_file_format(&parsed) { return Err(VCFError::ParseError) @@ -151,11 +152,10 @@ pub fn parse_vcf(source: impl BufRead) -> Result { Flat(s) => s.to_string(), _ => panic!(), }; - let formats: Result, VCFError> = source - .lines() + let formats: Result, VCFError> = lines .map( |result| match result { - Ok(line) => Header::parse(&line).map_err(VCFError::from), + Ok(ref line) => Header::parse(line).map_err(VCFError::from), Err(e) => Err(VCFError::IoError(e)), } ) From 1b026fad5994bc28394dec130223a3dc14775aeb Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 3 Aug 2023 19:05:48 +0000 Subject: [PATCH 06/18] Convert Header and HeaderValue to use String - The header needs to be returned from parse_vcf --- vcf/src/headers.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vcf/src/headers.rs b/vcf/src/headers.rs index 4e8f292..bc4616a 100644 --- a/vcf/src/headers.rs +++ b/vcf/src/headers.rs @@ -1,15 +1,15 @@ use std::collections::HashMap; #[derive(Debug, Clone, PartialEq, Eq)] -pub struct Header<'src> { - pub key: &'src str, - pub value: HeaderValue<'src>, +pub struct Header { + pub key: String, + pub value: HeaderValue, } #[derive(Debug, Clone, PartialEq, Eq)] -pub enum HeaderValue<'src> { - Flat(&'src str), - Nested(HashMap<&'src str, &'src str>), +pub enum HeaderValue { + Flat(String), + Nested(HashMap), } #[cfg(test)] From 45ca345a350d802d0dbb107141e9b22b8602837f Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 3 Aug 2023 19:08:08 +0000 Subject: [PATCH 07/18] Remove lifetime arguments from impls --- vcf/src/parse.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vcf/src/parse.rs b/vcf/src/parse.rs index dfa33dd..ee4ab61 100644 --- a/vcf/src/parse.rs +++ b/vcf/src/parse.rs @@ -12,8 +12,8 @@ lazy_static! { static ref HEADER_VALUE_REGEX: Regex = Regex::new(r#"(?:[^,"]+|(?:"[^"]*"))+"#).unwrap(); } -impl<'src> Header<'src> { - pub fn parse(input: &'src str) -> Result { +impl Header { + pub fn parse(input: &str) -> Result { let line = input.trim(); let (key, value) = line.strip_prefix("##") .and_then(|line| line.split_once('=')) @@ -23,8 +23,8 @@ impl<'src> Header<'src> { } } -impl<'src> HeaderValue<'src> { - pub fn parse(input: &'src str) -> Result { +impl HeaderValue { + pub fn parse(input: &str) -> Result { match input.strip_prefix('<').and_then(|input| input.strip_suffix('>')) { None => Ok(Self::Flat(input)), Some(pairs) => { From 3e9e838ccde830a4a24573c590d19bb1e510fa00 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 3 Aug 2023 19:45:53 +0000 Subject: [PATCH 08/18] Attempt to convert to Strings --- vcf/src/parse.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/vcf/src/parse.rs b/vcf/src/parse.rs index ee4ab61..b4f2597 100644 --- a/vcf/src/parse.rs +++ b/vcf/src/parse.rs @@ -19,14 +19,14 @@ impl Header { .and_then(|line| line.split_once('=')) .ok_or(ParseError)?; let value = HeaderValue::parse(value)?; - Ok(Self { key, value }) + Ok(Self { key.to_string(), value }) } } impl HeaderValue { pub fn parse(input: &str) -> Result { match input.strip_prefix('<').and_then(|input| input.strip_suffix('>')) { - None => Ok(Self::Flat(input)), + None => Ok(Self::Flat(input.to_string())), Some(pairs) => { HEADER_VALUE_REGEX.captures_iter(pairs) .map(|c| c.get(0).unwrap().as_str()) @@ -38,11 +38,21 @@ impl HeaderValue { } ) .collect::, _>>() + .map(|hm| hm.to_string()) .map(HeaderValue::Nested) } } } } +impl <&str> HashMap { + pub fn to_string(self) -> HashMap { + self + .into_iter() + .map(|(key, value)| (key.to_string(), value.to_string)) + .collect()::> + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ParseError; From 7dbfd55f22839e272dc2a98718d9c92fb9cc2b57 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 3 Aug 2023 21:58:59 +0000 Subject: [PATCH 09/18] Implement conversion as a simple function --- vcf/src/parse.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/vcf/src/parse.rs b/vcf/src/parse.rs index b4f2597..b9e25eb 100644 --- a/vcf/src/parse.rs +++ b/vcf/src/parse.rs @@ -12,6 +12,14 @@ lazy_static! { static ref HEADER_VALUE_REGEX: Regex = Regex::new(r#"(?:[^,"]+|(?:"[^"]*"))+"#).unwrap(); } +pub fn convert_to_string(hm: HashMap<&str, &str>) -> HashMap { + hm + .into_iter() + .map(|(key, value)| (key.to_string(), value.to_string)) + .collect::>() +} + + impl Header { pub fn parse(input: &str) -> Result { let line = input.trim(); @@ -38,21 +46,12 @@ impl HeaderValue { } ) .collect::, _>>() - .map(|hm| hm.to_string()) + .map(convert_to_string) .map(HeaderValue::Nested) } } } } -impl <&str> HashMap { - pub fn to_string(self) -> HashMap { - self - .into_iter() - .map(|(key, value)| (key.to_string(), value.to_string)) - .collect()::> - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ParseError; From 173a6865381084d41772c1636a52da1af522bcb7 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 3 Aug 2023 21:59:27 +0000 Subject: [PATCH 10/18] Starting to convert everything to string... --- vcf/src/parse.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vcf/src/parse.rs b/vcf/src/parse.rs index b9e25eb..bf6a1b1 100644 --- a/vcf/src/parse.rs +++ b/vcf/src/parse.rs @@ -27,7 +27,7 @@ impl Header { .and_then(|line| line.split_once('=')) .ok_or(ParseError)?; let value = HeaderValue::parse(value)?; - Ok(Self { key.to_string(), value }) + Ok(Self { key: key.to_string(), value: value }) } } From 58405582adb531314b9d7e5efa4a68683b6034f6 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 3 Aug 2023 22:06:48 +0000 Subject: [PATCH 11/18] Convert all remaining to string --- vcf/src/headers.rs | 12 ++++++------ vcf/src/parse.rs | 2 +- vcf/src/validate_fileformat.rs | 9 ++++++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/vcf/src/headers.rs b/vcf/src/headers.rs index bc4616a..91c3bf7 100644 --- a/vcf/src/headers.rs +++ b/vcf/src/headers.rs @@ -34,15 +34,15 @@ mod tests { headers, vec![ Header { - key: "fileformat", - value: HeaderValue::Flat("VCFv1.4"), + key: "fileformat".to_string(), + value: HeaderValue::Flat("VCFv1.4".to_string()), }, Header { - key: "INFO", + key: "INFO".to_string(), value: HeaderValue::Nested(HashMap::from([ - ("abc", "123"), - ("xyz", "3125"), - ("sfh", "574"), + ("abc".to_string(), "123".to_string()), + ("xyz".to_string(), "3125".to_string()), + ("sfh".to_string(), "574".to_string()), ])), }, ], diff --git a/vcf/src/parse.rs b/vcf/src/parse.rs index bf6a1b1..3558cfb 100644 --- a/vcf/src/parse.rs +++ b/vcf/src/parse.rs @@ -15,7 +15,7 @@ lazy_static! { pub fn convert_to_string(hm: HashMap<&str, &str>) -> HashMap { hm .into_iter() - .map(|(key, value)| (key.to_string(), value.to_string)) + .map(|(key, value)| (key.to_string(), value.to_string())) .collect::>() } diff --git a/vcf/src/validate_fileformat.rs b/vcf/src/validate_fileformat.rs index 843a356..714065f 100644 --- a/vcf/src/validate_fileformat.rs +++ b/vcf/src/validate_fileformat.rs @@ -25,19 +25,22 @@ mod tests { #[test] fn is_valid_if_key_is_fileformat() { - let header = Header {key: "fileformat", value: Flat("VCFv4.4")}; + let header = Header {key: "fileformat".to_string(), value: Flat("VCFv4.4".to_string())}; assert!(is_valid_file_format(&header)); } #[test] fn is_invalid_if_key_is_not_fileformat() { - let header = Header {key: "gileformat", value: Flat("VCFv4.4")}; + let header = Header {key: "gileformat".to_string(), value: Flat("VCFv4.4".to_string())}; assert!(!is_valid_file_format(&header)); } #[test] fn is_invalid_if_header_value_nested() { - let header = Header {key: "fileformat", value: Nested(HashMap::from([("another_key", "VCFv4.4")])) }; + let header = Header { + key: "fileformat".to_string(), + value: Nested(HashMap::from([("another_key".to_string(), "VCFv4.4".to_string())])) + }; assert!(!is_valid_file_format(&header)); } } From 1300e4a1c6e95cc00a46be52640eefcefcb9f9bb Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 10 Aug 2023 14:43:44 +0000 Subject: [PATCH 12/18] return vcf object with format field --- vcf/src/vcf.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index 699a2bc..848657d 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -7,6 +7,7 @@ use crate::parse; pub struct VCF { pub file_format: String, + pub format: Vec
, } #[derive(Debug)] @@ -152,13 +153,18 @@ pub fn parse_vcf(source: impl BufRead) -> Result { Flat(s) => s.to_string(), _ => panic!(), }; - let formats: Result, VCFError> = lines + let formats = lines .map( |result| match result { Ok(ref line) => Header::parse(line).map_err(VCFError::from), Err(e) => Err(VCFError::IoError(e)), } ) - .collect(); - Ok(VCF {file_format: file_format.to_string()}) + .collect::, _>>()?; + Ok( + VCF { + file_format: file_format.to_string(), + format: formats, + } + ) } From 197c48b10a891809179c43608a3014f4a22505b0 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Tue, 15 Aug 2023 19:47:18 +0000 Subject: [PATCH 13/18] WIP - towards getting a format field --- vcf/src/parse.rs | 1 + vcf/src/vcf.rs | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/vcf/src/parse.rs b/vcf/src/parse.rs index 3558cfb..e79117a 100644 --- a/vcf/src/parse.rs +++ b/vcf/src/parse.rs @@ -22,6 +22,7 @@ pub fn convert_to_string(hm: HashMap<&str, &str>) -> HashMap { impl Header { pub fn parse(input: &str) -> Result { + println!("Parsing header input: {}", input); let line = input.trim(); let (key, value) = line.strip_prefix("##") .and_then(|line| line.split_once('=')) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index 848657d..f42af07 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -62,7 +62,10 @@ impl From for VCFError { /// 20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 /// "#; ///# use vcf::vcf::VCFError; -/// let vcf = parse_vcf(&vcf_source[..])?; +/// let vcf = match parse_vcf(&vcf_source[..]) { +/// Ok(vcf) => vcf, +/// Err(_) => panic!("Error when we should be ok") +/// }; /// assert_eq!(vcf.file_format, "VCFv4.4"); ///# Ok::<(), VCFError>(()) /// ``` From 2656257c826c5339fd178d2df666acfc92ad3d48 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Wed, 16 Aug 2023 13:45:52 +0000 Subject: [PATCH 14/18] Revert "Remove to_string - that's in the future" This reverts commit d560aa4b1235bfd8f6cfb50cffd8a7e0704023de. --- vcf/src/headers.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vcf/src/headers.rs b/vcf/src/headers.rs index 91c3bf7..b5da98f 100644 --- a/vcf/src/headers.rs +++ b/vcf/src/headers.rs @@ -58,11 +58,11 @@ mod tests { header, Ok( Header { - key: "FORMAT", + key: "FORMAT".to_string(), value: HeaderValue::Nested(HashMap::from([ - ("abc", "123"), - ("xyz", "3125"), - ("sfh", "1,574"), + ("abc".to_string(), "123".to_string()), + ("xyz".to_string(), "3125".to_string()), + ("sfh".to_string(), "1,574".to_string()), ])), } ) @@ -78,11 +78,11 @@ mod tests { header, Ok( Header { - key: "FORMAT", + key: "FORMAT".to_string(), value: HeaderValue::Nested(HashMap::from([ - ("abc", "1,233"), - ("xyz", "3125"), - ("sfh", "157"), + ("abc".to_string(), "1,233".to_string()), + ("xyz".to_string(), "3125".to_string()), + ("sfh".to_string(), "157".to_string()), ])), } ) From 88d1bfddbacbbc2ca4cfd9206482366bc9665b94 Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Wed, 16 Aug 2023 14:37:56 +0000 Subject: [PATCH 15/18] Only parse 'header' part of document --- vcf/src/vcf.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index f42af07..89e8865 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -157,6 +157,7 @@ pub fn parse_vcf(source: impl BufRead) -> Result { _ => panic!(), }; let formats = lines + .take_while(|s| match s { Ok(s) => s.starts_with("##"), _ => true}) .map( |result| match result { Ok(ref line) => Header::parse(line).map_err(VCFError::from), From 71489a6fa4d0a3284101a6ade385d7f1f484a02f Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Wed, 16 Aug 2023 16:50:54 +0000 Subject: [PATCH 16/18] Got the edoc-test passing... --- vcf/src/vcf.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index 89e8865..8cd2e23 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -109,7 +109,9 @@ impl From for VCFError { /// Similarly, we can obtain the format information for a file via the `format` attribute. /// /// ``` +/// use std::collections::HashMap; /// use vcf::vcf::parse_vcf; +/// use vcf::{Header, HeaderValue}; /// let vcf_source = br#"##fileformat=VCFv4.4 /// ###fileDate=20090805 /// ###source=myImputationProgramV3.1 @@ -139,10 +141,24 @@ impl From for VCFError { /// let vcf = parse_vcf(&vcf_source[..])?; /// let hq_description = vcf.format /// .iter() -/// .find(|item| match item.get("ID") {Some("HQ") => true, _ => false}) -/// .and_then(|item| item.get("Description")) -/// .unwrap(); -/// assert_eq!(hq_description, "Haplotype Quality"); +/// .find( +/// |item| match &item.value { +/// HeaderValue::Nested(d) => match d.get("ID") {Some(v) => v == "HQ", _ => false}, +/// _ => false +/// } +/// ).unwrap(); +/// assert_eq!( +/// *hq_description, +/// Header { +/// key: "FORMAT".to_string(), +/// value: HeaderValue::Nested(HashMap::from([ +/// ("ID".to_string(), "HQ".to_string()), +/// ("Number".to_string(), "2".to_string()), +/// ("Type".to_string(), "Integer".to_string()), +/// ("Description".to_string(), "Haplotype Quality".to_string()), +/// ])) +/// } +/// ); ///# Ok::<(), VCFError>(()) /// ``` pub fn parse_vcf(source: impl BufRead) -> Result { From f6945ad68fb57b694aff342c2f67ceadd304a86c Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 17 Aug 2023 17:42:18 +0000 Subject: [PATCH 17/18] Filter for only FORMAT headers --- vcf/src/vcf.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index 8cd2e23..974c29e 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -180,6 +180,13 @@ pub fn parse_vcf(source: impl BufRead) -> Result { Err(e) => Err(VCFError::IoError(e)), } ) + .filter( + |result| match result { + Ok(header) if header.key == "FORMAT" => true, + Err(_) => true, + _ => false, + } + ) .collect::, _>>()?; Ok( VCF { From 09b4e2d8f086013dd5b3d229178173b1c4c0c62a Mon Sep 17 00:00:00 2001 From: ac <4184070+MrCurtis@users.noreply.github.com> Date: Thu, 17 Aug 2023 20:09:01 +0000 Subject: [PATCH 18/18] Better naming of variable in doc test --- vcf/src/vcf.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vcf/src/vcf.rs b/vcf/src/vcf.rs index 974c29e..0338976 100644 --- a/vcf/src/vcf.rs +++ b/vcf/src/vcf.rs @@ -139,7 +139,7 @@ impl From for VCFError { /// "#; ///# use vcf::vcf::VCFError; /// let vcf = parse_vcf(&vcf_source[..])?; -/// let hq_description = vcf.format +/// let hq = vcf.format /// .iter() /// .find( /// |item| match &item.value { @@ -148,7 +148,7 @@ impl From for VCFError { /// } /// ).unwrap(); /// assert_eq!( -/// *hq_description, +/// *hq, /// Header { /// key: "FORMAT".to_string(), /// value: HeaderValue::Nested(HashMap::from([