From 19d8512ced077f62600d449268aa403bd71172be Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 15 Jun 2025 03:39:19 +0500 Subject: [PATCH 1/3] Allow both `&text` and `$value` fields in the same struct failures: fixed_name::variable_size::text_and_value variable_name::variable_size::text_and_value --- Changelog.md | 4 ++ src/de/map.rs | 10 ++-- src/de/mod.rs | 28 ++++++++--- tests/serde-de-seq.rs | 106 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 10 deletions(-) diff --git a/Changelog.md b/Changelog.md index 5f0ddd0d..0df5693c 100644 --- a/Changelog.md +++ b/Changelog.md @@ -33,6 +33,9 @@ XML specification. See the updated `custom_entities` example! ### Bug Fixes +- [#868]: Allow to have both `$text` and `$value` special fields in one struct. Previously + any text will be recognized as `$value` field even when `$text` field is also presented. + ### Misc Changes - [#863]: Remove `From> for BytesStart<'a>` because now `BytesStart` stores the @@ -42,6 +45,7 @@ XML specification. See the updated `custom_entities` example! [#766]: https://github.com/tafia/quick-xml/pull/766 [#863]: https://github.com/tafia/quick-xml/pull/863 +[#868]: https://github.com/tafia/quick-xml/pull/868 [general entity]: https://www.w3.org/TR/xml11/#gen-entity diff --git a/src/de/map.rs b/src/de/map.rs index c5540459..8a4eca8a 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -192,6 +192,9 @@ where /// value for VALUE_KEY field /// ``` has_value_field: bool, + /// If `true`, then the deserialized struct has a field with a special name: + /// [`TEXT_KEY`]. + has_text_field: bool, } impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E> @@ -212,6 +215,7 @@ where source: ValueSource::Unknown, fields, has_value_field: fields.contains(&VALUE_KEY), + has_text_field: fields.contains(&TEXT_KEY), }) } @@ -264,10 +268,8 @@ where } else { // try getting from events (value) match self.de.peek()? { - // We shouldn't have both `$value` and `$text` fields in the same - // struct, so if we have `$value` field, the we should deserialize - // text content to `$value` - DeEvent::Text(_) if self.has_value_field => { + // If we have dedicated "$text" field, it will not be passed to "$value" field + DeEvent::Text(_) if self.has_value_field && !self.has_text_field => { self.source = ValueSource::Content; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the diff --git a/src/de/mod.rs b/src/de/mod.rs index 2b7d0f3c..3c04ba14 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1506,6 +1506,28 @@ //! The only difference is in how complex types and sequences are serialized. //! If you doubt which one you should select, begin with [`$value`](#value). //! +//! If you have both `$text` and `$value` in you struct, then text events will be +//! mapped to the `$text` field: +//! +//! ``` +//! # use serde::Deserialize; +//! # use quick_xml::de::from_str; +//! #[derive(Deserialize, PartialEq, Debug)] +//! struct TextAndValue { +//! #[serde(rename = "$text")] +//! text: Option, +//! +//! #[serde(rename = "$value")] +//! value: Option, +//! } +//! +//! let object: TextAndValue = from_str("text ").unwrap(); +//! assert_eq!(object, TextAndValue { +//! text: Some("text and CDATA".to_string()), +//! value: None, +//! }); +//! ``` +//! //! ## `$text` //! `$text` is used when you want to write your XML as a text or a CDATA content. //! More formally, field with that name represents simple type definition with @@ -1744,12 +1766,6 @@ //! assert_eq!(object, obj); //! ``` //! -//! ---------------------------------------------------------------------------- -//! -//! You can have either `$text` or `$value` field in your structs. Unfortunately, -//! that is not enforced, so you can theoretically have both, but you should -//! avoid that. -//! //! //! //! Frequently Used Patterns diff --git a/tests/serde-de-seq.rs b/tests/serde-de-seq.rs index a1aaa6dc..d8287ba6 100644 --- a/tests/serde-de-seq.rs +++ b/tests/serde-de-seq.rs @@ -821,6 +821,28 @@ mod fixed_name { ); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + item: [(); 2], + } + + from_str::( + r#" + + + + text + + + "#, + ) + .unwrap(); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by [`xs:list`s](https://www.w3schools.com/xml/el_list.asp) mod xs_list { @@ -1656,6 +1678,36 @@ mod fixed_name { ); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + item: Vec<()>, + } + + let data: List = from_str( + r#" + + + + text + + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + text: (), + item: vec![(); 2], + } + ); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by `xs:list`s mod xs_list { @@ -2883,6 +2935,29 @@ mod variable_name { ); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + #[serde(rename = "$value")] + value: [(); 2], + } + + from_str::( + r#" + + + + text + + + "#, + ) + .unwrap(); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by `xs:list`s mod xs_list { @@ -3929,6 +4004,37 @@ mod variable_name { .unwrap_err(); } + #[test] + fn text_and_value() { + #[derive(Debug, PartialEq, Deserialize)] + struct List { + #[serde(rename = "$text")] + text: (), + #[serde(rename = "$value")] + value: Vec<()>, + } + + let data: List = from_str( + r#" + + + + text + + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + text: (), + value: vec![(); 2], + } + ); + } + /// Checks that sequences represented by elements can contain sequences, /// represented by `xs:list`s mod xs_list { From 01caf74a5d3ef39540152530726f9df5523ff4a4 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 15 Jun 2025 02:49:50 +0500 Subject: [PATCH 2/3] Add regression test for #868 failures: serde-de-seq: fixed_name::variable_size::text_and_value variable_name::variable_size::text_and_value serde-issues: issue868 --- tests/serde-issues.rs | 83 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tests/serde-issues.rs b/tests/serde-issues.rs index 7826669d..f9d4ee1e 100644 --- a/tests/serde-issues.rs +++ b/tests/serde-issues.rs @@ -570,3 +570,86 @@ fn issue683() { } ); } + +/// Regression test for https://github.com/tafia/quick-xml/issues/868. +#[test] +fn issue868() { + #[derive(Debug, PartialEq, Deserialize)] + #[serde(rename = "root")] + pub struct Root { + #[serde(rename = "key")] + pub key: Key, + } + + #[derive(Debug, PartialEq, Deserialize)] + #[serde(rename = "key")] + pub struct Key { + #[serde(rename = "value")] + pub values: Option>, + } + + #[derive(Debug, PartialEq, Deserialize)] + #[serde(rename = "Value")] + pub struct Value { + #[serde(rename = "@id")] + pub id: String, + + #[serde(rename = "$text")] + pub text: Option, + } + let xml = r#" + + + + text1 + text2 + text3 + text4d + text5 + text6 + + "#; + let data = quick_xml::de::from_str::(xml); + #[cfg(feature = "overlapped-lists")] + assert_eq!( + data.unwrap(), + Root { + key: Key { + values: Some(vec![ + Value { + id: "1".to_string(), + text: Some("text1".to_string()) + }, + Value { + id: "2".to_string(), + text: Some("text2".to_string()) + }, + Value { + id: "3".to_string(), + text: Some("text3".to_string()) + }, + Value { + id: "4".to_string(), + text: Some("text4".to_string()) + }, + Value { + id: "5".to_string(), + text: Some("text5".to_string()) + }, + Value { + id: "6".to_string(), + text: Some("text6".to_string()) + }, + ]), + }, + } + ); + #[cfg(not(feature = "overlapped-lists"))] + match data { + Err(quick_xml::DeError::Custom(e)) => assert_eq!(e, "duplicate field `value`"), + e => panic!( + r#"Expected `Err(Custom("duplicate field `value`"))`, but got `{:?}`"#, + e + ), + } +} From aa23b65ae80775acd5d1b231bdb2ed34c2aa1e52 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 15 Jun 2025 02:56:48 +0500 Subject: [PATCH 3/3] Skip unwanted text events in sequences If sequence expected only tags, skip any texts (CDATA included) If sequence expected any values, skip texts (CDATA included) if there is a dedicated "$text" field Fixed: serde-de-seq: fixed_name::variable_size::text_and_value variable_name::variable_size::text_and_value serde-issues: issue868 --- Changelog.md | 1 + src/de/map.rs | 28 ++++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/Changelog.md b/Changelog.md index 0df5693c..b0ffa638 100644 --- a/Changelog.md +++ b/Changelog.md @@ -35,6 +35,7 @@ XML specification. See the updated `custom_entities` example! - [#868]: Allow to have both `$text` and `$value` special fields in one struct. Previously any text will be recognized as `$value` field even when `$text` field is also presented. +- [#868]: Skip text events when deserialize a sequence of items overlapped with text (including CDATA). ### Misc Changes diff --git a/src/de/map.rs b/src/de/map.rs index 8a4eca8a..224d5bd1 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -611,7 +611,7 @@ where _ => unreachable!(), } } else { - TagFilter::Exclude(self.map.fields) + TagFilter::Exclude(self.map.fields, self.map.has_text_field) }; visitor.visit_seq(MapValueSeqAccess { #[cfg(feature = "overlapped-lists")] @@ -852,15 +852,27 @@ enum TagFilter<'de> { Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag /// A `SeqAccess` interested in tags with any name, except explicitly listed. /// Excluded tags are used as struct field names and therefore should not - /// fall into a `$value` category - Exclude(&'static [&'static str]), + /// fall into a `$value` category. + /// + /// The `bool` represents the having of a `$text` special field in fields array. + /// It is used to exclude text events when `$text` fields is defined together with + /// `$value` fieldб and `$value` accepts sequence. + Exclude(&'static [&'static str], bool), } impl<'de> TagFilter<'de> { fn is_suitable(&self, start: &BytesStart) -> Result { match self { Self::Include(n) => Ok(n.name() == start.name()), - Self::Exclude(fields) => not_in(fields, start), + Self::Exclude(fields, _) => not_in(fields, start), + } + } + const fn need_skip_text(&self) -> bool { + match self { + // If we look only for tags, we should skip any $text keys + Self::Include(_) => true, + // If we look fo any data, we should exclude $text keys if it in the list + Self::Exclude(_, has_text_field) => *has_text_field, } } } @@ -944,9 +956,17 @@ where self.map.de.skip()?; continue; } + // Skip any text events if sequence expects only specific tag names + #[cfg(feature = "overlapped-lists")] + DeEvent::Text(_) if self.filter.need_skip_text() => { + self.map.de.skip()?; + continue; + } // Stop iteration when list elements ends #[cfg(not(feature = "overlapped-lists"))] DeEvent::Start(e) if !self.filter.is_suitable(e)? => Ok(None), + #[cfg(not(feature = "overlapped-lists"))] + DeEvent::Text(_) if self.filter.need_skip_text() => Ok(None), // Stop iteration after reaching a closing tag // The matching tag name is guaranteed by the reader