Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Only apply non-destructive PII rules to log bodies by default. ([#5272](https://github.com/getsentry/relay/pull/5272))
- Infer the client ip when set to `{{auto}}` for EAP items. ([#5304](https://github.com/getsentry/relay/pull/5304))
- Add `sentry.origin` attribute to OTLP spans. ([#5294](https://github.com/getsentry/relay/pull/5294))
- Normalize deprecated attribute names according to `sentry-conventions` for logs and V2 spans. ([#5257](https://github.com/getsentry/relay/pull/5257))

**Breaking Changes**:

Expand Down
176 changes: 171 additions & 5 deletions relay-event-normalization/src/eap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ use std::net::IpAddr;
use chrono::{DateTime, Utc};
use relay_common::time::UnixTimestamp;
use relay_conventions::{
BROWSER_NAME, BROWSER_VERSION, CLIENT_ADDRESS, OBSERVED_TIMESTAMP_NANOS, USER_AGENT_ORIGINAL,
USER_GEO_CITY, USER_GEO_COUNTRY_CODE, USER_GEO_REGION, USER_GEO_SUBDIVISION,
AttributeInfo, BROWSER_NAME, BROWSER_VERSION, CLIENT_ADDRESS, OBSERVED_TIMESTAMP_NANOS,
USER_AGENT_ORIGINAL, USER_GEO_CITY, USER_GEO_COUNTRY_CODE, USER_GEO_REGION,
USER_GEO_SUBDIVISION, WriteBehavior,
};
use relay_event_schema::protocol::{AttributeType, Attributes, BrowserContext, Geo};
use relay_protocol::{Annotated, ErrorKind, Value};
use relay_protocol::{Annotated, ErrorKind, Remark, RemarkType, Value};

use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};

Expand Down Expand Up @@ -164,6 +165,58 @@ pub fn normalize_user_geo(
attributes.insert_if_missing(USER_GEO_REGION, || geo.region);
}

/// Normalizes deprecated attributes according to `sentry-conventions`.
///
/// Attributes with a status of `"normalize"` will be moved to their replacement name.
/// If there is already a value present under the replacement name, it will be left alone,
/// but the deprecated attribute is removed anyway.
///
/// Attributes with a status of `"backfill"` will be copied to their replacement name if the
/// replacement name is not present. In any case, the original name is left alone.
pub fn normalize_attribute_names(attributes: &mut Annotated<Attributes>) {
normalize_attribute_names_inner(attributes, relay_conventions::attribute_info)
}

fn normalize_attribute_names_inner(
attributes: &mut Annotated<Attributes>,
attribute_info: fn(&str) -> Option<&'static AttributeInfo>,
) {
let attributes = attributes.get_or_insert_with(Default::default);
let attribute_names: Vec<_> = attributes.keys().cloned().collect();

for name in attribute_names {
let Some(attribute_info) = attribute_info(&name) else {
continue;
};

match attribute_info.write_behavior {
WriteBehavior::CurrentName => continue,
WriteBehavior::NewName(new_name) => {
let Some(old_attribute) = attributes.get_raw_mut(&name) else {
continue;
};

let new_attribute = old_attribute.clone();
old_attribute.set_value(None);
old_attribute
.meta_mut()
.add_remark(Remark::new(RemarkType::Removed, "attribute.deprecated"));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there something better that we could add to this remark as the "rule ID"?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'd want to differentiate here on the remarks:

  • removed
  • moved (renamed)
  • removed, not moved (because new attribute already exists)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. What is the difference between the first and the third?
  2. Which RemarkType would you use for the second?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the difference between the first and the third?

  1. it's just a deletion
  2. it was supposed to be a rename, but the target already existed so it was removed. This rename does degrade to a remove, but to better understand what it is happening, I imagine a hint that something was supposed to be moved but it degraded to a remove is helpful.

Which RemarkType would you use for the second?

Removed fits closest imo, we can further differentiate on the rule_id imo. Not sure if it's worth introducing a rename, when that is essentially a just a remove.
Alternatively, we can also only mark the new value that it was renamed.


if !attributes.contains_key(new_name) {
attributes.insert_raw(new_name.to_owned(), new_attribute);
}
}
WriteBehavior::BothNames(new_name) => {
if !attributes.contains_key(new_name)
&& let Some(current_attribute) = attributes.get_raw(&name).cloned()
{
attributes.insert_raw(new_name.to_owned(), current_attribute);
}
}
}
}
}

#[cfg(test)]
mod tests {
use relay_protocol::SerializableAnnotated;
Expand Down Expand Up @@ -206,14 +259,14 @@ mod tests {
DateTime::from_timestamp_nanos(1_234_201_337),
);

insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#"
insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r###"
{
"sentry.observed_timestamp_nanos": {
"type": "string",
"value": "111222333"
}
}
"#);
"###);
}

#[test]
Expand Down Expand Up @@ -496,4 +549,117 @@ mod tests {
"#,
);
}

#[test]
fn test_normalize_attributes() {
fn mock_attribute_info(name: &str) -> Option<&'static AttributeInfo> {
use relay_conventions::Pii;

match name {
"replace.empty" => Some(&AttributeInfo {
write_behavior: WriteBehavior::NewName("replaced"),
pii: Pii::Maybe,
aliases: &["replaced"],
}),
"replace.existing" => Some(&AttributeInfo {
write_behavior: WriteBehavior::NewName("not.replaced"),
pii: Pii::Maybe,
aliases: &["not.replaced"],
}),
"backfill.empty" => Some(&AttributeInfo {
write_behavior: WriteBehavior::BothNames("backfilled"),
pii: Pii::Maybe,
aliases: &["backfilled"],
}),
"backfill.existing" => Some(&AttributeInfo {
write_behavior: WriteBehavior::BothNames("not.backfilled"),
pii: Pii::Maybe,
aliases: &["not.backfilled"],
}),
_ => None,
}
}

let mut attributes = Annotated::new(Attributes::from([
(
"replace.empty".to_owned(),
Annotated::new("Should be moved".to_owned().into()),
),
(
"replace.existing".to_owned(),
Annotated::new("Should be removed".to_owned().into()),
),
(
"not.replaced".to_owned(),
Annotated::new("Should be left alone".to_owned().into()),
),
(
"backfill.empty".to_owned(),
Annotated::new("Should be copied".to_owned().into()),
),
(
"backfill.existing".to_owned(),
Annotated::new("Should be left alone".to_owned().into()),
),
(
"not.backfilled".to_owned(),
Annotated::new("Should be left alone".to_owned().into()),
),
]));

normalize_attribute_names_inner(&mut attributes, mock_attribute_info);

insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r###"
{
"backfill.empty": {
"type": "string",
"value": "Should be copied"
},
"backfill.existing": {
"type": "string",
"value": "Should be left alone"
},
"backfilled": {
"type": "string",
"value": "Should be copied"
},
"not.backfilled": {
"type": "string",
"value": "Should be left alone"
},
"not.replaced": {
"type": "string",
"value": "Should be left alone"
},
"replace.empty": null,
"replace.existing": null,
"replaced": {
"type": "string",
"value": "Should be moved"
},
"_meta": {
"replace.empty": {
"": {
"rem": [
[
"attribute.deprecated",
"x"
]
]
}
},
"replace.existing": {
"": {
"rem": [
[
"attribute.deprecated",
"x"
]
]
}
}
}
}
"###);
}
}
31 changes: 31 additions & 0 deletions relay-event-schema/src/protocol/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,24 @@ impl Attributes {
Some(&self.0.get(key)?.value()?.value.value)
}

/// Returns the annotated attribute with the given key.
pub fn get_raw<Q>(&self, key: &Q) -> Option<&Annotated<Attribute>>
where
String: Borrow<Q>,
Q: Ord + ?Sized,
{
self.0.get(key)
}

/// Mutably returns the annotated attribute with the given key.
pub fn get_raw_mut<Q>(&mut self, key: &Q) -> Option<&mut Annotated<Attribute>>
where
String: Borrow<Q>,
Q: Ord + ?Sized,
{
self.0.get_mut(key)
}

/// Inserts an attribute with the given value into this collection.
pub fn insert<K: Into<String>, V: Into<AttributeValue>>(&mut self, key: K, value: V) {
fn inner(slf: &mut Attributes, key: String, value: AttributeValue) {
Expand Down Expand Up @@ -296,6 +314,19 @@ impl Attributes {
) -> std::collections::btree_map::IterMut<'_, String, Annotated<Attribute>> {
self.0.iter_mut()
}

/// Returns an iterator over the keys in this collection.
pub fn keys(&self) -> std::collections::btree_map::Keys<'_, String, Annotated<Attribute>> {
self.0.keys()
}

/// Provides mutable access to an entry in this collection.
pub fn entry(
&mut self,
key: String,
) -> std::collections::btree_map::Entry<'_, String, Annotated<Attribute>> {
self.0.entry(key)
}
}

impl IntoIterator for Attributes {
Expand Down
3 changes: 2 additions & 1 deletion relay-server/src/processing/logs/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,11 @@ fn scrub_log(log: &mut Annotated<OurLog>, ctx: Context<'_>) -> Result<()> {

fn normalize_log(log: &mut Annotated<OurLog>, meta: &RequestMeta) -> Result<()> {
if let Some(log) = log.value_mut() {
eap::normalize_attribute_types(&mut log.attributes);
eap::normalize_attribute_names(&mut log.attributes);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Deprecation Normalization Timing Issue

The call to eap::normalize_attribute_names() at line 113 runs before eap::normalize_received() at line 114. However, normalize_received() inserts the sentry.observed_timestamp_nanos attribute, which may be marked as deprecated in sentry-conventions. Since the deprecation normalization has already run, any attribute inserted by normalize_received() will not be subject to the deprecation normalization (renaming/backfilling), creating an inconsistency where deprecated attributes inserted by normalize_received() remain in their deprecated form. The deprecation normalization should run AFTER all attributes are populated.

Fix in Cursor Fix in Web

eap::normalize_received(&mut log.attributes, meta.received_at());
eap::normalize_client_address(&mut log.attributes, meta.client_addr());
eap::normalize_user_agent(&mut log.attributes, meta.user_agent(), meta.client_hints());
eap::normalize_attribute_types(&mut log.attributes);
}

process_value(
Expand Down
2 changes: 1 addition & 1 deletion relay-server/src/processing/logs/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::envelope::WithHeader;

/// Returns the calculated size of a [`OurLog`].
///
/// Unlike [`relay_ourlogs::calculate_size`], this access the already manifested byte size
/// Unlike [`relay_ourlogs::calculate_size`], this accesses the already manifested byte size
/// of the log, instead of calculating it.
///
/// When compiled with debug assertion the function asserts the presence of a manifested byte size.
Expand Down
3 changes: 2 additions & 1 deletion relay-server/src/processing/spans/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ fn normalize_span(
// TODO: `validate_span()` (start/end timestamps)

if let Some(span) = span.value_mut() {
eap::normalize_attribute_types(&mut span.attributes);
eap::normalize_attribute_names(&mut span.attributes);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Deprecation Normalization Timing Issue

The call to eap::normalize_attribute_names() at line 92 runs before eap::normalize_received() at line 93. However, normalize_received() inserts the sentry.observed_timestamp_nanos attribute, which may be marked as deprecated in sentry-conventions. Since the deprecation normalization has already run, any attribute inserted by normalize_received() will not be subject to the deprecation normalization (renaming/backfilling), creating an inconsistency where deprecated attributes inserted by normalize_received() remain in their deprecated form. The deprecation normalization should run AFTER all attributes are populated.

Fix in Cursor Fix in Web

eap::normalize_received(&mut span.attributes, meta.received_at());
eap::normalize_client_address(&mut span.attributes, meta.client_addr());
eap::normalize_user_agent(&mut span.attributes, meta.user_agent(), meta.client_hints());
eap::normalize_attribute_types(&mut span.attributes);
eap::normalize_user_geo(&mut span.attributes, || {
meta.client_addr().and_then(|ip| geo_lookup.lookup(ip))
});
Expand Down
28 changes: 8 additions & 20 deletions tests/integration/test_ourlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,16 @@ def test_ourlog_multiple_containers_not_allowed(
@pytest.mark.parametrize(
"external_mode,expected_byte_size",
[
# 260 here is a billing relevant metric, do not arbitrarily change it,
# 296 here is a billing relevant metric, do not arbitrarily change it,
# this value is supposed to be static and purely based on data received,
# independent of any normalization.
(None, 260),
(None, 296),
# Same applies as above, a proxy Relay does not need to run normalization.
("proxy", 260),
("proxy", 296),
# If an external Relay/Client makes modifications, sizes can change,
# this is fuzzy due to slight changes in sizes due to added timestamps
# and may need to be adjusted when changing normalization.
("managed", 454),
("managed", 521),
],
)
def test_ourlog_extraction_with_sentry_logs(
Expand Down Expand Up @@ -195,6 +195,7 @@ def test_ourlog_extraction_with_sentry_logs(
"string.attribute": {"value": "some string", "type": "string"},
"pii": {"value": "4242 4242 4242 4242", "type": "string"},
"sentry.severity_text": {"value": "info", "type": "string"},
"http.response_content_length": {"value": 17, "type": "integer"},
"unknown_type": {"value": "info", "type": "unknown"},
"broken_type": {"value": "info", "type": "not_a_real_type"},
"mismatched_type": {"value": "some string", "type": "boolean"},
Expand Down Expand Up @@ -258,6 +259,8 @@ def test_ourlog_extraction_with_sentry_logs(
"sentry.browser.version": {"stringValue": "2.32"},
"sentry.severity_text": {"stringValue": "info"},
"sentry.payload_size_bytes": {"intValue": mock.ANY},
"http.response_content_length": {"intValue": "17"},
"http.response.body.size": {"intValue": "17"},
"sentry.span_id": {"stringValue": "eee19b7ec3c1b174"},
"string.attribute": {"stringValue": "some string"},
"valid_string_with_other": {"stringValue": "test"},
Expand Down Expand Up @@ -540,25 +543,10 @@ def test_ourlog_extraction_default_pii_scrubbing_does_not_scrub_default_attribut
"custom_field": {"stringValue": "[REDACTED]"},
"sentry.body": {"stringValue": "Test log"},
"sentry.severity_text": {"stringValue": "info"},
"sentry.observed_timestamp_nanos": {
"stringValue": time_within_delta(
ts,
delta=timedelta(seconds=1),
expect_resolution="ns",
precision="us",
)
},
"sentry.span_id": {"stringValue": "eee19b7ec3c1b174"},
"sentry.payload_size_bytes": mock.ANY,
"sentry.browser.name": {"stringValue": "Python Requests"},
"sentry.timestamp_precise": {
"intValue": time_within_delta(
ts,
delta=timedelta(seconds=0),
expect_resolution="ns",
precision="us",
)
},
**timestamps(ts),
},
"clientSampleRate": 1.0,
"itemId": mock.ANY,
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/test_spansv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def test_spansv2_basic(
"attributes": {
"foo": {"value": "bar", "type": "string"},
"invalid": {"value": True, "type": "string"},
"http.response_content_length": {"value": 17, "type": "integer"},
},
},
trace_info={
Expand All @@ -87,6 +88,8 @@ def test_spansv2_basic(
"span_id": "eee19b7ec3c1b175",
"attributes": {
"foo": {"type": "string", "value": "bar"},
"http.response_content_length": {"value": 17, "type": "integer"},
"http.response.body.size": {"value": 17, "type": "integer"},
"invalid": None,
"sentry.browser.name": {"type": "string", "value": "Python Requests"},
"sentry.browser.version": {"type": "string", "value": "2.32"},
Expand Down