Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Only apply non-destructive PII rules to log bodies by default. ([#5272](https://github.com/getsentry/relay/pull/5272))
- Infer the client ip when set to `{{auto}}` for EAP items. ([#5304](https://github.com/getsentry/relay/pull/5304))
- Add `sentry.origin` attribute to OTLP spans. ([#5294](https://github.com/getsentry/relay/pull/5294))
- Normalize deprecated attribute names according to `sentry-conventions` for logs and V2 spans. ([#5257](https://github.com/getsentry/relay/pull/5257))

**Breaking Changes**:

Expand Down
178 changes: 173 additions & 5 deletions relay-event-normalization/src/eap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ use std::net::IpAddr;
use chrono::{DateTime, Utc};
use relay_common::time::UnixTimestamp;
use relay_conventions::{
BROWSER_NAME, BROWSER_VERSION, CLIENT_ADDRESS, OBSERVED_TIMESTAMP_NANOS, USER_AGENT_ORIGINAL,
USER_GEO_CITY, USER_GEO_COUNTRY_CODE, USER_GEO_REGION, USER_GEO_SUBDIVISION,
AttributeInfo, BROWSER_NAME, BROWSER_VERSION, CLIENT_ADDRESS, OBSERVED_TIMESTAMP_NANOS,
USER_AGENT_ORIGINAL, USER_GEO_CITY, USER_GEO_COUNTRY_CODE, USER_GEO_REGION,
USER_GEO_SUBDIVISION, WriteBehavior,
};
use relay_event_schema::protocol::{AttributeType, Attributes, BrowserContext, Geo};
use relay_protocol::{Annotated, ErrorKind, Value};
use relay_protocol::{Annotated, ErrorKind, Meta, Remark, RemarkType, Value};

use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};

Expand Down Expand Up @@ -164,6 +165,60 @@ pub fn normalize_user_geo(
attributes.insert_if_missing(USER_GEO_REGION, || geo.region);
}

/// Normalizes deprecated attributes according to `sentry-conventions`.
///
/// Attributes with a status of `"normalize"` will be moved to their replacement name.
/// If there is already a value present under the replacement name, it will be left alone,
/// but the deprecated attribute is removed anyway.
///
/// Attributes with a status of `"backfill"` will be copied to their replacement name if the
/// replacement name is not present. In any case, the original name is left alone.
pub fn normalize_attribute_names(attributes: &mut Annotated<Attributes>) {
normalize_attribute_names_inner(attributes, relay_conventions::attribute_info)
}

fn normalize_attribute_names_inner(
attributes: &mut Annotated<Attributes>,
attribute_info: fn(&str) -> Option<&'static AttributeInfo>,
) {
let Some(attributes) = attributes.value_mut() else {
return;
};

let attribute_names: Vec<_> = attributes.keys().cloned().collect();

for name in attribute_names {
let Some(attribute_info) = attribute_info(&name) else {
continue;
};

match attribute_info.write_behavior {
WriteBehavior::CurrentName => continue,
WriteBehavior::NewName(new_name) => {
let Some(old_attribute) = attributes.get_raw_mut(&name) else {
continue;
};

let mut meta = Meta::default();
// TODO: Possibly add a new RemarkType for "renamed/moved"
meta.add_remark(Remark::new(RemarkType::Removed, "attribute.deprecated"));
let new_attribute = std::mem::replace(old_attribute, Annotated(None, meta));

if !attributes.contains_key(new_name) {
attributes.insert_raw(new_name.to_owned(), new_attribute);
}
}
WriteBehavior::BothNames(new_name) => {
if !attributes.contains_key(new_name)
&& let Some(current_attribute) = attributes.get_raw(&name).cloned()
{
attributes.insert_raw(new_name.to_owned(), current_attribute);
}
}
}
}
}

#[cfg(test)]
mod tests {
use relay_protocol::SerializableAnnotated;
Expand Down Expand Up @@ -206,14 +261,14 @@ mod tests {
DateTime::from_timestamp_nanos(1_234_201_337),
);

insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#"
insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r###"
{
"sentry.observed_timestamp_nanos": {
"type": "string",
"value": "111222333"
}
}
"#);
"###);
}

#[test]
Expand Down Expand Up @@ -496,4 +551,117 @@ mod tests {
"#,
);
}

#[test]
fn test_normalize_attributes() {
fn mock_attribute_info(name: &str) -> Option<&'static AttributeInfo> {
use relay_conventions::Pii;

match name {
"replace.empty" => Some(&AttributeInfo {
write_behavior: WriteBehavior::NewName("replaced"),
pii: Pii::Maybe,
aliases: &["replaced"],
}),
"replace.existing" => Some(&AttributeInfo {
write_behavior: WriteBehavior::NewName("not.replaced"),
pii: Pii::Maybe,
aliases: &["not.replaced"],
}),
"backfill.empty" => Some(&AttributeInfo {
write_behavior: WriteBehavior::BothNames("backfilled"),
pii: Pii::Maybe,
aliases: &["backfilled"],
}),
"backfill.existing" => Some(&AttributeInfo {
write_behavior: WriteBehavior::BothNames("not.backfilled"),
pii: Pii::Maybe,
aliases: &["not.backfilled"],
}),
_ => None,
}
}

let mut attributes = Annotated::new(Attributes::from([
(
"replace.empty".to_owned(),
Annotated::new("Should be moved".to_owned().into()),
),
(
"replace.existing".to_owned(),
Annotated::new("Should be removed".to_owned().into()),
),
(
"not.replaced".to_owned(),
Annotated::new("Should be left alone".to_owned().into()),
),
(
"backfill.empty".to_owned(),
Annotated::new("Should be copied".to_owned().into()),
),
(
"backfill.existing".to_owned(),
Annotated::new("Should be left alone".to_owned().into()),
),
(
"not.backfilled".to_owned(),
Annotated::new("Should be left alone".to_owned().into()),
),
]));

normalize_attribute_names_inner(&mut attributes, mock_attribute_info);

insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r###"
{
"backfill.empty": {
"type": "string",
"value": "Should be copied"
},
"backfill.existing": {
"type": "string",
"value": "Should be left alone"
},
"backfilled": {
"type": "string",
"value": "Should be copied"
},
"not.backfilled": {
"type": "string",
"value": "Should be left alone"
},
"not.replaced": {
"type": "string",
"value": "Should be left alone"
},
"replace.empty": null,
"replace.existing": null,
"replaced": {
"type": "string",
"value": "Should be moved"
},
"_meta": {
"replace.empty": {
"": {
"rem": [
[
"attribute.deprecated",
"x"
]
]
}
},
"replace.existing": {
"": {
"rem": [
[
"attribute.deprecated",
"x"
]
]
}
}
}
}
"###);
}
}
31 changes: 31 additions & 0 deletions relay-event-schema/src/protocol/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,24 @@ impl Attributes {
Some(&self.0.get(key)?.value()?.value.value)
}

/// Returns the annotated attribute with the given key.
pub fn get_raw<Q>(&self, key: &Q) -> Option<&Annotated<Attribute>>
where
String: Borrow<Q>,
Q: Ord + ?Sized,
{
self.0.get(key)
}

/// Mutably returns the annotated attribute with the given key.
pub fn get_raw_mut<Q>(&mut self, key: &Q) -> Option<&mut Annotated<Attribute>>
where
String: Borrow<Q>,
Q: Ord + ?Sized,
{
self.0.get_mut(key)
}

/// Inserts an attribute with the given value into this collection.
pub fn insert<K: Into<String>, V: Into<AttributeValue>>(&mut self, key: K, value: V) {
fn inner(slf: &mut Attributes, key: String, value: AttributeValue) {
Expand Down Expand Up @@ -296,6 +314,19 @@ impl Attributes {
) -> std::collections::btree_map::IterMut<'_, String, Annotated<Attribute>> {
self.0.iter_mut()
}

/// Returns an iterator over the keys in this collection.
pub fn keys(&self) -> std::collections::btree_map::Keys<'_, String, Annotated<Attribute>> {
self.0.keys()
}

/// Provides mutable access to an entry in this collection.
pub fn entry(
&mut self,
key: String,
) -> std::collections::btree_map::Entry<'_, String, Annotated<Attribute>> {
self.0.entry(key)
}
}

impl IntoIterator for Attributes {
Expand Down
3 changes: 2 additions & 1 deletion relay-server/src/processing/logs/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,11 @@ fn scrub_log(log: &mut Annotated<OurLog>, ctx: Context<'_>) -> Result<()> {

fn normalize_log(log: &mut Annotated<OurLog>, meta: &RequestMeta) -> Result<()> {
if let Some(log) = log.value_mut() {
eap::normalize_attribute_types(&mut log.attributes);
eap::normalize_attribute_names(&mut log.attributes);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Deprecation Normalization Timing Issue

The call to eap::normalize_attribute_names() at line 113 runs before eap::normalize_received() at line 114. However, normalize_received() inserts the sentry.observed_timestamp_nanos attribute, which may be marked as deprecated in sentry-conventions. Since the deprecation normalization has already run, any attribute inserted by normalize_received() will not be subject to the deprecation normalization (renaming/backfilling), creating an inconsistency where deprecated attributes inserted by normalize_received() remain in their deprecated form. The deprecation normalization should run AFTER all attributes are populated.

Fix in Cursor Fix in Web

eap::normalize_received(&mut log.attributes, meta.received_at());
eap::normalize_client_address(&mut log.attributes, meta.client_addr());
eap::normalize_user_agent(&mut log.attributes, meta.user_agent(), meta.client_hints());
eap::normalize_attribute_types(&mut log.attributes);
}

process_value(
Expand Down
2 changes: 1 addition & 1 deletion relay-server/src/processing/logs/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::envelope::WithHeader;

/// Returns the calculated size of a [`OurLog`].
///
/// Unlike [`relay_ourlogs::calculate_size`], this access the already manifested byte size
/// Unlike [`relay_ourlogs::calculate_size`], this accesses the already manifested byte size
/// of the log, instead of calculating it.
///
/// When compiled with debug assertion the function asserts the presence of a manifested byte size.
Expand Down
3 changes: 2 additions & 1 deletion relay-server/src/processing/spans/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ fn normalize_span(
// TODO: `validate_span()` (start/end timestamps)

if let Some(span) = span.value_mut() {
eap::normalize_attribute_types(&mut span.attributes);
eap::normalize_attribute_names(&mut span.attributes);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Deprecation Normalization Timing Issue

The call to eap::normalize_attribute_names() at line 92 runs before eap::normalize_received() at line 93. However, normalize_received() inserts the sentry.observed_timestamp_nanos attribute, which may be marked as deprecated in sentry-conventions. Since the deprecation normalization has already run, any attribute inserted by normalize_received() will not be subject to the deprecation normalization (renaming/backfilling), creating an inconsistency where deprecated attributes inserted by normalize_received() remain in their deprecated form. The deprecation normalization should run AFTER all attributes are populated.

Fix in Cursor Fix in Web

eap::normalize_received(&mut span.attributes, meta.received_at());
eap::normalize_client_address(&mut span.attributes, meta.client_addr());
eap::normalize_user_agent(&mut span.attributes, meta.user_agent(), meta.client_hints());
eap::normalize_attribute_types(&mut span.attributes);
eap::normalize_user_geo(&mut span.attributes, || {
meta.client_addr().and_then(|ip| geo_lookup.lookup(ip))
});
Expand Down
28 changes: 8 additions & 20 deletions tests/integration/test_ourlogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,16 @@ def test_ourlog_multiple_containers_not_allowed(
@pytest.mark.parametrize(
"external_mode,expected_byte_size",
[
# 260 here is a billing relevant metric, do not arbitrarily change it,
# 296 here is a billing relevant metric, do not arbitrarily change it,
# this value is supposed to be static and purely based on data received,
# independent of any normalization.
(None, 260),
(None, 296),
# Same applies as above, a proxy Relay does not need to run normalization.
("proxy", 260),
("proxy", 296),
# If an external Relay/Client makes modifications, sizes can change,
# this is fuzzy due to slight changes in sizes due to added timestamps
# and may need to be adjusted when changing normalization.
("managed", 454),
("managed", 521),
],
)
def test_ourlog_extraction_with_sentry_logs(
Expand Down Expand Up @@ -195,6 +195,7 @@ def test_ourlog_extraction_with_sentry_logs(
"string.attribute": {"value": "some string", "type": "string"},
"pii": {"value": "4242 4242 4242 4242", "type": "string"},
"sentry.severity_text": {"value": "info", "type": "string"},
"http.response_content_length": {"value": 17, "type": "integer"},
"unknown_type": {"value": "info", "type": "unknown"},
"broken_type": {"value": "info", "type": "not_a_real_type"},
"mismatched_type": {"value": "some string", "type": "boolean"},
Expand Down Expand Up @@ -258,6 +259,8 @@ def test_ourlog_extraction_with_sentry_logs(
"sentry.browser.version": {"stringValue": "2.32"},
"sentry.severity_text": {"stringValue": "info"},
"sentry.payload_size_bytes": {"intValue": mock.ANY},
"http.response_content_length": {"intValue": "17"},
"http.response.body.size": {"intValue": "17"},
"sentry.span_id": {"stringValue": "eee19b7ec3c1b174"},
"string.attribute": {"stringValue": "some string"},
"valid_string_with_other": {"stringValue": "test"},
Expand Down Expand Up @@ -540,25 +543,10 @@ def test_ourlog_extraction_default_pii_scrubbing_does_not_scrub_default_attribut
"custom_field": {"stringValue": "[REDACTED]"},
"sentry.body": {"stringValue": "Test log"},
"sentry.severity_text": {"stringValue": "info"},
"sentry.observed_timestamp_nanos": {
"stringValue": time_within_delta(
ts,
delta=timedelta(seconds=1),
expect_resolution="ns",
precision="us",
)
},
"sentry.span_id": {"stringValue": "eee19b7ec3c1b174"},
"sentry.payload_size_bytes": mock.ANY,
"sentry.browser.name": {"stringValue": "Python Requests"},
"sentry.timestamp_precise": {
"intValue": time_within_delta(
ts,
delta=timedelta(seconds=0),
expect_resolution="ns",
precision="us",
)
},
**timestamps(ts),
},
"clientSampleRate": 1.0,
"itemId": mock.ANY,
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/test_spansv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def test_spansv2_basic(
"attributes": {
"foo": {"value": "bar", "type": "string"},
"invalid": {"value": True, "type": "string"},
"http.response_content_length": {"value": 17, "type": "integer"},
},
},
trace_info={
Expand All @@ -87,6 +88,8 @@ def test_spansv2_basic(
"span_id": "eee19b7ec3c1b175",
"attributes": {
"foo": {"type": "string", "value": "bar"},
"http.response_content_length": {"value": 17, "type": "integer"},
"http.response.body.size": {"value": 17, "type": "integer"},
"invalid": None,
"sentry.browser.name": {"type": "string", "value": "Python Requests"},
"sentry.browser.version": {"type": "string", "value": "2.32"},
Expand Down
Loading