Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- Collect SDK information in profile chunks. ([#3882](https://github.com/getsentry/relay/pull/3882))
- Introduce `trim = "disabled"` type attribute to prevent trimming of spans. ([#3877](https://github.com/getsentry/relay/pull/3877))
- Make the tcp listen backlog configurable and raise the default to 1024. ([#3899](https://github.com/getsentry/relay/pull/3899))
- Add experimental support for MongoDB query normalization. ([#3912](https://github.com/getsentry/relay/pull/3912))
- Extract `user.geo.country_code` into span indexed. ([#3911](https://github.com/getsentry/relay/pull/3911))
- Add `span.system` tag to span metrics ([#3913](https://github.com/getsentry/relay/pull/3913))
- Switch glob implementations from `regex` to `regex-lite`. ([#3926](https://github.com/getsentry/relay/pull/3926))
Expand Down
2 changes: 2 additions & 0 deletions relay-cabi/src/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::sync::OnceLock;
use chrono::{DateTime, Utc};
use relay_cardinality::CardinalityLimit;
use relay_dynamic_config::{normalize_json, GlobalConfig, ProjectConfig};
use relay_event_normalization::span::description::ScrubMongoDescription;
use relay_event_normalization::{
normalize_event, validate_event, BreakdownsConfig, ClientHints, EventValidationConfig,
GeoIpLookup, NormalizationConfig, RawUserAgentInfo,
Expand Down Expand Up @@ -273,6 +274,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event(
normalize_spans: config.normalize_spans,
replay_id: config.replay_id,
span_allowed_hosts: &[], // only supported in relay
scrub_mongo_description: ScrubMongoDescription::Disabled, // only supported in relay
};
normalize_event(&mut event, &normalization_config);

Expand Down
14 changes: 4 additions & 10 deletions relay-dynamic-config/src/defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,7 @@ use crate::metrics::MetricSpec;
use crate::{Feature, GroupKey, MetricExtractionConfig, ProjectConfig, Tag, TagMapping};

/// A list of `span.op` patterns that indicate databases that should be skipped.
const DISABLED_DATABASES: &[&str] = &[
"*clickhouse*",
"*compile*",
"*mongodb*",
"*redis*",
"db.orm",
];
const DISABLED_DATABASES: &[&str] = &["*clickhouse*", "*compile*", "*redis*", "db.orm"];

/// A list of `span.op` patterns we want to enable for mobile.
const MOBILE_OPS: &[&str] = &[
Expand Down Expand Up @@ -121,9 +115,9 @@ pub fn hardcoded_span_metrics() -> Vec<(GroupKey, Vec<MetricSpec>, Vec<TagMappin
let is_ai = RuleCondition::glob("span.op", "ai.*");

let is_db = RuleCondition::eq("span.sentry_tags.category", "db")
& !(RuleCondition::eq("span.system", "mongodb")
| RuleCondition::glob("span.op", DISABLED_DATABASES)
| RuleCondition::glob("span.description", MONGODB_QUERIES));
& !RuleCondition::glob("span.op", DISABLED_DATABASES)
& (RuleCondition::eq("span.system", "mongodb")
| !RuleCondition::glob("span.description", MONGODB_QUERIES));
Comment on lines +120 to +121
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Took me a while to parse this condition, might benefit from a comment along the lines of "we disallow mongodb, unless span.system is set to mongodb".

let is_resource = RuleCondition::glob("span.op", RESOURCE_SPAN_OPS);

let is_cache = RuleCondition::glob("span.op", CACHE_SPAN_OPS);
Expand Down
7 changes: 7 additions & 0 deletions relay-dynamic-config/src/feature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ pub enum Feature {
#[serde(rename = "organizations:indexed-spans-extraction")]
ExtractSpansFromEvent,

/// Enables description scrubbing for MongoDB spans (and consequently, their presence in the
/// Queries module inside Sentry).
///
/// Serialized as `organizations:performance-queries-mongodb-extraction`.
#[serde(rename = "organizations:performance-queries-mongodb-extraction")]
ScrubMongoDBDescriptions,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this feature make sense the way it is right now?

It seems like mongodb is unconditionally enabled with this PR only the scrubbing is conditional. For me this seems like we should have a feature flag to enable/disable mongodb alltogether not just the scrubbing which is supposed to help us with cardinality.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's my understanding.

There are two places that Relay currently special-cases MongoDB. The first is disabled databases.

const DISABLED_DATABASES: &[&str] = &[
"*clickhouse*",
"*compile*",
"*mongodb*",
"*redis*",
"db.orm",
];

let is_db = RuleCondition::eq("span.sentry_tags.category", "db")
& !(RuleCondition::eq("span.system", "mongodb")
| RuleCondition::glob("span.op", DISABLED_DATABASES)
| RuleCondition::glob("span.description", MONGODB_QUERIES));

is_db is used to control the presence of certain metrics and tags in hardcoded_span_metrics.

This PR removes that check, which means we will produce more metrics. However, note that:

  • a number of SDKs (including v8 of the JS SDK) don't actually have mongodb as part of the span op, and were never subject to this check
  • high cardinality of the resulting metrics comes from the span.description tag, which is protected against separately (see following)

The second existing special case is in the span description scrubbing.

("db", sub) => {
if sub.contains("clickhouse")
|| sub.contains("mongodb")
|| sub.contains("redis")
|| is_legacy_activerecord(sub, db_system)
|| is_sql_mongodb(description, db_system)
{
None

We check again for mongodb in the op (which as I mentioned, is often already bypassed), but the is_sql_mongodb function catches them all at this point by checking for a db.system of mongodb or JSON-looking description. This is the handling we fall through to when the feature is off.


So:

  • This PR will change the situation from "some MongoDB spans are counted as is_db" to "all MongoDB spans are counted as is_db", but
  • There will be no increase in cardinality, as all MongoDB span descriptions will continue to be None in cases where the flag is not set.

In an ideal world, I would have kept the behaviour exactly the same and only conditionally changed DISABLED_DATABASES. But, looking at how hardcoded_span_metrics, I can't see any way to get feature flags down to it. So I reasoned that the outcomes above were probably acceptable, but I could definitely be wrong! Please let me know what you think. Thanks so much 🙏

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a number of SDKs (including v8 of the JS SDK) don't actually have mongodb as part of the span op, and were never subject to this check

I'd be worried about the Otel integrations here, not the SDKs necessarily.

high cardinality of the resulting metrics comes from the span.description tag, which is protected against separately (see following)

If we have some confidence in the scrubbing (which seems pretty sound to me, we can even start with a lower 'recursion limit'), I'd not use the feature flag at all. You will only find the outliers after it's too late anyways. And the feature flag just adds more conditionals and boilerplate while only toggling scrubbing not the full extraction.
That's the same approach that was taken for Redis.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


/// Deprecated, still forwarded for older downstream Relays.
#[doc(hidden)]
#[serde(rename = "organizations:transaction-name-mark-scrubbed-as-sanitized")]
Expand Down
6 changes: 6 additions & 0 deletions relay-event-normalization/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use uuid::Uuid;

use crate::normalize::request;
use crate::span::ai::normalize_ai_measurements;
use crate::span::description::ScrubMongoDescription;
use crate::span::tag_extraction::extract_span_tags_from_event;
use crate::utils::{self, get_event_user_tag, MAX_DURATION_MOBILE_MS};
use crate::{
Expand Down Expand Up @@ -158,6 +159,9 @@ pub struct NormalizationConfig<'a> {

/// Controls list of hosts to be excluded from scrubbing
pub span_allowed_hosts: &'a [Host],

/// Controls whether or not MongoDB span descriptions will be scrubbed.
pub scrub_mongo_description: ScrubMongoDescription,
}

impl<'a> Default for NormalizationConfig<'a> {
Expand Down Expand Up @@ -190,6 +194,7 @@ impl<'a> Default for NormalizationConfig<'a> {
normalize_spans: true,
replay_id: Default::default(),
span_allowed_hosts: Default::default(),
scrub_mongo_description: ScrubMongoDescription::Disabled,
}
}
}
Expand Down Expand Up @@ -332,6 +337,7 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) {
event,
config.max_tag_value_length,
config.span_allowed_hosts,
config.scrub_mongo_description.clone(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can derive the Copy trait on ScrubMongoDescription to make the .clone() unnecessary.

);
}

Expand Down
Loading