Skip to content

Commit ed1684c

Browse files
craig[bot], sumeerbhola, and spilchen
committed
155413: tests: use admission.io.overload in admission-control/elastic-io r=tbg a=sumeerbhola The previously used sub-level metric was flawed, in that the IO overload score could stay low even at higher sub-level counts if L0 had very few bytes (which is a deliberate choice in admission control). So admission control would not throttle elastic work as aggressively as the test expected it to. Running with this change, we don't exceed a score of 0.15, while the previously used sub-level count metric spikes higher. For example: ``` 2025/10/14 22:13:15 admission_control_elastic_io.go:105: admission_io_overload(store=1): 0.100000 2025/10/14 22:13:25 admission_control_elastic_io.go:105: admission_io_overload(store=1): 0.100000 I251014 22:13:27.665296 868 util/admission/io_load_listener.go:780 ⋮ [T1,Vsystem,n1,s1] 2918 IO overload: compaction score 0.150 (131 ssts, 9 sub-levels), L0 growth 551 MiB (write 551 MiB (ignored 0 B) ingest 0 B (ignored 0 B)): requests 15985 (0 bypassed) with 505 MiB acc-write (0 B bypassed) + 0 B acc-ingest (0 B bypassed) + 551 MiB adjusted-LSM-writes + 4.2 GiB adjusted-disk-writes + write-model 1.09x+1 B (smoothed 1.08x+1 B) + l0-ingest-model 0.00x+0 B (smoothed 0.75x+1 B) + ingest-model 0.00x+0 B (smoothed 1.00x+1 B) + write-amp-model 7.87x+1 B (smoothed 8.01x+1 B) + at-admission-tokens 126 B, compacted 550 MiB [≈545 MiB], flushed 799 MiB [≈838 MiB] (mult 0.77); admitting 649 MiB (rate 43 MiB/s) (elastic 519 MiB rate 35 MiB/s) due to memtable flush (multiplier 0.775) (used total: 543 MiB elastic 541 MiB); write stalls 0; diskBandwidthLimiter (unlimited) (tokenUtilization 0.00, tokensUsed (elastic 0 B, snapshot 0 B, regular 0 B) tokens (write 0 B (prev 0 B), read 0 B (prev 0 B)), writeBW 0 B/s, readBW 0 B/s, provisioned 0 B/s) 2025/10/14 22:13:35 admission_control_elastic_io.go:105: admission_io_overload(store=1): 0.050000 I251014 22:13:42.666326 868 util/admission/io_load_listener.go:780 ⋮ [T1,Vsystem,n1,s1] 2926 IO overload: compaction score 
0.050 (70 ssts, 5 sub-levels), L0 growth 498 MiB (write 498 MiB (ignored 0 B) ingest 0 B (ignored 0 B)): requests 15228 (0 bypassed) with 480 MiB acc-write (0 B bypassed) + 0 B acc-ingest (0 B bypassed) + 498 MiB adjusted-LSM-writes + 4.2 GiB adjusted-disk-writes + write-model 1.04x+1 B (smoothed 1.06x+1 B) + l0-ingest-model 0.00x+0 B (smoothed 0.75x+1 B) + ingest-model 0.00x+0 B (smoothed 1.00x+1 B) + write-amp-model 8.57x+1 B (smoothed 8.29x+1 B) + at-admission-tokens 153 B, compacted 498 MiB [≈522 MiB], flushed 883 MiB [≈860 MiB] (mult 0.77); admitting 667 MiB (rate 44 MiB/s) (elastic 533 MiB rate 36 MiB/s) due to memtable flush (multiplier 0.775) (used total: 519 MiB elastic 517 MiB); write stalls 0; diskBandwidthLimiter (unlimited) (tokenUtilization 0.00, tokensUsed (elastic 0 B, snapshot 0 B, regular 0 B) tokens (write 0 B (prev 0 B), read 0 B (prev 0 B)), writeBW 0 B/s, readBW 0 B/s, provisioned 0 B/s) ``` Fixes #148786 Fixes #156168 Fixes #156215 Epic: none Release note: None 155958: roachtestutil: use DETACHED option for INSPECT jobs r=spilchen a=spilchen Previously, CheckInspectDatabase used a statement timeout hack to background INSPECT jobs - it set a 5-second timeout and relied on the timeout error to leave jobs running. Now that INSPECT supports the DETACHED option, we use `INSPECT DATABASE <name> WITH OPTIONS DETACHED` to properly run jobs in the background. This provides a cleaner way to background the job. Informs #155676 Epic: CRDB-55075 Release note: none Co-authored-by: sumeerbhola <[email protected]> Co-authored-by: Matt Spilchen <[email protected]>
3 parents e7de41d + 577bb44 + e0f7fb4 commit ed1684c

File tree

2 files changed

+23
-51
lines changed

2 files changed

+23
-51
lines changed

pkg/cmd/roachtest/roachtestutil/validation_check.go

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -206,17 +206,6 @@ func discoverUserDatabases(ctx context.Context, db *gosql.DB) ([]string, error)
206206
return databases, rows.Err()
207207
}
208208

209-
// isStatementTimeoutError returns true if the error is a statement timeout error.
210-
// Statement timeout errors are expected when launching INSPECT jobs since we only
211-
// want to start the job, not wait for it to complete.
212-
func isStatementTimeoutError(err error) bool {
213-
var pqErr *pq.Error
214-
if errors.As(err, &pqErr) {
215-
return pgcode.MakeCode(string(pqErr.Code)) == pgcode.QueryCanceled
216-
}
217-
return false
218-
}
219-
220209
// isFeatureNotSupportedError returns true if the error is a feature not supported error.
221210
// This can occur when the cluster version is not yet upgraded to support INSPECT.
222211
func isFeatureNotSupportedError(err error) bool {
@@ -229,8 +218,7 @@ func isFeatureNotSupportedError(err error) bool {
229218

230219
// launchInspectJobs launches INSPECT DATABASE commands in parallel for all
231220
// provided databases using task manager for concurrency control. Each INSPECT
232-
// command enables the inspect command for that connection. Statement timeout
233-
// errors are ignored as they indicate the job was successfully started.
221+
// command is launched with the DETACHED option to run in the background.
234222
// Feature not supported errors are returned to the caller, indicating the
235223
// cluster version does not support INSPECT.
236224
func launchInspectJobs(
@@ -242,7 +230,6 @@ func launchInspectJobs(
242230
return errors.Wrap(err, "failed to disable INSPECT admission control")
243231
}
244232

245-
statementTimeout := 5 * time.Second
246233
tm := task.NewManager(ctx, l)
247234
g := tm.NewErrorGroup()
248235

@@ -253,30 +240,13 @@ func launchInspectJobs(
253240

254241
statements := []string{
255242
"SET enable_inspect_command = true",
256-
fmt.Sprintf("SET statement_timeout = '%s'", statementTimeout.String()),
257-
fmt.Sprintf("INSPECT DATABASE %s", lexbase.EscapeSQLIdent(dbName)),
243+
fmt.Sprintf("INSPECT DATABASE %s WITH OPTIONS DETACHED", lexbase.EscapeSQLIdent(dbName)),
258244
}
259245

260-
var stmtErr error
261246
for _, stmt := range statements {
262247
if _, err := db.ExecContext(ctx, stmt); err != nil {
263-
stmtErr = err
264-
break
265-
}
266-
}
267-
268-
// Always reset statement timeout back to default.
269-
if _, err := db.ExecContext(ctx, "RESET statement_timeout"); err != nil {
270-
l.Printf("Warning: failed to reset statement timeout: %v", err)
271-
}
272-
273-
// Check for errors from the statements loop.
274-
if stmtErr != nil {
275-
// Statement timeout is expected - it means the job started but didn't complete
276-
// within the timeout. The job is still running in the background.
277-
if !isStatementTimeoutError(stmtErr) {
278-
l.Printf("INSPECT DATABASE %s failed to start: %v", dbName, stmtErr)
279-
return errors.Wrapf(stmtErr, "failed to start INSPECT DATABASE %s", dbName)
248+
l.Printf("INSPECT DATABASE %s failed to start: %v", dbName, err)
249+
return errors.Wrapf(err, "failed to start INSPECT DATABASE %s", dbName)
280250
}
281251
}
282252

pkg/cmd/roachtest/tests/admission_control_elastic_io.go

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func registerElasticIO(r registry.Registry) {
8585
return nil
8686
})
8787
m.Go(func(ctx context.Context) error {
88-
const subLevelMetric = "storage_l0_sublevels"
88+
const ioOverloadMetric = "admission_io_overload"
8989
getMetricVal := func(metricName string) (float64, error) {
9090
point, err := statCollector.CollectPoint(ctx, t.L(), timeutil.Now(), metricName)
9191
if err != nil {
@@ -110,16 +110,16 @@ func registerElasticIO(r registry.Registry) {
110110
}
111111
now := timeutil.Now()
112112
endTime := now.Add(duration)
113-
// We typically see fluctuations from 1 to 5 sub-levels because the
114-
// elastic IO token logic gives 1.25*compaction-bandwidth tokens at 1
115-
// sub-level and 0.75*compaction-bandwidth at 5 sub-levels, with 5
116-
// sub-levels being very rare. We leave some breathing room and pick a
117-
// threshold of greater than 7 to fail the test. If elastic tokens are
118-
// not working, the threshold of 7 will be easily breached, since
119-
// regular tokens allow sub-levels to exceed 10.
120-
const subLevelThreshold = 7
121-
const sampleCountForL0Sublevel = 12
122-
var l0SublevelCount []float64
113+
// We typically see fluctuations from 0.05 to 0.25 IO overload score
114+
// because the elastic IO token logic gives 1.25*compaction-bandwidth
115+
// tokens at 0.05 score and 0.75*compaction-bandwidth at 0.25 score,
116+
// with 0.25 score being very rare. We leave some breathing room and
117+
// pick a threshold of greater than 0.35 to fail the test. If elastic
118+
// tokens are not working, the threshold of 0.35 will be easily
119+
// breached, since regular tokens allow the score to exceed 0.5.
120+
const ioOverloadThreshold = 0.35
121+
const sampleCountForIOOverload = 12
122+
var ioOverloadScore []float64
123123
// Sleep initially for stability to be achieved, before measuring.
124124
time.Sleep(5 * time.Minute)
125125
for {
@@ -129,17 +129,19 @@ func registerElasticIO(r registry.Registry) {
129129
default:
130130
}
131131
time.Sleep(10 * time.Second)
132-
val, err := getMetricVal(subLevelMetric)
132+
val, err := getMetricVal(ioOverloadMetric)
133133
if err != nil {
134134
continue
135135
}
136-
l0SublevelCount = append(l0SublevelCount, val)
136+
ioOverloadScore = append(ioOverloadScore, val)
137137
// We want to use the mean of the last 2m of data to avoid short-lived
138138
// spikes causing failures.
139-
if len(l0SublevelCount) >= sampleCountForL0Sublevel {
140-
latestSampleMeanL0Sublevels := roachtestutil.GetMeanOverLastN(sampleCountForL0Sublevel, l0SublevelCount)
141-
if latestSampleMeanL0Sublevels > subLevelThreshold {
142-
t.Fatalf("sub-level mean %f over last %d iterations exceeded threshold", latestSampleMeanL0Sublevels, sampleCountForL0Sublevel)
139+
if len(ioOverloadScore) >= sampleCountForIOOverload {
140+
latestSampleMeanIOOverloadScore :=
141+
roachtestutil.GetMeanOverLastN(sampleCountForIOOverload, ioOverloadScore)
142+
if latestSampleMeanIOOverloadScore > ioOverloadThreshold {
143+
t.Fatalf("io-overload score mean %f over last %d iterations exceeded threshold",
144+
latestSampleMeanIOOverloadScore, sampleCountForIOOverload)
143145
}
144146
}
145147
if timeutil.Now().After(endTime) {

0 commit comments

Comments (0)