@@ -265,12 +265,41 @@ internal void SnapshotDeltaWithMetricPointReclaim()
265265
266266 if ( metricPoint . MetricPointStatus == MetricPointStatus . NoCollectPending )
267267 {
268+ // Reclaim the MetricPoint if it was marked for it in the previous collect cycle
269+ if ( metricPoint . LookupData != null && metricPoint . LookupData . DeferredReclaim == true )
270+ {
271+ this . ReclaimMetricPoint ( ref metricPoint , i ) ;
272+ continue ;
273+ }
274+
275+ // Check if the MetricPoint could be reclaimed in the current Collect cycle.
268276 // If metricPoint.LookupData is `null` then the MetricPoint is already reclaimed and in the queue.
269277 // If the Collect thread is successfully able to compare and swap the reference count from zero to int.MinValue, it means that
270278 // the MetricPoint can be reused for other tags.
271279 if ( metricPoint . LookupData != null && Interlocked . CompareExchange ( ref metricPoint . ReferenceCount , int . MinValue , 0 ) == 0 )
272280 {
273- this . ReclaimMetricPoint ( ref metricPoint , i ) ;
281+ // This is similar to double-checked locking. In some rare cases, the Collect thread might read the status as `NoCollectPending`,
282+ // and then get switched out before it could set the ReferenceCount to `int.MinValue`. In the meantime, an Update thread could come in
283+ // and update the MetricPoint, thereby, setting its status to `CollectPending`. Note that the ReferenceCount would be 0 after the update.
284+ // If the Collect thread now wakes up, it would be able to set the ReferenceCount to `int.MinValue`, thereby, marking the MetricPoint
285+ // invalid for newer updates. In such cases, the MetricPoint should not be reclaimed before taking its Snapshot.
286+
287+ if ( metricPoint . MetricPointStatus == MetricPointStatus . NoCollectPending )
288+ {
289+ this . ReclaimMetricPoint ( ref metricPoint , i ) ;
290+ }
291+ else
292+ {
293+ // MetricPoint's ReferenceCount is `int.MinValue` but it still has a collect pending. Take the MetricPoint's Snapshot
294+ // and mark it to be reclaimed in the next Collect cycle.
295+
296+ metricPoint . LookupData . DeferredReclaim = true ;
297+
298+ this . TakeMetricPointSnapshot ( ref metricPoint , outputDelta : true ) ;
299+
300+ this . currentMetricPointBatch [ this . batchSize ] = i ;
301+ this . batchSize ++ ;
302+ }
274303 }
275304
276305 continue ;
0 commit comments