Skip to content

Commit af446a2

Browse files
makazeu and amadeuszl authored
Implement disk io metrics for linux (#6374)
* Implement DiskStatsReader
* Add a UT
* Add LinuxDiskMetrics
* Add DiskOperation and DiskIoTime metrics for Linux
* update
* Add more tests
* Add UT
* Update src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/Log.cs Co-authored-by: Amadeusz Lechniak <[email protected]>
* Change ulong properties to uint in DiskStats and DiskStatsReader for improved memory efficiency
* update
* Rename `EnableDiskIoMetrics` to `EnableSystemDiskIoMetrics`
* Add compatibility suppressions for EnableDiskIoMetrics
* Add compatibility suppressions for EnableDiskIoMetrics
---------
Co-authored-by: Amadeusz Lechniak <[email protected]>
1 parent ce70b6d commit af446a2

File tree

15 files changed

+782
-17
lines changed

15 files changed

+782
-17
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<!-- https://learn.microsoft.com/dotnet/fundamentals/package-validation/diagnostic-ids -->
3+
<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
4+
<Suppression>
5+
<DiagnosticId>CP0002</DiagnosticId>
6+
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_EnableDiskIoMetrics</Target>
7+
<Left>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
8+
<Right>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
9+
<IsBaselineSuppression>true</IsBaselineSuppression>
10+
</Suppression>
11+
<Suppression>
12+
<DiagnosticId>CP0002</DiagnosticId>
13+
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_EnableDiskIoMetrics(System.Boolean)</Target>
14+
<Left>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
15+
<Right>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
16+
<IsBaselineSuppression>true</IsBaselineSuppression>
17+
</Suppression>
18+
<Suppression>
19+
<DiagnosticId>CP0002</DiagnosticId>
20+
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_EnableDiskIoMetrics</Target>
21+
<Left>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
22+
<Right>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
23+
<IsBaselineSuppression>true</IsBaselineSuppression>
24+
</Suppression>
25+
<Suppression>
26+
<DiagnosticId>CP0002</DiagnosticId>
27+
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_EnableDiskIoMetrics(System.Boolean)</Target>
28+
<Left>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
29+
<Right>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
30+
<IsBaselineSuppression>true</IsBaselineSuppression>
31+
</Suppression>
32+
<Suppression>
33+
<DiagnosticId>CP0002</DiagnosticId>
34+
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_EnableDiskIoMetrics</Target>
35+
<Left>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
36+
<Right>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
37+
<IsBaselineSuppression>true</IsBaselineSuppression>
38+
</Suppression>
39+
<Suppression>
40+
<DiagnosticId>CP0002</DiagnosticId>
41+
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_EnableDiskIoMetrics(System.Boolean)</Target>
42+
<Left>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
43+
<Right>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
44+
<IsBaselineSuppression>true</IsBaselineSuppression>
45+
</Suppression>
46+
</Suppressions>
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Linux.Disk;
5+
6+
/// <summary>
7+
/// Represents one line of statistics from "/proc/diskstats".
8+
/// See https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats for details.
9+
/// </summary>
/// <remarks>
/// Instances are filled in by DiskStatsReader.ParseLine, which assigns the whitespace-separated
/// fields of a line to the properties below in declaration order (field index 0 is
/// <see cref="MajorNumber"/>, index 13 is <see cref="WeightedTimeIoMs"/>).
/// </remarks>
10+
internal sealed class DiskStats
11+
{
12+
// Device identification: field indices 0-2 of the line.
public int MajorNumber { get; set; }
13+
public int MinorNumber { get; set; }
14+
public string DeviceName { get; set; } = string.Empty;
15+
// Read counters: field indices 3-6.
public ulong ReadsCompleted { get; set; }
16+
public ulong ReadsMerged { get; set; }
17+
// Sector counts are turned into bytes by LinuxSystemDiskMetrics using a fixed 512-byte sector size.
public ulong SectorsRead { get; set; }
18+
public uint TimeReadingMs { get; set; }
19+
// Write counters: field indices 7-10.
public ulong WritesCompleted { get; set; }
20+
public ulong WritesMerged { get; set; }
21+
public ulong SectorsWritten { get; set; }
22+
public uint TimeWritingMs { get; set; }
23+
// I/O activity: field indices 11-13.
public uint IoInProgress { get; set; }
24+
// Milliseconds; LinuxSystemDiskMetrics divides this value by 1000.0 to report seconds.
public uint TimeIoMs { get; set; }
25+
public uint WeightedTimeIoMs { get; set; }
26+
27+
// The following fields are available starting from kernel 4.18; if absent, remain 0
// (the parser only assigns them when the line has at least 18 fields).
28+
public ulong DiscardsCompleted { get; set; }
29+
public ulong DiscardsMerged { get; set; }
30+
public ulong SectorsDiscarded { get; set; }
31+
public uint TimeDiscardingMs { get; set; }
32+
33+
// The following fields are available starting from kernel 5.5; if absent, remain 0
// (the parser only assigns them when the line has at least 20 fields).
34+
public ulong FlushRequestsCompleted { get; set; }
35+
public uint TimeFlushingMs { get; set; }
36+
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics.CodeAnalysis;
7+
using System.Globalization;
8+
using System.IO;
9+
using Microsoft.Extensions.ObjectPool;
10+
using Microsoft.Shared.Pools;
11+
12+
namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Linux.Disk;
13+
14+
/// <summary>
/// Reads the Linux procfs disk statistics file ("/proc/diskstats") through the injected
/// file system abstraction and turns each line into a <see cref="DiskStats"/> instance.
/// </summary>
internal sealed class DiskStatsReader(IFileSystem fileSystem) : IDiskStatsReader
{
    private static readonly FileInfo _diskStatsFile = new("/proc/diskstats");
    private static readonly ObjectPool<BufferWriter<char>> _sharedBufferWriterPool = BufferWriterPool.CreateBufferWriterPool<char>();

    /// <summary>
    /// Reads the disk statistics file line by line and parses every non-blank line.
    /// </summary>
    /// <returns>
    /// The entries that were parsed successfully, in file order. Blank lines and lines that
    /// fail to parse are skipped.
    /// </returns>
    public List<DiskStats> ReadAll()
    {
        List<DiskStats> parsedEntries = [];

        // The pooled writer supplies the scratch buffer used while reading lines.
        using ReturnableBufferWriter<char> pooledWriter = new(_sharedBufferWriterPool);

        foreach (ReadOnlyMemory<char> rawLine in fileSystem.ReadAllByLines(_diskStatsFile, pooledWriter.Buffer))
        {
            string text = rawLine.Trim().ToString();
            if (!string.IsNullOrWhiteSpace(text))
            {
                try
                {
                    parsedEntries.Add(ParseLine(text));
                }
#pragma warning disable CA1031
                catch (Exception)
#pragma warning restore CA1031
                {
                    // Best effort: an unparsable line is dropped and the remaining lines are still processed.
                }
            }
        }

        return parsedEntries;
    }

    /// <summary>
    /// Converts a single line of "/proc/diskstats" into a <see cref="DiskStats"/> object.
    /// </summary>
    /// <param name="line">one line in "/proc/diskstats".</param>
    /// <returns>The populated <see cref="DiskStats"/>.</returns>
    /// <exception cref="FormatException">The line has fewer than 14 fields, or a numeric field is malformed.</exception>
    /// <exception cref="OverflowException">A numeric field does not fit its target type.</exception>
    [SuppressMessage("Major Code Smell", "S109:Magic numbers should not be used", Justification = "These numbers represent fixed field indices in the Linux /proc/diskstats format")]
    private static DiskStats ParseLine(string line)
    {
        // An empty separator array means "split on any whitespace"; empty entries are discarded.
#pragma warning disable EA0009
        string[] fields = line.Split(Array.Empty<char>(), StringSplitOptions.RemoveEmptyEntries);
#pragma warning restore EA0009

        int fieldCount = fields.Length;
        if (fieldCount < 14)
        {
            throw new FormatException($"Not enough fields: expected at least 14, got {fieldCount}");
        }

        int ReadInt32(int index) => int.Parse(fields[index], CultureInfo.InvariantCulture);
        uint ReadUInt32(int index) => uint.Parse(fields[index], CultureInfo.InvariantCulture);
        ulong ReadUInt64(int index) => ulong.Parse(fields[index], CultureInfo.InvariantCulture);

        // Field layout: https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
        DiskStats result = new();
        result.MajorNumber = ReadInt32(0);
        result.MinorNumber = ReadInt32(1);
        result.DeviceName = fields[2];
        result.ReadsCompleted = ReadUInt64(3);
        result.ReadsMerged = ReadUInt64(4);
        result.SectorsRead = ReadUInt64(5);
        result.TimeReadingMs = ReadUInt32(6);
        result.WritesCompleted = ReadUInt64(7);
        result.WritesMerged = ReadUInt64(8);
        result.SectorsWritten = ReadUInt64(9);
        result.TimeWritingMs = ReadUInt32(10);
        result.IoInProgress = ReadUInt32(11);
        result.TimeIoMs = ReadUInt32(12);
        result.WeightedTimeIoMs = ReadUInt32(13);

        // Optional trailing fields: the discard group needs 18 fields, the flush group needs 20.
        if (fieldCount >= 18)
        {
            result.DiscardsCompleted = ReadUInt64(14);
            result.DiscardsMerged = ReadUInt64(15);
            result.SectorsDiscarded = ReadUInt64(16);
            result.TimeDiscardingMs = ReadUInt32(17);

            if (fieldCount >= 20)
            {
                result.FlushRequestsCompleted = ReadUInt64(18);
                result.TimeFlushingMs = ReadUInt32(19);
            }
        }

        return result;
    }
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
6+
namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Linux.Disk;
7+
8+
/// <summary>
9+
/// An interface for reading disk statistics.
10+
/// </summary>
/// <remarks>
/// Implemented by DiskStatsReader, which reads "/proc/diskstats"; LinuxSystemDiskMetrics takes
/// this interface as a constructor parameter rather than the concrete reader.
/// </remarks>
11+
internal interface IDiskStatsReader
12+
{
13+
/// <summary>
14+
/// Gets all the disk statistics from the system.
15+
/// </summary>
16+
/// <returns>List of <see cref="DiskStats"/> instances.</returns>
17+
List<DiskStats> ReadAll();
18+
}
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics;
7+
using System.Diagnostics.Metrics;
8+
using System.Linq;
9+
using Microsoft.Extensions.Logging;
10+
using Microsoft.Extensions.Logging.Abstractions;
11+
using Microsoft.Extensions.Options;
12+
using Microsoft.Shared.Instruments;
13+
14+
namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Linux.Disk;
15+
16+
/// <summary>
/// Registers observable counters for system disk activity on Linux (bytes transferred, operation
/// counts and I/O time). Every value is reported per device, relative to a baseline that is read
/// once in the constructor.
/// </summary>
internal sealed class LinuxSystemDiskMetrics
{
    // The kernel's block layer always reports counts in 512-byte "sectors" regardless of the underlying device's real block size
    // https://docs.kernel.org/block/stat.html#read-sectors-write-sectors-discard-sectors
    private const int LinuxDiskSectorSize = 512;
    private const int MinimumDiskStatsRefreshIntervalInSeconds = 10;
    private const string DeviceKey = "system.device";
    private const string DirectionKey = "disk.io.direction";

    private static readonly KeyValuePair<string, object?> _directionReadTag = new(DirectionKey, "read");
    private static readonly KeyValuePair<string, object?> _directionWriteTag = new(DirectionKey, "write");
    private readonly ILogger<LinuxSystemDiskMetrics> _logger;
    private readonly TimeProvider _timeProvider;
    private readonly IDiskStatsReader _diskStatsReader;
    private readonly object _lock = new();
    private readonly Dictionary<string, DiskStats> _baselineDiskStatsDict = [];
    private List<DiskStats> _diskStatsSnapshot = [];
    private DateTimeOffset _lastRefreshTime = DateTimeOffset.MinValue;

    /// <summary>
    /// Initializes a new instance of the <see cref="LinuxSystemDiskMetrics"/> class and, when
    /// <c>EnableSystemDiskIoMetrics</c> is set, captures the baseline and registers the instruments.
    /// </summary>
    /// <param name="logger">Logger for read failures; a null logger is substituted when <see langword="null"/>.</param>
    /// <param name="meterFactory">Factory used to create the meter that owns the instruments.</param>
    /// <param name="options">Resource monitoring options; nothing is registered unless disk I/O metrics are enabled.</param>
    /// <param name="timeProvider">Clock used to decide when the cached snapshot must be refreshed.</param>
    /// <param name="diskStatsReader">Source of the raw disk statistics.</param>
    public LinuxSystemDiskMetrics(
        ILogger<LinuxSystemDiskMetrics>? logger,
        IMeterFactory meterFactory,
        IOptions<ResourceMonitoringOptions> options,
        TimeProvider timeProvider,
        IDiskStatsReader diskStatsReader)
    {
        _logger = logger ?? NullLogger<LinuxSystemDiskMetrics>.Instance;
        _timeProvider = timeProvider;
        _diskStatsReader = diskStatsReader;
        if (!options.Value.EnableSystemDiskIoMetrics)
        {
            return;
        }

        // We need to read the disk stats once to get the baseline values
        _baselineDiskStatsDict = GetAllDiskStats().ToDictionary(d => d.DeviceName);

#pragma warning disable CA2000 // Dispose objects before losing scope
        // We don't dispose the meter because IMeterFactory handles that
        // It's a false-positive, see: https://github.com/dotnet/roslyn-analyzers/issues/6912.
        // Related documentation: https://github.com/dotnet/docs/pull/37170
        Meter meter = meterFactory.Create(ResourceUtilizationInstruments.MeterName);
#pragma warning restore CA2000 // Dispose objects before losing scope

        // The metric is aligned with
        // https://opentelemetry.io/docs/specs/semconv/system/system-metrics/#metric-systemdiskio
        _ = meter.CreateObservableCounter(
            ResourceUtilizationInstruments.SystemDiskIo,
            GetDiskIoMeasurements,
            unit: "By",
            description: "Disk bytes transferred");

        // The metric is aligned with
        // https://opentelemetry.io/docs/specs/semconv/system/system-metrics/#metric-systemdiskoperations
        _ = meter.CreateObservableCounter(
            ResourceUtilizationInstruments.SystemDiskOperations,
            GetDiskOperationMeasurements,
            unit: "{operation}",
            description: "Disk operations");

        // The metric is aligned with
        // https://opentelemetry.io/docs/specs/semconv/system/system-metrics/#metric-systemdiskio_time
        _ = meter.CreateObservableCounter(
            ResourceUtilizationInstruments.SystemDiskIoTime,
            GetDiskIoTimeMeasurements,
            unit: "s",
            description: "Time disk spent activated");
    }

    /// <summary>
    /// Produces, per device, the bytes read and written since the baseline.
    /// </summary>
    /// <returns>Two measurements per device, tagged with the direction ("read"/"write") and the device name.</returns>
    private IEnumerable<Measurement<long>> GetDiskIoMeasurements()
    {
        List<Measurement<long>> measurements = [];
        List<DiskStats> diskStatsSnapshot = GetDiskStatsSnapshot();

        foreach (DiskStats diskStats in diskStatsSnapshot)
        {
            _ = _baselineDiskStatsDict.TryGetValue(diskStats.DeviceName, out DiskStats? baselineDiskStats);

            // The baseline must be defaulted BEFORE subtracting: "a - b?.X ?? 0" binds as "(a - b?.X) ?? 0",
            // which turns the whole delta into 0 for any device that has no baseline entry.
            ulong baselineSectorsRead = baselineDiskStats?.SectorsRead ?? 0UL;
            ulong baselineSectorsWritten = baselineDiskStats?.SectorsWritten ?? 0UL;

            long readBytes = (long)(diskStats.SectorsRead - baselineSectorsRead) * LinuxDiskSectorSize;
            long writeBytes = (long)(diskStats.SectorsWritten - baselineSectorsWritten) * LinuxDiskSectorSize;
            measurements.Add(new Measurement<long>(readBytes, new TagList { _directionReadTag, new(DeviceKey, diskStats.DeviceName) }));
            measurements.Add(new Measurement<long>(writeBytes, new TagList { _directionWriteTag, new(DeviceKey, diskStats.DeviceName) }));
        }

        return measurements;
    }

    /// <summary>
    /// Produces, per device, the number of completed read and write operations since the baseline.
    /// </summary>
    /// <returns>Two measurements per device, tagged with the direction ("read"/"write") and the device name.</returns>
    private IEnumerable<Measurement<long>> GetDiskOperationMeasurements()
    {
        List<Measurement<long>> measurements = [];
        List<DiskStats> diskStatsSnapshot = GetDiskStatsSnapshot();

        foreach (DiskStats diskStats in diskStatsSnapshot)
        {
            _ = _baselineDiskStatsDict.TryGetValue(diskStats.DeviceName, out DiskStats? baselineDiskStats);

            // Default the baseline first; see the operator-precedence note in GetDiskIoMeasurements.
            ulong baselineReads = baselineDiskStats?.ReadsCompleted ?? 0UL;
            ulong baselineWrites = baselineDiskStats?.WritesCompleted ?? 0UL;

            long readCount = (long)(diskStats.ReadsCompleted - baselineReads);
            long writeCount = (long)(diskStats.WritesCompleted - baselineWrites);
            measurements.Add(new Measurement<long>(readCount, new TagList { _directionReadTag, new(DeviceKey, diskStats.DeviceName) }));
            measurements.Add(new Measurement<long>(writeCount, new TagList { _directionWriteTag, new(DeviceKey, diskStats.DeviceName) }));
        }

        return measurements;
    }

    /// <summary>
    /// Produces, per device, the I/O time in seconds accumulated since the baseline.
    /// </summary>
    /// <returns>One measurement per device, tagged with the device name.</returns>
    private IEnumerable<Measurement<double>> GetDiskIoTimeMeasurements()
    {
        List<Measurement<double>> measurements = [];
        List<DiskStats> diskStatsSnapshot = GetDiskStatsSnapshot();

        foreach (DiskStats diskStats in diskStatsSnapshot)
        {
            _ = _baselineDiskStatsDict.TryGetValue(diskStats.DeviceName, out DiskStats? baselineDiskStats);

            // Default the baseline first so a device without a baseline entry reports its full value, not 0.
            uint baselineTimeIoMs = baselineDiskStats?.TimeIoMs ?? 0U;
            double ioTimeSeconds = (diskStats.TimeIoMs - baselineTimeIoMs) / 1000.0; // Convert to seconds
            measurements.Add(new Measurement<double>(ioTimeSeconds, new TagList { new(DeviceKey, diskStats.DeviceName) }));
        }

        return measurements;
    }

    /// <summary>
    /// Returns the cached disk statistics, re-reading them when the cache is empty or older than
    /// <see cref="MinimumDiskStatsRefreshIntervalInSeconds"/> seconds.
    /// </summary>
    /// <returns>The current snapshot; the returned list is not modified after it is published.</returns>
    private List<DiskStats> GetDiskStatsSnapshot()
    {
        lock (_lock)
        {
            DateTimeOffset now = _timeProvider.GetUtcNow();
            if (_diskStatsSnapshot.Count == 0 || (now - _lastRefreshTime).TotalSeconds > MinimumDiskStatsRefreshIntervalInSeconds)
            {
                _diskStatsSnapshot = GetAllDiskStats();
                _lastRefreshTime = now;
            }

            // Read the field while still holding the lock so the caller gets the snapshot this call validated.
            return _diskStatsSnapshot;
        }
    }

    /// <summary>
    /// Reads all disk statistics and drops the devices that are not reported.
    /// </summary>
    /// <returns>The filtered statistics, or an empty list when reading fails (the failure is logged as a warning).</returns>
    private List<DiskStats> GetAllDiskStats()
    {
        try
        {
            List<DiskStats> diskStatsList = _diskStatsReader.ReadAll();

            // ram, loop and dm (device-mapper) devices are excluded from the reported disk stats.
            diskStatsList = diskStatsList
                .Where(d => !d.DeviceName.StartsWith("ram", StringComparison.OrdinalIgnoreCase)
                    && !d.DeviceName.StartsWith("loop", StringComparison.OrdinalIgnoreCase)
                    && !d.DeviceName.StartsWith("dm-", StringComparison.OrdinalIgnoreCase))
                .ToList();
            return diskStatsList;
        }
#pragma warning disable CA1031
        catch (Exception ex)
#pragma warning restore CA1031
        {
            Log.HandleDiskStatsException(_logger, ex.Message);
        }

        return [];
    }
}

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/Log.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,8 @@ public static partial void CounterMessage100(
5656
public static partial void CounterMessage110(
5757
ILogger logger,
5858
long counterValue);
59+
60+
/// <summary>
/// Logs a warning (event id 7) that reading disk statistics failed.
/// LinuxSystemDiskMetrics.GetAllDiskStats calls this with the caught exception's message.
/// </summary>
/// <param name="logger">Logger that receives the message.</param>
/// <param name="errorMessage">Text substituted for the <c>{errorMessage}</c> placeholder.</param>
[LoggerMessage(7, LogLevel.Warning,
61+
"Error while getting disk stats: Error={errorMessage}")]
62+
public static partial void HandleDiskStatsException(ILogger logger, string errorMessage);
5963
}

0 commit comments

Comments
 (0)