Skip to content

Commit 38138d2

Browse files
committed
Addressed comments
Signed-off-by: Sriram Ganesh <[email protected]>
1 parent 112ce0d commit 38138d2

File tree

2 files changed

+171
-56
lines changed

2 files changed

+171
-56
lines changed

server/src/main/java/org/opensearch/index/AdaptiveTieredMergePolicyProvider.java

Lines changed: 91 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public class AdaptiveTieredMergePolicyProvider implements MergePolicyProvider {
4141
private static final ByteSizeValue SMALL_SHARD_MAX_SEGMENT = new ByteSizeValue(50, ByteSizeUnit.MB);
4242
private static final ByteSizeValue MEDIUM_SHARD_MAX_SEGMENT = new ByteSizeValue(200, ByteSizeUnit.MB);
4343
private static final ByteSizeValue LARGE_SHARD_MAX_SEGMENT = new ByteSizeValue(1, ByteSizeUnit.GB);
44-
private static final ByteSizeValue VERY_LARGE_SHARD_MAX_SEGMENT = new ByteSizeValue(2, ByteSizeUnit.GB);
44+
private static final ByteSizeValue VERY_LARGE_SHARD_MAX_SEGMENT = new ByteSizeValue(5, ByteSizeUnit.GB);
4545

4646
// Adaptive floor segment sizes
4747
private static final ByteSizeValue SMALL_SHARD_FLOOR = new ByteSizeValue(10, ByteSizeUnit.MB);
@@ -136,36 +136,12 @@ private ShardSizeCategory categorizeShardSize(long sizeBytes) {
136136
}
137137

138138
private void applyAdaptiveSettings(ShardSizeCategory category) {
139-
ByteSizeValue maxSegmentSize;
140-
ByteSizeValue floorSegmentSize;
141-
double segmentsPerTier;
142-
143-
switch (category) {
144-
case SMALL:
145-
maxSegmentSize = SMALL_SHARD_MAX_SEGMENT;
146-
floorSegmentSize = SMALL_SHARD_FLOOR;
147-
segmentsPerTier = SMALL_SHARD_SEGMENTS_PER_TIER;
148-
break;
149-
case MEDIUM:
150-
maxSegmentSize = MEDIUM_SHARD_MAX_SEGMENT;
151-
floorSegmentSize = MEDIUM_SHARD_FLOOR;
152-
segmentsPerTier = MEDIUM_SHARD_SEGMENTS_PER_TIER;
153-
break;
154-
case LARGE:
155-
maxSegmentSize = LARGE_SHARD_MAX_SEGMENT;
156-
floorSegmentSize = LARGE_SHARD_FLOOR;
157-
segmentsPerTier = LARGE_SHARD_SEGMENTS_PER_TIER;
158-
break;
159-
case VERY_LARGE:
160-
maxSegmentSize = VERY_LARGE_SHARD_MAX_SEGMENT;
161-
floorSegmentSize = VERY_LARGE_SHARD_FLOOR;
162-
segmentsPerTier = VERY_LARGE_SHARD_SEGMENTS_PER_TIER;
163-
break;
164-
default:
165-
maxSegmentSize = MEDIUM_SHARD_MAX_SEGMENT;
166-
floorSegmentSize = MEDIUM_SHARD_FLOOR;
167-
segmentsPerTier = MEDIUM_SHARD_SEGMENTS_PER_TIER;
168-
}
139+
// Use smooth interpolation instead of discrete categories to avoid dramatic parameter jumps
140+
long shardSizeBytes = estimateShardSize();
141+
142+
ByteSizeValue maxSegmentSize = calculateSmoothMaxSegmentSize(shardSizeBytes);
143+
ByteSizeValue floorSegmentSize = calculateSmoothFloorSegmentSize(shardSizeBytes);
144+
double segmentsPerTier = calculateSmoothSegmentsPerTier(shardSizeBytes);
169145

170146
// Apply the adaptive settings
171147
tieredMergePolicy.setMaxMergedSegmentMB(maxSegmentSize.getMbFrac());
@@ -186,6 +162,90 @@ private void applyAdaptiveSettings(ShardSizeCategory category) {
186162
);
187163
}
188164

165+
/**
166+
* Calculate smooth max segment size using logarithmic interpolation
167+
* to avoid dramatic jumps at category boundaries
168+
*/
169+
private ByteSizeValue calculateSmoothMaxSegmentSize(long shardSizeBytes) {
170+
// Use logarithmic interpolation between reference points
171+
// Reference points: 50MB@100MB, 200MB@1GB, 1GB@10GB, 5GB@100GB
172+
double logSize = Math.log10(shardSizeBytes);
173+
174+
if (logSize < 8.0) { // < 100MB
175+
return SMALL_SHARD_MAX_SEGMENT;
176+
} else if (logSize < 9.0) { // 100MB - 1GB
177+
// Linear interpolation between 50MB and 200MB
178+
double ratio = (logSize - 8.0) / 1.0;
179+
long interpolatedSize = (long) (SMALL_SHARD_MAX_SEGMENT.getBytes() + ratio * (MEDIUM_SHARD_MAX_SEGMENT.getBytes()
180+
- SMALL_SHARD_MAX_SEGMENT.getBytes()));
181+
return new ByteSizeValue(interpolatedSize);
182+
} else if (logSize < 10.0) { // 1GB - 10GB
183+
// Linear interpolation between 200MB and 1GB
184+
double ratio = (logSize - 9.0) / 1.0;
185+
long interpolatedSize = (long) (MEDIUM_SHARD_MAX_SEGMENT.getBytes() + ratio * (LARGE_SHARD_MAX_SEGMENT.getBytes()
186+
- MEDIUM_SHARD_MAX_SEGMENT.getBytes()));
187+
return new ByteSizeValue(interpolatedSize);
188+
} else if (logSize < 11.0) { // 10GB - 100GB
189+
// Linear interpolation between 1GB and 5GB
190+
double ratio = (logSize - 10.0) / 1.0;
191+
long interpolatedSize = (long) (LARGE_SHARD_MAX_SEGMENT.getBytes() + ratio * (VERY_LARGE_SHARD_MAX_SEGMENT.getBytes()
192+
- LARGE_SHARD_MAX_SEGMENT.getBytes()));
193+
return new ByteSizeValue(interpolatedSize);
194+
} else { // >= 100GB
195+
return VERY_LARGE_SHARD_MAX_SEGMENT;
196+
}
197+
}
198+
199+
/**
200+
* Calculate smooth floor segment size using logarithmic interpolation
201+
*/
202+
private ByteSizeValue calculateSmoothFloorSegmentSize(long shardSizeBytes) {
203+
double logSize = Math.log10(shardSizeBytes);
204+
205+
if (logSize < 8.0) { // < 100MB
206+
return SMALL_SHARD_FLOOR;
207+
} else if (logSize < 9.0) { // 100MB - 1GB
208+
double ratio = (logSize - 8.0) / 1.0;
209+
long interpolatedSize = (long) (SMALL_SHARD_FLOOR.getBytes() + ratio * (MEDIUM_SHARD_FLOOR.getBytes() - SMALL_SHARD_FLOOR
210+
.getBytes()));
211+
return new ByteSizeValue(interpolatedSize);
212+
} else if (logSize < 10.0) { // 1GB - 10GB
213+
double ratio = (logSize - 9.0) / 1.0;
214+
long interpolatedSize = (long) (MEDIUM_SHARD_FLOOR.getBytes() + ratio * (LARGE_SHARD_FLOOR.getBytes() - MEDIUM_SHARD_FLOOR
215+
.getBytes()));
216+
return new ByteSizeValue(interpolatedSize);
217+
} else if (logSize < 11.0) { // 10GB - 100GB
218+
double ratio = (logSize - 10.0) / 1.0;
219+
long interpolatedSize = (long) (LARGE_SHARD_FLOOR.getBytes() + ratio * (VERY_LARGE_SHARD_FLOOR.getBytes() - LARGE_SHARD_FLOOR
220+
.getBytes()));
221+
return new ByteSizeValue(interpolatedSize);
222+
} else { // >= 100GB
223+
return VERY_LARGE_SHARD_FLOOR;
224+
}
225+
}
226+
227+
/**
228+
* Calculate smooth segments per tier using logarithmic interpolation
229+
*/
230+
private double calculateSmoothSegmentsPerTier(long shardSizeBytes) {
231+
double logSize = Math.log10(shardSizeBytes);
232+
233+
if (logSize < 8.0) { // < 100MB
234+
return SMALL_SHARD_SEGMENTS_PER_TIER;
235+
} else if (logSize < 9.0) { // 100MB - 1GB
236+
double ratio = (logSize - 8.0) / 1.0;
237+
return SMALL_SHARD_SEGMENTS_PER_TIER + ratio * (MEDIUM_SHARD_SEGMENTS_PER_TIER - SMALL_SHARD_SEGMENTS_PER_TIER);
238+
} else if (logSize < 10.0) { // 1GB - 10GB
239+
double ratio = (logSize - 9.0) / 1.0;
240+
return MEDIUM_SHARD_SEGMENTS_PER_TIER + ratio * (LARGE_SHARD_SEGMENTS_PER_TIER - MEDIUM_SHARD_SEGMENTS_PER_TIER);
241+
} else if (logSize < 11.0) { // 10GB - 100GB
242+
double ratio = (logSize - 10.0) / 1.0;
243+
return LARGE_SHARD_SEGMENTS_PER_TIER + ratio * (VERY_LARGE_SHARD_SEGMENTS_PER_TIER - LARGE_SHARD_SEGMENTS_PER_TIER);
244+
} else { // >= 100GB
245+
return VERY_LARGE_SHARD_SEGMENTS_PER_TIER;
246+
}
247+
}
248+
189249
private void applyDefaultSettings() {
190250
// Fallback to the original default settings
191251
tieredMergePolicy.setMaxMergedSegmentMB(5 * 1024); // 5GB

server/src/main/java/org/opensearch/index/analysis/SegmentTopologyAnalyzer.java

Lines changed: 80 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -168,45 +168,100 @@ public MergePolicyRecommendations analyzeAndRecommend() {
168168
}
169169

170170
private long calculateRecommendedMaxSegmentSize() {
171-
// Base recommendation on total size and current distribution
172-
long totalSize = totalSizeBytes;
171+
// Use smooth interpolation to avoid dramatic parameter jumps
172+
return calculateSmoothMaxSegmentSize(totalSizeBytes);
173+
}
174+
175+
/**
176+
* Calculate smooth max segment size using logarithmic interpolation
177+
* to avoid dramatic jumps at category boundaries
178+
*/
179+
private long calculateSmoothMaxSegmentSize(long shardSizeBytes) {
180+
// Reference points: 50MB@100MB, 200MB@1GB, 1GB@10GB, 5GB@100GB
181+
double logSize = Math.log10(shardSizeBytes);
173182

174-
if (totalSize < 100 * 1024 * 1024) { // < 100MB
183+
if (logSize < 8.0) { // < 100MB
175184
return 50 * 1024 * 1024; // 50MB
176-
} else if (totalSize < 1024 * 1024 * 1024) { // < 1GB
177-
return 200 * 1024 * 1024; // 200MB
178-
} else if (totalSize < 10L * 1024 * 1024 * 1024) { // < 10GB
179-
return 1024 * 1024 * 1024; // 1GB
180-
} else { // >= 10GB
181-
return 2L * 1024 * 1024 * 1024; // 2GB
185+
} else if (logSize < 9.0) { // 100MB - 1GB
186+
// Linear interpolation between 50MB and 200MB
187+
double ratio = (logSize - 8.0) / 1.0;
188+
long smallSize = 50 * 1024 * 1024;
189+
long mediumSize = 200 * 1024 * 1024;
190+
return (long) (smallSize + ratio * (mediumSize - smallSize));
191+
} else if (logSize < 10.0) { // 1GB - 10GB
192+
// Linear interpolation between 200MB and 1GB
193+
double ratio = (logSize - 9.0) / 1.0;
194+
long mediumSize = 200 * 1024 * 1024;
195+
long largeSize = 1024 * 1024 * 1024;
196+
return (long) (mediumSize + ratio * (largeSize - mediumSize));
197+
} else if (logSize < 11.0) { // 10GB - 100GB
198+
// Linear interpolation between 1GB and 5GB
199+
double ratio = (logSize - 10.0) / 1.0;
200+
long largeSize = 1024 * 1024 * 1024;
201+
long veryLargeSize = 5L * 1024 * 1024 * 1024;
202+
return (long) (largeSize + ratio * (veryLargeSize - largeSize));
203+
} else { // >= 100GB
204+
return 5L * 1024 * 1024 * 1024; // 5GB
182205
}
183206
}
184207

185208
private long calculateRecommendedFloorSegmentSize() {
186-
long totalSize = totalSizeBytes;
209+
// Use smooth interpolation to avoid dramatic parameter jumps
210+
return calculateSmoothFloorSegmentSize(totalSizeBytes);
211+
}
187212

188-
if (totalSize < 100 * 1024 * 1024) { // < 100MB
213+
/**
214+
* Calculate smooth floor segment size using logarithmic interpolation
215+
*/
216+
private long calculateSmoothFloorSegmentSize(long shardSizeBytes) {
217+
double logSize = Math.log10(shardSizeBytes);
218+
219+
if (logSize < 8.0) { // < 100MB
189220
return 10 * 1024 * 1024; // 10MB
190-
} else if (totalSize < 1024 * 1024 * 1024) { // < 1GB
191-
return 25 * 1024 * 1024; // 25MB
192-
} else if (totalSize < 10L * 1024 * 1024 * 1024) { // < 10GB
193-
return 50 * 1024 * 1024; // 50MB
194-
} else { // >= 10GB
221+
} else if (logSize < 9.0) { // 100MB - 1GB
222+
double ratio = (logSize - 8.0) / 1.0;
223+
long smallSize = 10 * 1024 * 1024;
224+
long mediumSize = 25 * 1024 * 1024;
225+
return (long) (smallSize + ratio * (mediumSize - smallSize));
226+
} else if (logSize < 10.0) { // 1GB - 10GB
227+
double ratio = (logSize - 9.0) / 1.0;
228+
long mediumSize = 25 * 1024 * 1024;
229+
long largeSize = 50 * 1024 * 1024;
230+
return (long) (mediumSize + ratio * (largeSize - mediumSize));
231+
} else if (logSize < 11.0) { // 10GB - 100GB
232+
double ratio = (logSize - 10.0) / 1.0;
233+
long largeSize = 50 * 1024 * 1024;
234+
long veryLargeSize = 100 * 1024 * 1024;
235+
return (long) (largeSize + ratio * (veryLargeSize - largeSize));
236+
} else { // >= 100GB
195237
return 100 * 1024 * 1024; // 100MB
196238
}
197239
}
198240

199241
private int calculateOptimalSegmentCount() {
200-
long totalSize = totalSizeBytes;
242+
// Use smooth interpolation to avoid dramatic parameter jumps
243+
return (int) Math.round(calculateSmoothSegmentsPerTier(totalSizeBytes));
244+
}
201245

202-
if (totalSize < 100 * 1024 * 1024) { // < 100MB
203-
return 5;
204-
} else if (totalSize < 1024 * 1024 * 1024) { // < 1GB
205-
return 8;
206-
} else if (totalSize < 10L * 1024 * 1024 * 1024) { // < 10GB
207-
return 10;
208-
} else { // >= 10GB
209-
return 12;
246+
/**
247+
* Calculate smooth segments per tier using logarithmic interpolation
248+
*/
249+
private double calculateSmoothSegmentsPerTier(long shardSizeBytes) {
250+
double logSize = Math.log10(shardSizeBytes);
251+
252+
if (logSize < 8.0) { // < 100MB
253+
return 5.0;
254+
} else if (logSize < 9.0) { // 100MB - 1GB
255+
double ratio = (logSize - 8.0) / 1.0;
256+
return 5.0 + ratio * (8.0 - 5.0);
257+
} else if (logSize < 10.0) { // 1GB - 10GB
258+
double ratio = (logSize - 9.0) / 1.0;
259+
return 8.0 + ratio * (10.0 - 8.0);
260+
} else if (logSize < 11.0) { // 10GB - 100GB
261+
double ratio = (logSize - 10.0) / 1.0;
262+
return 10.0 + ratio * (12.0 - 10.0);
263+
} else { // >= 100GB
264+
return 12.0;
210265
}
211266
}
212267

0 commit comments

Comments
 (0)