@@ -61,6 +61,7 @@ public class RemoteFsTimestampAwareTranslog extends RemoteFsTranslog {
6161 private final Map <String , Tuple <Long , Long >> oldFormatMetadataFileGenerationMap ;
6262 private final Map <String , Tuple <Long , Long >> oldFormatMetadataFilePrimaryTermMap ;
6363 private final AtomicLong minPrimaryTermInRemote = new AtomicLong (Long .MAX_VALUE );
64+ private long maxDeletedGenerationOnRemote = 0 ;
6465
6566 public RemoteFsTimestampAwareTranslog (
6667 TranslogConfig config ,
@@ -135,13 +136,20 @@ protected void trimUnreferencedReaders(boolean indexDeleted, boolean trimLocal)
135136
136137 // This is to ensure that after the permits are acquired during primary relocation, there are no further modification on remote
137138 // store.
138- if (startedPrimarySupplier .getAsBoolean () == false || pauseSync .get ()) {
139+ if (( indexDeleted == false && startedPrimarySupplier .getAsBoolean () == false ) || pauseSync .get ()) {
139140 return ;
140141 }
141142
142143 // This is to fail fast and avoid listing md files un-necessarily.
143144 if (indexDeleted == false && RemoteStoreUtils .isPinnedTimestampStateStale ()) {
144- logger .warn ("Skipping remote segment store garbage collection as last fetch of pinned timestamp is stale" );
145+ logger .warn ("Skipping remote translog garbage collection as last fetch of pinned timestamp is stale" );
146+ return ;
147+ }
148+
149+ // This code block ensures parity with RemoteFsTranslog. Without this, we will end up making list translog metadata
150+ // call in each invocation of trimUnreferencedReaders
151+ long minGenerationToKeep = minRemoteGenReferenced - indexSettings ().getRemoteTranslogExtraKeep ();
152+ if (indexDeleted == false && (minGenerationToKeep <= maxDeletedGenerationOnRemote )) {
145153 return ;
146154 }
147155
@@ -158,24 +166,20 @@ public void onResponse(List<BlobMetadata> blobMetadata) {
158166 List <String > metadataFiles = blobMetadata .stream ().map (BlobMetadata ::name ).collect (Collectors .toList ());
159167
160168 try {
161- if (metadataFiles .size () <= 1 ) {
169+ if (indexDeleted == false && metadataFiles .size () <= 1 ) {
162170 logger .debug ("No stale translog metadata files found" );
163171 remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
164172 return ;
165173 }
166174
167175 // Check last fetch status of pinned timestamps. If stale, return.
168176 if (indexDeleted == false && RemoteStoreUtils .isPinnedTimestampStateStale ()) {
169- logger .warn ("Skipping remote segment store garbage collection as last fetch of pinned timestamp is stale" );
177+ logger .warn ("Skipping remote translog garbage collection as last fetch of pinned timestamp is stale" );
170178 remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
171179 return ;
172180 }
173181
174- List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (
175- metadataFiles ,
176- metadataFilePinnedTimestampMap ,
177- logger
178- );
182+ List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (metadataFiles , indexDeleted );
179183
180184 // If index is not deleted, make sure to keep latest metadata file
181185 if (indexDeleted == false ) {
@@ -194,21 +198,28 @@ public void onResponse(List<BlobMetadata> blobMetadata) {
194198 metadataFilesNotToBeDeleted .removeAll (metadataFilesToBeDeleted );
195199
196200 logger .debug (() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted );
201+
197202 Set <Long > generationsToBeDeleted = getGenerationsToBeDeleted (
198203 metadataFilesNotToBeDeleted ,
199204 metadataFilesToBeDeleted ,
200- indexDeleted
205+ indexDeleted ? Long . MAX_VALUE : minRemoteGenReferenced
201206 );
202207
203208 logger .debug (() -> "generationsToBeDeleted = " + generationsToBeDeleted );
204209 if (generationsToBeDeleted .isEmpty () == false ) {
210+ maxDeletedGenerationOnRemote = generationsToBeDeleted .stream ().max (Long ::compareTo ).get ();
211+
205212 // Delete stale generations
206213 translogTransferManager .deleteGenerationAsync (
207214 primaryTermSupplier .getAsLong (),
208215 generationsToBeDeleted ,
209216 remoteGenerationDeletionPermits ::release
210217 );
218+ } else {
219+ remoteGenerationDeletionPermits .release ();
220+ }
211221
222+ if (metadataFilesToBeDeleted .isEmpty () == false ) {
212223 // Delete stale metadata files
213224 translogTransferManager .deleteMetadataFilesAsync (
214225 metadataFilesToBeDeleted ,
@@ -217,11 +228,10 @@ public void onResponse(List<BlobMetadata> blobMetadata) {
217228
218229 // Update cache to keep only those metadata files that are not getting deleted
219230 oldFormatMetadataFileGenerationMap .keySet ().retainAll (metadataFilesNotToBeDeleted );
220-
221231 // Delete stale primary terms
222232 deleteStaleRemotePrimaryTerms (metadataFilesNotToBeDeleted );
223233 } else {
224- remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
234+ remoteGenerationDeletionPermits .release ();
225235 }
226236 } catch (Exception e ) {
227237 remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
@@ -241,14 +251,8 @@ public void onFailure(Exception e) {
241251 protected Set <Long > getGenerationsToBeDeleted (
242252 List <String > metadataFilesNotToBeDeleted ,
243253 List <String > metadataFilesToBeDeleted ,
244- boolean indexDeleted
254+ long minRemoteGenReferenced
245255 ) throws IOException {
246- long maxGenerationToBeDeleted = Long .MAX_VALUE ;
247-
248- if (indexDeleted == false ) {
249- maxGenerationToBeDeleted = minRemoteGenReferenced - 1 - indexSettings ().getRemoteTranslogExtraKeep ();
250- }
251-
252256 Set <Long > generationsFromMetadataFilesToBeDeleted = new HashSet <>();
253257 for (String mdFile : metadataFilesToBeDeleted ) {
254258 Tuple <Long , Long > minMaxGen = getMinMaxTranslogGenerationFromMetadataFile (mdFile , translogTransferManager );
@@ -262,24 +266,36 @@ protected Set<Long> getGenerationsToBeDeleted(
262266 Set <Long > generationsToBeDeleted = new HashSet <>();
263267 for (long generation : generationsFromMetadataFilesToBeDeleted ) {
264268 // Check if the generation is not referred by metadata file matching pinned timestamps
265- if (generation <= maxGenerationToBeDeleted && isGenerationPinned (generation , pinnedGenerations ) == false ) {
269+ // The check with minRemoteGenReferenced is redundant but kept as to make sure we don't delete generations
270+ // that are not persisted in remote segment store yet.
271+ if (generation < minRemoteGenReferenced && isGenerationPinned (generation , pinnedGenerations ) == false ) {
266272 generationsToBeDeleted .add (generation );
267273 }
268274 }
269275 return generationsToBeDeleted ;
270276 }
271277
272- protected List <String > getMetadataFilesToBeDeleted (List <String > metadataFiles ) {
273- return getMetadataFilesToBeDeleted (metadataFiles , metadataFilePinnedTimestampMap , logger );
278+ protected List <String > getMetadataFilesToBeDeleted (List <String > metadataFiles , boolean indexDeleted ) {
279+ return getMetadataFilesToBeDeleted (
280+ metadataFiles ,
281+ metadataFilePinnedTimestampMap ,
282+ minRemoteGenReferenced ,
283+ Map .of (),
284+ indexDeleted ,
285+ logger
286+ );
274287 }
275288
276289 // Visible for testing
277290 protected static List <String > getMetadataFilesToBeDeleted (
278291 List <String > metadataFiles ,
279292 Map <Long , String > metadataFilePinnedTimestampMap ,
293+ long minRemoteGenReferenced ,
294+ Map <String , Long > pinnedTimestampsToSkip ,
295+ boolean indexDeleted ,
280296 Logger logger
281297 ) {
282- Tuple <Long , Set <Long >> pinnedTimestampsState = RemoteStorePinnedTimestampService .getPinnedTimestamps ();
298+ Tuple <Long , Set <Long >> pinnedTimestampsState = RemoteStorePinnedTimestampService .getPinnedTimestamps (pinnedTimestampsToSkip );
283299
284300 // Keep files since last successful run of scheduler
285301 List <String > metadataFilesToBeDeleted = RemoteStoreUtils .filterOutMetadataFilesBasedOnAge (
@@ -312,6 +328,22 @@ protected static List<String> getMetadataFilesToBeDeleted(
312328 metadataFilesToBeDeleted .size ()
313329 );
314330
331+ if (indexDeleted == false ) {
332+ // Filter out metadata files based on minRemoteGenReferenced
333+ List <String > metadataFilesContainingMinRemoteGenReferenced = metadataFilesToBeDeleted .stream ().filter (md -> {
334+ long maxGeneration = TranslogTransferMetadata .getMaxGenerationFromFileName (md );
335+ return maxGeneration == -1 || maxGeneration > minRemoteGenReferenced ;
336+ }).collect (Collectors .toList ());
337+ metadataFilesToBeDeleted .removeAll (metadataFilesContainingMinRemoteGenReferenced );
338+
339+ logger .trace (
340+ "metadataFilesContainingMinRemoteGenReferenced.size = {}, metadataFilesToBeDeleted based on minRemoteGenReferenced filtering = {}, minRemoteGenReferenced = {}" ,
341+ metadataFilesContainingMinRemoteGenReferenced .size (),
342+ metadataFilesToBeDeleted .size (),
343+ minRemoteGenReferenced
344+ );
345+ }
346+
315347 return metadataFilesToBeDeleted ;
316348 }
317349
@@ -472,50 +504,65 @@ protected static Tuple<Long, Long> getMinMaxPrimaryTermFromMetadataFile(
472504 }
473505 }
474506
475- public static void cleanup (TranslogTransferManager translogTransferManager ) throws IOException {
476- ActionListener <List <BlobMetadata >> listMetadataFilesListener = new ActionListener <>() {
477- @ Override
478- public void onResponse (List <BlobMetadata > blobMetadata ) {
479- List <String > metadataFiles = blobMetadata .stream ().map (BlobMetadata ::name ).collect (Collectors .toList ());
507+ public static void cleanup (
508+ TranslogTransferManager translogTransferManager ,
509+ boolean forceClean ,
510+ Map <String , Long > pinnedTimestampsToSkip
511+ ) throws IOException {
512+ if (forceClean ) {
513+ translogTransferManager .delete ();
514+ } else {
515+ ActionListener <List <BlobMetadata >> listMetadataFilesListener = new ActionListener <>() {
516+ @ Override
517+ public void onResponse (List <BlobMetadata > blobMetadata ) {
518+ List <String > metadataFiles = blobMetadata .stream ().map (BlobMetadata ::name ).collect (Collectors .toList ());
519+
520+ try {
521+ if (metadataFiles .isEmpty ()) {
522+ staticLogger .debug ("No stale translog metadata files found" );
523+ return ;
524+ }
525+ List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (
526+ metadataFiles ,
527+ new HashMap <>(),
528+ Long .MAX_VALUE ,
529+ pinnedTimestampsToSkip ,
530+ true ,
531+ staticLogger
532+ );
533+ if (metadataFilesToBeDeleted .isEmpty ()) {
534+ staticLogger .debug ("No metadata files to delete" );
535+ return ;
536+ }
537+ staticLogger .debug (() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted );
480538
481- try {
482- if (metadataFiles .isEmpty ()) {
483- staticLogger .debug ("No stale translog metadata files found" );
484- return ;
485- }
486- List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (metadataFiles , new HashMap <>(), staticLogger );
487- if (metadataFilesToBeDeleted .isEmpty ()) {
488- staticLogger .debug ("No metadata files to delete" );
489- return ;
490- }
491- staticLogger .debug (() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted );
539+ // For all the files that we are keeping, fetch min and max generations
540+ List <String > metadataFilesNotToBeDeleted = new ArrayList <>(metadataFiles );
541+ metadataFilesNotToBeDeleted .removeAll (metadataFilesToBeDeleted );
542+ staticLogger .debug (() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted );
492543
493- // For all the files that we are keeping, fetch min and max generations
494- List <String > metadataFilesNotToBeDeleted = new ArrayList <>(metadataFiles );
495- metadataFilesNotToBeDeleted .removeAll (metadataFilesToBeDeleted );
496- staticLogger .debug (() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted );
544+ // Delete stale metadata files
545+ translogTransferManager .deleteMetadataFilesAsync (metadataFilesToBeDeleted , () -> {});
497546
498- // Delete stale metadata files
499- translogTransferManager .deleteMetadataFilesAsync (metadataFilesToBeDeleted , () -> {});
547+ // Delete stale primary terms
548+ deleteStaleRemotePrimaryTerms (
549+ metadataFilesNotToBeDeleted ,
550+ translogTransferManager ,
551+ new HashMap <>(),
552+ new AtomicLong (Long .MAX_VALUE ),
553+ staticLogger
554+ );
555+ } catch (Exception e ) {
556+ staticLogger .error ("Exception while cleaning up metadata and primary terms" , e );
557+ }
558+ }
500559
501- // Delete stale primary terms
502- deleteStaleRemotePrimaryTerms (
503- metadataFilesNotToBeDeleted ,
504- translogTransferManager ,
505- new HashMap <>(),
506- new AtomicLong (Long .MAX_VALUE ),
507- staticLogger
508- );
509- } catch (Exception e ) {
560+ @ Override
561+ public void onFailure (Exception e ) {
510562 staticLogger .error ("Exception while cleaning up metadata and primary terms" , e );
511563 }
512- }
513-
514- @ Override
515- public void onFailure (Exception e ) {
516- staticLogger .error ("Exception while cleaning up metadata and primary terms" , e );
517- }
518- };
519- translogTransferManager .listTranslogMetadataFilesAsync (listMetadataFilesListener );
564+ };
565+ translogTransferManager .listTranslogMetadataFilesAsync (listMetadataFilesListener );
566+ }
520567 }
521568}
0 commit comments