diff --git a/CHANGELOG.md b/CHANGELOG.md index 8599977452207..0c2f7652a1faa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Implement GRPC Search params `Highlight`and `Sort` ([#19868](https://github.com/opensearch-project/OpenSearch/pull/19868)) - Implement GRPC ConstantScoreQuery, FuzzyQuery, MatchBoolPrefixQuery, MatchPhrasePrefix, PrefixQuery, MatchQuery ([#19854](https://github.com/opensearch-project/OpenSearch/pull/19854)) - Add async periodic flush task support for pull-based ingestion ([#19878](https://github.com/opensearch-project/OpenSearch/pull/19878)) +- Add support for context aware segments ([#19098](https://github.com/opensearch-project/OpenSearch/pull/19098)) ### Changed - Faster `terms` query creation for `keyword` field with index and docValues enabled ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350)) diff --git a/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryAwareStore.java b/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryAwareStore.java index 351fb4f0c7fe5..af8b2c917b953 100644 --- a/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryAwareStore.java +++ b/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryAwareStore.java @@ -26,6 +26,7 @@ import org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; +import org.opensearch.plugins.IndexStorePlugin; import java.io.IOException; import java.nio.file.FileVisitResult; @@ -79,6 +80,37 @@ public SubdirectoryAwareStore( super(shardId, indexSettings, new SubdirectoryAwareDirectory(directory, shardPath), shardLock, onClose, shardPath); } + /** + * Constructor for SubdirectoryAwareStore. + * + * @param shardId the shard ID + * @param indexSettings the index settings + * @param directory the directory to use for the store + * @param shardLock the shard lock + * @param onClose the on close callback + * @param shardPath the shard path + * @param directoryFactory the directory factory + */ + public SubdirectoryAwareStore( + ShardId shardId, + IndexSettings indexSettings, + Directory directory, + ShardLock shardLock, + OnClose onClose, + ShardPath shardPath, + IndexStorePlugin.DirectoryFactory directoryFactory + ) { + super( + shardId, + indexSettings, + new SubdirectoryAwareDirectory(directory, shardPath), + shardLock, + onClose, + shardPath, + directoryFactory + ); + } + @Override public MetadataSnapshot getMetadata(IndexCommit commit) throws IOException { long totalNumDocs = 0; diff --git a/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePlugin.java b/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePlugin.java index f25e22d90633a..7c1836256469f 100644 --- a/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePlugin.java +++ b/modules/store-subdirectory/src/main/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePlugin.java @@ -81,5 +81,31 @@ public Store newStore( ) { return new SubdirectoryAwareStore(shardId, indexSettings, directory, shardLock, onClose, shardPath); } + + /** + * Creates a new {@link SubdirectoryAwareStore} instance. 
+ * + * @param shardId the shard identifier + * @param indexSettings the index settings + * @param directory the underlying Lucene directory + * @param shardLock the shard lock + * @param onClose callback to execute when the store is closed + * @param shardPath the path information for the shard + * @param directoryFactory the directory factory to create child level directory. + * Used for Context Aware Segments enabled indices. + * @return a new SubdirectoryAwareStore instance + */ + @Override + public Store newStore( + ShardId shardId, + IndexSettings indexSettings, + Directory directory, + ShardLock shardLock, + Store.OnClose onClose, + ShardPath shardPath, + DirectoryFactory directoryFactory + ) { + return new SubdirectoryAwareStore(shardId, indexSettings, directory, shardLock, onClose, shardPath, directoryFactory); + } } } diff --git a/modules/store-subdirectory/src/test/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePluginTests.java b/modules/store-subdirectory/src/test/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePluginTests.java index b7f2f54a09ff2..68ff237beb023 100644 --- a/modules/store-subdirectory/src/test/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePluginTests.java +++ b/modules/store-subdirectory/src/test/java/org/opensearch/plugin/store/subdirectory/SubdirectoryStorePluginTests.java @@ -20,6 +20,7 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.store.FsDirectoryFactory; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreStats; import org.opensearch.plugins.IndexStorePlugin; @@ -67,7 +68,8 @@ public void testStats() throws IOException { SubdirectoryStorePluginTests.newFSDirectory(path.resolve("index")), new DummyShardLock(shardId), Store.OnClose.EMPTY, - new ShardPath(false, path, path, shardId) + new ShardPath(false, path, path, shardId), + new FsDirectoryFactory() ); long initialStoreSize = 0; diff --git a/plugins/store-smb/src/main/java/org/opensearch/index/store/smbmmapfs/SmbMmapFsDirectoryFactory.java b/plugins/store-smb/src/main/java/org/opensearch/index/store/smbmmapfs/SmbMmapFsDirectoryFactory.java index c38597720c5a9..2c8cae98e75dc 100644 --- a/plugins/store-smb/src/main/java/org/opensearch/index/store/smbmmapfs/SmbMmapFsDirectoryFactory.java +++ b/plugins/store-smb/src/main/java/org/opensearch/index/store/smbmmapfs/SmbMmapFsDirectoryFactory.java @@ -47,7 +47,7 @@ public final class SmbMmapFsDirectoryFactory extends FsDirectoryFactory { @Override - protected Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { return new SmbDirectoryWrapper( setPreload( new MMapDirectory(location, lockFactory), diff --git a/plugins/store-smb/src/main/java/org/opensearch/index/store/smbniofs/SmbNIOFsDirectoryFactory.java b/plugins/store-smb/src/main/java/org/opensearch/index/store/smbniofs/SmbNIOFsDirectoryFactory.java index 200f72dd66d89..04cf453fe2bad 100644 --- a/plugins/store-smb/src/main/java/org/opensearch/index/store/smbniofs/SmbNIOFsDirectoryFactory.java +++ b/plugins/store-smb/src/main/java/org/opensearch/index/store/smbniofs/SmbNIOFsDirectoryFactory.java @@ -24,7 +24,7 @@ public final class SmbNIOFsDirectoryFactory extends FsDirectoryFactory { @Override - protected Directory 
newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { return new SmbDirectoryWrapper(new NIOFSDirectory(location, lockFactory)); } } diff --git a/server/src/main/java/org/opensearch/OpenSearchServerException.java b/server/src/main/java/org/opensearch/OpenSearchServerException.java index 7e299abd8d943..65f59d179cbab 100644 --- a/server/src/main/java/org/opensearch/OpenSearchServerException.java +++ b/server/src/main/java/org/opensearch/OpenSearchServerException.java @@ -24,6 +24,7 @@ import static org.opensearch.Version.V_2_7_0; import static org.opensearch.Version.V_3_0_0; import static org.opensearch.Version.V_3_2_0; +import static org.opensearch.Version.V_3_3_0; /** * Utility class to register server exceptions @@ -1241,5 +1242,13 @@ public static void registerExceptions() { V_3_2_0 ) ); + registerExceptionHandle( + new OpenSearchExceptionHandle( + org.opensearch.index.engine.LookupMapLockAcquisitionException.class, + org.opensearch.index.engine.LookupMapLockAcquisitionException::new, + CUSTOM_ELASTICSEARCH_EXCEPTIONS_BASE_ID + 2, + V_3_3_0 + ) + ); } } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java index e9e9cb7f37532..37ca3161117d5 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java @@ -87,6 +87,7 @@ import org.opensearch.index.IndexingPressureService; import org.opensearch.index.SegmentReplicationPressureService; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.LookupMapLockAcquisitionException; import org.opensearch.index.engine.VersionConflictEngineException; import org.opensearch.index.get.GetResult; import org.opensearch.index.mapper.MapperException; @@ -727,7 +728,15 @@ && isConflictException(executionResult.getFailure().getCause()) && context.getRetryCounter() < ((UpdateRequest) docWriteRequest).retryOnConflict()) { context.resetForExecutionForRetry(); return; - } + } else if (isFailed + && context.getPrimary() != null + && context.getPrimary().indexSettings() != null + && context.getPrimary().indexSettings().isContextAwareEnabled() + && isLookupMapLockAcquisitionException(executionResult.getFailure().getCause()) + && context.getRetryCounter() < context.getPrimary().indexSettings().getMaxRetryOnLookupMapAcquisitionException()) { + context.resetForExecutionForRetry(); + return; + } final BulkItemResponse response; if (isUpdate) { response = processUpdateResponse((UpdateRequest) docWriteRequest, context.getConcreteIndex(), executionResult, updateResult); @@ -756,6 +765,10 @@ private static boolean isConflictException(final Exception e) { return ExceptionsHelper.unwrapCause(e) instanceof VersionConflictEngineException; } + private static boolean isLookupMapLockAcquisitionException(final Exception e) { + return ExceptionsHelper.unwrapCause(e) instanceof LookupMapLockAcquisitionException; + } + /** * Creates a new bulk item result from the given requests and result of performing the update operation on the shard. 
*/ diff --git a/server/src/main/java/org/opensearch/common/lucene/Lucene.java b/server/src/main/java/org/opensearch/common/lucene/Lucene.java index 61530b13dfcf1..599697b169086 100644 --- a/server/src/main/java/org/opensearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/opensearch/common/lucene/Lucene.java @@ -91,6 +91,7 @@ import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.codec.CriteriaBasedCodec; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.plain.NonPruningSortedSetOrdinalsIndexFieldData.NonPruningSortField; import org.opensearch.search.sort.SortedWiderNumericSortField; @@ -939,10 +940,23 @@ public LeafReader wrap(LeafReader leaf) { // Two scenarios that we have hard-deletes: (1) from old segments where soft-deletes was disabled, // (2) when IndexWriter hits non-aborted exceptions. These two cases, IW flushes SegmentInfos // before exposing the hard-deletes, thus we can use the hard-delete count of SegmentInfos. - final int numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount(); + + // With CAS enabled segments, hard deletes can also be present, so correcting numDocs. + // We are using attribute value here to identify whether segment has CAS enabled or not. + int numDocs; + if (isContextAwareEnabled(segmentReader)) { + numDocs = popCount(hardLiveDocs); + } else { + numDocs = segmentReader.maxDoc() - segmentReader.getSegmentInfo().getDelCount(); + } + assert numDocs == popCount(hardLiveDocs) : numDocs + " != " + popCount(hardLiveDocs); return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs); } + + private boolean isContextAwareEnabled(SegmentReader reader) { + return reader.getSegmentInfo().info.getAttribute(CriteriaBasedCodec.BUCKET_NAME) != null; + } }); } diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 9f4efcdbc25e0..6a3d47986122e 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -176,6 +176,8 @@ public final class IndexScopedSettings extends AbstractScopedSettings { ShardsLimitAllocationDecider.INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING, IndexSettings.INDEX_GC_DELETES_SETTING, IndexSettings.INDEX_SOFT_DELETES_SETTING, + IndexSettings.INDEX_CONTEXT_AWARE_ENABLED_SETTING, + IndexSettings.INDEX_MAX_RETRY_ON_LOOKUP_MAP_LOCK_ACQUISITION_EXCEPTION, IndexSettings.INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING, IndexSettings.INDEX_SOFT_DELETES_RETENTION_LEASE_PERIOD_SETTING, IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING, diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java index 8a3086d56a7e1..8a24ed7fd5bae 100644 --- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java +++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java @@ -36,6 +36,11 @@ public class FeatureFlags { */ public static final String REMOTE_STORE_MIGRATION_EXPERIMENTAL = FEATURE_FLAG_PREFIX + "remote_store.migration.enabled"; + /** + * Gates the visibility of the context aware segments. 
+ */ + public static final String CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG = FEATURE_FLAG_PREFIX + "context_aware.migration.enabled"; + /** * Gates the functionality of extensions. * Once the feature is ready for production release, this feature flag can be removed. @@ -69,6 +74,12 @@ public class FeatureFlags { Property.NodeScope ); + public static final Setting CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING = Setting.boolSetting( + CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG, + false, + Property.NodeScope + ); + public static final Setting EXTENSIONS_SETTING = Setting.boolSetting(EXTENSIONS, false, Property.NodeScope); public static final Setting TELEMETRY_SETTING = Setting.boolSetting(TELEMETRY, false, Property.NodeScope); @@ -133,6 +144,7 @@ static class FeatureFlagsImpl { put(TERM_VERSION_PRECOMMIT_ENABLE_SETTING, TERM_VERSION_PRECOMMIT_ENABLE_SETTING.getDefault(Settings.EMPTY)); put(ARROW_STREAMS_SETTING, ARROW_STREAMS_SETTING.getDefault(Settings.EMPTY)); put(STREAM_TRANSPORT_SETTING, STREAM_TRANSPORT_SETTING.getDefault(Settings.EMPTY)); + put(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING, CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING.getDefault(Settings.EMPTY)); } }; diff --git a/server/src/main/java/org/opensearch/index/BucketedCompositeDirectory.java b/server/src/main/java/org/opensearch/index/BucketedCompositeDirectory.java new file mode 100644 index 0000000000000..e1d269c83d2cb --- /dev/null +++ b/server/src/main/java/org/opensearch/index/BucketedCompositeDirectory.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Directory wrapper used to filter out child level directory for context aware enabled indices. + * + */ +public class BucketedCompositeDirectory extends FilterDirectory { + + public static final String CHILD_DIRECTORY_PREFIX = "temp_"; + + protected BucketedCompositeDirectory(Directory in) { + super(in); + } + + /** + * List all files within directory filtering out child level directory. + * @return files excluding child level directory. + * + * @throws IOException in case of I/O error + */ + @Override + public String[] listAll() throws IOException { + return Arrays.stream(super.listAll()) + .filter(fileName -> !fileName.startsWith(CHILD_DIRECTORY_PREFIX)) + .distinct() + .toArray(String[]::new); + } +} diff --git a/server/src/main/java/org/opensearch/index/CriteriaBasedMergePolicy.java b/server/src/main/java/org/opensearch/index/CriteriaBasedMergePolicy.java new file mode 100644 index 0000000000000..489a7c1eca64a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/CriteriaBasedMergePolicy.java @@ -0,0 +1,82 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index; + +import org.apache.lucene.index.FilterMergePolicy; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeTrigger; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfos; +import org.opensearch.index.codec.CriteriaBasedCodec; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Wrapper merge policy which is used for context aware enabled indices. This merge policy merges segments that belongs + * to same bucket. + * + */ +public class CriteriaBasedMergePolicy extends FilterMergePolicy { + + protected final MergePolicy in; + + public CriteriaBasedMergePolicy(MergePolicy in) { + super(in); + this.in = in; + } + + /** + * Merges the segments belonging to same group. + * + * @param mergeTrigger the event that triggered the merge + * @param infos the total set of segments in the index + * @param mergeContext the IndexWriter to find the merges on + * @return + * @throws IOException + */ + @Override + public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException { + final Set merging = mergeContext.getMergingSegments(); + MergeSpecification spec = null; + final Map> commitInfos = new HashMap<>(); + for (SegmentCommitInfo si : infos) { + if (merging.contains(si)) { + continue; + } + + final String dwptGroupNumber = si.info.getAttribute(CriteriaBasedCodec.BUCKET_NAME); + commitInfos.computeIfAbsent(dwptGroupNumber, k -> new ArrayList<>()).add(si); + } + + for (String dwptGroupNumber : commitInfos.keySet()) { + if (commitInfos.get(dwptGroupNumber).size() > 1) { + final SegmentInfos newSIS = new SegmentInfos(infos.getIndexCreatedVersionMajor()); + for (SegmentCommitInfo info : commitInfos.get(dwptGroupNumber)) { + newSIS.add(info); + } + + final MergeSpecification tieredMergePolicySpec = in.findMerges(mergeTrigger, newSIS, mergeContext); + if (tieredMergePolicySpec != null) { + if (spec == null) { + spec = new MergeSpecification(); + } + + spec.merges.addAll(tieredMergePolicySpec.merges); + } + } + } + + return spec; + } +} diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index c818f07d419cd..1a4b14ddef9ba 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -38,6 +38,8 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockFactory; import org.apache.lucene.util.Constants; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; @@ -62,6 +64,7 @@ import org.opensearch.core.indices.breaker.CircuitBreakerService; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.NodeEnvironment; +import org.opensearch.env.ShardLock; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.cache.query.DisabledQueryCache; @@ -76,6 +79,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexingOperationListener; import org.opensearch.index.shard.SearchOperationListener; +import 
org.opensearch.index.shard.ShardPath; import org.opensearch.index.similarity.SimilarityService; import org.opensearch.index.store.DefaultCompositeDirectoryFactory; import org.opensearch.index.store.FsDirectoryFactory; @@ -98,6 +102,7 @@ import org.opensearch.transport.client.Client; import java.io.IOException; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -848,7 +853,18 @@ private static IndexStorePlugin.DirectoryFactory getDirectoryFactory( throw new IllegalArgumentException("Unknown store type [" + storeType + "]"); } } - return factory; + + return new IndexStorePlugin.DirectoryFactory() { + @Override + public Directory newDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException { + return new BucketedCompositeDirectory(factory.newDirectory(indexSettings, shardPath)); + } + + @Override + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + return factory.newFSDirectory(location, lockFactory, indexSettings); + } + }; } private static IndexStorePlugin.CompositeDirectoryFactory getCompositeDirectoryFactory( @@ -892,7 +908,33 @@ private static IndexStorePlugin.StoreFactory resolveStoreFactory( ) { final String key = indexSettings.getValue(INDEX_STORE_FACTORY_SETTING); if (key == null || key.isEmpty()) { - return Store::new; + return new IndexStorePlugin.StoreFactory() { + + @Override + public Store newStore( + ShardId shardId, + IndexSettings indexSettings, + Directory directory, + ShardLock shardLock, + Store.OnClose onClose, + ShardPath shardPath + ) throws IOException { + return new Store(shardId, indexSettings, directory, shardLock, onClose, shardPath); + } + + @Override + public Store newStore( + ShardId shardId, + IndexSettings indexSettings, + Directory directory, + ShardLock shardLock, + Store.OnClose onClose, + ShardPath shardPath, + IndexStorePlugin.DirectoryFactory directoryFactory + ) throws IOException { + return new Store(shardId, indexSettings, directory, shardLock, onClose, shardPath, directoryFactory); + } + }; } final IndexStorePlugin.StoreFactory factory = storeFactories.get(key); if (factory == null) { diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 779d641c718aa..2a862dd94b43e 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -728,7 +728,15 @@ protected void closeInternal() { // Do nothing for shard lock on remote store } }; - remoteStore = new Store(shardId, this.indexSettings, remoteDirectory, remoteStoreLock, Store.OnClose.EMPTY, path); + remoteStore = new Store( + shardId, + this.indexSettings, + remoteDirectory, + remoteStoreLock, + Store.OnClose.EMPTY, + path, + directoryFactory + ); } else { // Disallow shards with remote store based settings to be created on non-remote store enabled nodes // Even though we have `RemoteStoreMigrationAllocationDecider` in place to prevent something like this from happening at the @@ -763,7 +771,8 @@ protected void closeInternal() { directory, lock, new StoreCloseListener(shardId, () -> eventListener.onStoreClosed(shardId)), - path + path, + directoryFactory ); eventListener.onStoreCreated(shardId); indexShard = new IndexShard( diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 
4f6198b42ec03..7c5be554a7760 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -43,6 +43,7 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.Strings; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.common.unit.ByteSizeValue; @@ -69,6 +70,8 @@ import java.util.function.UnaryOperator; import static org.opensearch.Version.V_2_7_0; +import static org.opensearch.common.util.FeatureFlags.CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG; +import static org.opensearch.common.util.FeatureFlags.CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING; import static org.opensearch.index.codec.fuzzy.FuzzySetParameters.DEFAULT_FALSE_POSITIVE_PROBABILITY; import static org.opensearch.index.mapper.MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING; import static org.opensearch.index.mapper.MapperService.INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING; @@ -471,6 +474,37 @@ public static IndexMergePolicy fromString(String text) { Property.Final ); + /** + * Specifies if the index should be context aware enabled + */ + public static final Setting INDEX_CONTEXT_AWARE_ENABLED_SETTING = Setting.boolSetting( + "index.context_aware.enabled", + false, + value -> { + if (FeatureFlags.isEnabled(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG) == false && value == true) { + throw new IllegalArgumentException( + "FeatureFlag " + CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG + " must be enabled to set this property to true" + ); + } + }, + Property.IndexScope, + Property.Final + ); + + /** + * Maximum number of indexing request retries in case LookupMapLockAcquisitionException is encountered for + * context aware indexes. + * + */ + public static final Setting INDEX_MAX_RETRY_ON_LOOKUP_MAP_LOCK_ACQUISITION_EXCEPTION = Setting.intSetting( + "index.context_aware.max_retry_on_lookup_map_acquisition_exception", + 15, + 5, + 100, + Setting.Property.IndexScope, + Property.Dynamic + ); + /** * Controls how many soft-deleted documents will be kept around before being merged away. Keeping more deleted * documents increases the chance of operation-based recoveries and allows querying a longer history of documents. 
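// Illustrative sketch only (not part of this diff): building index settings that opt into
// context-aware segments, using the two settings introduced in the hunk above. Assumes the node
// enables the experimental flag FeatureFlags.CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG; otherwise
// the validator on index.context_aware.enabled rejects a true value.
import org.opensearch.common.settings.Settings;

final class ContextAwareIndexSettingsExample {
    static Settings contextAwareIndexSettings() {
        return Settings.builder()
            // Final, index-scoped setting added in this change.
            .put("index.context_aware.enabled", true)
            // Dynamic retry bound for LookupMapLockAcquisitionException (default 15, allowed range 5-100).
            .put("index.context_aware.max_retry_on_lookup_map_acquisition_exception", 20)
            .build();
    }
}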
@@ -874,6 +908,8 @@ public static IndexMergePolicy fromString(String text) { private final IndexScopedSettings scopedSettings; private long gcDeletesInMillis = DEFAULT_GC_DELETES.millis(); private final boolean softDeleteEnabled; + private final boolean contextAwareEnabled; + private int maxRetryOnLookupMapAcquisitionException; private volatile long softDeleteRetentionOperations; private volatile long retentionLeaseMillis; @@ -1087,6 +1123,8 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti mergeSchedulerConfig = new MergeSchedulerConfig(this); gcDeletesInMillis = scopedSettings.get(INDEX_GC_DELETES_SETTING).getMillis(); softDeleteEnabled = scopedSettings.get(INDEX_SOFT_DELETES_SETTING); + contextAwareEnabled = scopedSettings.get(INDEX_CONTEXT_AWARE_ENABLED_SETTING); + maxRetryOnLookupMapAcquisitionException = scopedSettings.get(INDEX_MAX_RETRY_ON_LOOKUP_MAP_LOCK_ACQUISITION_EXCEPTION); assert softDeleteEnabled || version.before(Version.V_2_0_0) : "soft deletes must be enabled in version " + version; softDeleteRetentionOperations = scopedSettings.get(INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING); retentionLeaseMillis = scopedSettings.get(INDEX_SOFT_DELETES_RETENTION_LEASE_PERIOD_SETTING).millis(); @@ -1254,6 +1292,10 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti scopedSettings.addSettingsUpdateConsumer(INDEX_MERGE_ON_FLUSH_ENABLED, this::setMergeOnFlushEnabled); scopedSettings.addSettingsUpdateConsumer(INDEX_MERGE_ON_FLUSH_POLICY, this::setMergeOnFlushPolicy); scopedSettings.addSettingsUpdateConsumer(DEFAULT_SEARCH_PIPELINE, this::setDefaultSearchPipeline); + scopedSettings.addSettingsUpdateConsumer( + INDEX_MAX_RETRY_ON_LOOKUP_MAP_LOCK_ACQUISITION_EXCEPTION, + this::setMaxRetryOnLookupMapAcquisitionException + ); scopedSettings.addSettingsUpdateConsumer( INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, this::setRemoteTranslogUploadBufferInterval @@ -2036,6 +2078,18 @@ public boolean isSoftDeleteEnabled() { return softDeleteEnabled; } + public boolean isContextAwareEnabled() { + return contextAwareEnabled && FeatureFlags.isEnabled(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_SETTING); + } + + private void setMaxRetryOnLookupMapAcquisitionException(int maxRetryOnLookupMapAcquisitionException) { + this.maxRetryOnLookupMapAcquisitionException = maxRetryOnLookupMapAcquisitionException; + } + + public int getMaxRetryOnLookupMapAcquisitionException() { + return maxRetryOnLookupMapAcquisitionException; + } + private void setSoftDeleteRetentionOperations(long ops) { this.softDeleteRetentionOperations = ops; } diff --git a/server/src/main/java/org/opensearch/index/codec/CriteriaBasedCodec.java b/server/src/main/java/org/opensearch/index/codec/CriteriaBasedCodec.java new file mode 100644 index 0000000000000..3c9911d03f987 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/CriteriaBasedCodec.java @@ -0,0 +1,72 @@ +/*Add commentMore actions + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.codec; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.lucene103.Lucene103Codec; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +import java.io.IOException; + +/** + * Filter codec used to attach bucket attributes to segments of child writer. + * + */ +public class CriteriaBasedCodec extends FilterCodec { + + private final String bucket; + public static final String BUCKET_NAME = "bucket"; + private static final String PLACEHOLDER_BUCKET_FOR_PARENT_WRITER = "-2"; + + public CriteriaBasedCodec() { + super("CriteriaBasedCodec", new Lucene103Codec()); + bucket = null; + } + + public CriteriaBasedCodec(Codec delegate, String bucket) { + super("CriteriaBasedCodec", delegate); + this.bucket = bucket; + } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return new SegmentInfoFormat() { + @Override + public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException { + return delegate.segmentInfoFormat().read(directory, segmentName, segmentID, context); + } + + @Override + public void write(Directory directory, SegmentInfo info, IOContext ioContext) throws IOException { + if (bucket != null) { + // We will set BUCKET_NAME attribute only for child writer where bucket will set. + info.putAttribute(BUCKET_NAME, bucket); + } else if (info.getAttribute(BUCKET_NAME) == null) { + // For segment belonging to parent writer, attributes will be set. In case write went to parent + // writer (like for no ops writes or for temporary tombstone entry which is added for deletes/updates + // to sync version across child and parent writers), segments corresponding to those writer does not + // have + info.putAttribute(BUCKET_NAME, PLACEHOLDER_BUCKET_FOR_PARENT_WRITER); + } + + delegate.segmentInfoFormat().write(directory, info, ioContext); + } + }; + } + + @Override + public DocValuesFormat docValuesFormat() { + return new CriteriaBasedDocValueFormat(bucket); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/CriteriaBasedDocValueFormat.java b/server/src/main/java/org/opensearch/index/codec/CriteriaBasedDocValueFormat.java new file mode 100644 index 0000000000000..93948ad896e52 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/CriteriaBasedDocValueFormat.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; + +/** + * Doc value format for context aware enabled indices. This is used to attach bucket attributes to field info for + * segments of child level writers. This is used to ensure attributes remain intact during segment merges. 
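// Illustrative sketch only (not part of this diff): how a child-level writer could be configured so
// that its flushed segments carry the bucket attribute written by the CriteriaBasedCodec above.
// The analyzer and bucket value are placeholders; only the codec wiring reflects this change.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriterConfig;
import org.opensearch.index.codec.CriteriaBasedCodec;

final class ChildWriterCodecExample {
    static IndexWriterConfig childWriterConfig(String bucket) {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        // Segments flushed with this codec carry the "bucket" attribute in their SegmentInfo,
        // which CriteriaBasedMergePolicy later uses to merge only same-bucket segments.
        config.setCodec(new CriteriaBasedCodec(Codec.getDefault(), bucket));
        return config;
    }
}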
+ * + * @opensearch.internal + */ +public class CriteriaBasedDocValueFormat extends DocValuesFormat { + + private final DocValuesFormat delegate; + private final String bucket; + + public CriteriaBasedDocValueFormat() { + this(new Lucene90DocValuesFormat(), null); + } + + public CriteriaBasedDocValueFormat(String bucket) { + this(new Lucene90DocValuesFormat(), bucket); + } + + /** + * Creates a new docvalues format. + * + */ + protected CriteriaBasedDocValueFormat(DocValuesFormat delegate, String bucket) { + super(delegate.getName()); + this.delegate = delegate; + this.bucket = bucket; + } + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + return new CriteriaBasedDocValuesWriter(delegate.fieldsConsumer(state), bucket); + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { + return delegate.fieldsProducer(state); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/CriteriaBasedDocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/CriteriaBasedDocValuesWriter.java new file mode 100644 index 0000000000000..b0d31ff3a3d5f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/CriteriaBasedDocValuesWriter.java @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.MergeState; + +import java.io.IOException; + +/** + * This is used to attach bucket attributes to seq_no field info for segments of child level writers. This is used to + * ensure attributes remain intact during segment merges. 
+ * + * @opensearch.internal + */ +public class CriteriaBasedDocValuesWriter extends DocValuesConsumer { + + private final DocValuesConsumer delegate; + private final String bucket; + + public CriteriaBasedDocValuesWriter(DocValuesConsumer delegate, String bucket) throws IOException { + this.delegate = delegate; + this.bucket = bucket; + } + + @Override + public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addNumericField(field, valuesProducer); + if (field.name.equals("_seq_no") && bucket != null) { + field.putAttribute(CriteriaBasedCodec.BUCKET_NAME, bucket); + } + } + + @Override + public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addBinaryField(field, valuesProducer); + } + + @Override + public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addSortedField(field, valuesProducer); + } + + @Override + public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addSortedNumericField(field, valuesProducer); + } + + @Override + public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addSortedSetField(field, valuesProducer); + } + + @Override + public void merge(MergeState mergeState) throws IOException { + super.merge(mergeState); + mergeState.segmentInfo.putAttribute( + CriteriaBasedCodec.BUCKET_NAME, + mergeState.mergeFieldInfos.fieldInfo("_seq_no").getAttribute(CriteriaBasedCodec.BUCKET_NAME) + ); + mergeState.segmentInfo.putAttribute("merge", "true"); + } + + @Override + public void close() throws IOException { + delegate.close(); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/CompositeIndexWriter.java b/server/src/main/java/org/opensearch/index/engine/CompositeIndexWriter.java new file mode 100644 index 0000000000000..e35c5dd1145d4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/CompositeIndexWriter.java @@ -0,0 +1,1084 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LiveIndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.opensearch.OpenSearchException; +import org.opensearch.common.CheckedBiFunction; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.logging.Loggers; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.common.util.concurrent.KeyedLock; +import org.opensearch.common.util.concurrent.ReleasableLock; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.Assertions; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.mapper.IdFieldMapper; +import org.opensearch.index.mapper.ParseContext; +import org.opensearch.index.mapper.SeqNoFieldMapper; +import org.opensearch.index.mapper.VersionFieldMapper; +import org.opensearch.index.store.Store; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.stream.Collectors; + +import static org.opensearch.index.BucketedCompositeDirectory.CHILD_DIRECTORY_PREFIX; + +/** + *
+ * InternalEngine delegates all IndexWriter specific operations through the + * CompositeIndexWriter class rather than directly interacting with IndexWriter. + * This wrapper serves as a unified interface for coordinating write operations + * with group-specific IndexWriters and managing read operations through an + * accumulating parent IndexWriter. This wrapper class also handles synchronization + * of group-specific IndexWriters with the accumulating IndexWriter during refresh + * by implementing the RefreshListener interface. + * + *
+ * In addition to managing group-specific IndexWriters, CompositeIndexWriter tracks + * all updates and deletions applied during each refresh cycle. This state is maintained + * using a refresh-rotating map structure analogous to LiveVersionMap's implementation. + * + *
+ * Indexing
+ *
+ * During indexing, CompositeIndexWriter evaluates the group for a document using a + * grouping criteria function. The specific IndexWriter selected for indexing a document + * depends on the outcome of the document for the grouping criteria function. Should the + * relevant IndexWriter entry inside the map be null, a new IndexWriter will be instantiated + * for this criteria and added to the map + *
+ * Version Resolution
+ *
+ * InternalEngine resolves the current version of a document before indexing it to + * determine whether the request is an indexing or update operation. InternalEngine + * performs this by first doing a lookup in the version map. In case no version of the + * document is present in the version map, it queries Lucene via the searcher to look + * for the current version of the document. Since the version map is maintained throughout + * an entire refresh cycle, there is no change in how versions are resolved in the above + * approach. InternalEngine performs a lookup for the document first in the version map + * followed by querying the document associated with the parent IndexWriter. + *
+ *
+ * Locking Mechanism
+ *
+ * OpenSearch currently utilizes ReentrantReadWriteLock to ensure the underlying + * IndexWriter is not closed during active indexing. With context-aware segments, an + * additional lock is used for each IndexWriterMap inside CompositeIndexWriter. + *
+ * + * During each write/update/delete operation, a read lock on the ReentrantLock + * associated with the map is acquired. This lock is released when indexing completes. + * During refresh, a write lock on the same ReentrantLock is obtained just before + * rotating the WriterMap. Since the write lock is acquired only when there is no + * active read lock on the writer, all writers in a map are closed and synced with + * the parent writer only when there are no active writes happening on these IndexWriters. + *
+ *
+ * Updates and Deletes
+ *
+ * With multiple IndexWriters, indexing and updates can occur on different IndexWriters. + * Therefore, document versions must be synchronized across IndexWriters. This is achieved + * by performing a partial soft delete (delete without indexing tombstone entry) on the + * IndexWriters containing the previous version of the document. + * + * @see org.opensearch.index.engine.InternalEngine + * @see org.apache.lucene.search.ReferenceManager.RefreshListener + * @see org.opensearch.index.engine.LiveVersionMap + * @see org.apache.lucene.index.IndexWriter + */ +public class CompositeIndexWriter implements DocumentIndexWriter { + + private final KeyedLock keyedLock = new KeyedLock<>(); + + private final EngineConfig engineConfig; + private final IndexWriter accumulatingIndexWriter; + private final CheckedBiFunction childIndexWriterFactory; + private final NumericDocValuesField softDeletesField; + protected final Logger logger; + private volatile AtomicBoolean closed; + private final SoftDeletesPolicy softDeletesPolicy; + private final Store store; + private static final String DUMMY_TOMBSTONE_DOC_ID = "-2"; + private final IndexWriterFactory nativeIndexWriterFactory; + + public CompositeIndexWriter( + EngineConfig engineConfig, + IndexWriter accumulatingIndexWriter, + SoftDeletesPolicy softDeletesPolicy, + NumericDocValuesField softDeletesField, + IndexWriterFactory nativeIndexWriterFactory + ) { + this.engineConfig = engineConfig; + this.accumulatingIndexWriter = accumulatingIndexWriter; + this.softDeletesPolicy = softDeletesPolicy; + this.childIndexWriterFactory = this::createChildWriterUtil; + this.softDeletesField = softDeletesField; + this.store = engineConfig.getStore(); + this.logger = Loggers.getLogger(Engine.class, engineConfig.getShardId()); + this.closed = new AtomicBoolean(false); + this.nativeIndexWriterFactory = nativeIndexWriterFactory; + } + + /** + * + * All write operations will now be handled by a pool of group specific disposable + * IndexWriters. These disposable IndexWriters will be modelled after Lucene's DWPTs + * (DocumentsWriterPerThread). + * + *
+ * States of Disposable IndexWriters
+ *
+ * Similar to DWPTs, these disposable IndexWriters will have three states:
+ *
+ * Active
+ *
+ * IndexWriters in this state will handle all write requests coming to InternalEngine.
+ * For each group/tenant, there will be at most a single IndexWriter that will be in the
+ * active state. OpenSearch maintains a mapping of active IndexWriters, each associated
+ * with a specific group. During indexing, the specific IndexWriter selected for indexing
+ * a document will depend on the outcome of the document for the grouping criteria function.
+ * Should there be no active IndexWriter for a group, a new IndexWriter will be instantiated
+ * for this criteria and added to the pool.
+ *
+ * Mark for Refresh
+ *
+ * During refresh, we transition all group specific active IndexWriters from the active pool
+ * to an intermediate refresh pending state. At this stage, these IndexWriters will not be
+ * accepting any active writes, but will continue to handle any ongoing operation.
+ *
+ * Close
+ *
+ * At this stage, OpenSearch will sync the content of group specific IndexWriters with
+ * an accumulating parent IndexWriter via Lucene's addIndexes API call. Post the sync, we
+ * remove all group specific IndexWriters from the Mark for Refresh stage and close them.
+ * + */ + static class DisposableIndexWriter { + + private final IndexWriter indexWriter; + private final CriteriaBasedIndexWriterLookup lookupMap; + + public DisposableIndexWriter(IndexWriter indexWriter, CriteriaBasedIndexWriterLookup lookupMap) { + this.indexWriter = indexWriter; + this.lookupMap = lookupMap; + } + + public IndexWriter getIndexWriter() { + return indexWriter; + } + + public CriteriaBasedIndexWriterLookup getLookupMap() { + return lookupMap; + } + } + + /** + * This class represents a lookup entry inside LiveIndexWriterDeletesMap. This class is mapped similar to + * LiveVersionMap.VersionLookup. This is maintained on per refresh cycle basis. This contains the group + * specific IndexWriter associated with this refresh cycle, the updates/deletes that came in this refresh cycle and + * a pair of read/write lock which is used to ensure that a look is correctly closed (no ongoing operation on + * IndexWriter associated with lookup). Composite IndexWriter syncs a lookup with accumulating IndexWriter during + * each refresh cycle. + * + */ + public static final class CriteriaBasedIndexWriterLookup implements Closeable { + private final Map criteriaBasedIndexWriterMap; + private final Map lastDeleteEntrySet; + private final Map criteria; + private final ReentrantReadWriteLock mapLock; + private final CriteriaBasedWriterLock mapReadLock; + private final ReleasableLock mapWriteLock; + private final long version; + private boolean closed; + + private static final CriteriaBasedIndexWriterLookup EMPTY = new CriteriaBasedIndexWriterLookup( + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + 0 + ); + + private CriteriaBasedIndexWriterLookup( + final Map criteriaBasedIndexWriterMap, + Map lastDeleteEntrySet, + Map criteria, + long version + ) { + this.criteriaBasedIndexWriterMap = criteriaBasedIndexWriterMap; + this.lastDeleteEntrySet = lastDeleteEntrySet; + this.mapLock = new ReentrantReadWriteLock(); + this.mapReadLock = new CriteriaBasedWriterLock(mapLock.readLock(), this); + this.mapWriteLock = new ReleasableLock(mapLock.writeLock()); + this.criteria = criteria; + this.version = version; + this.closed = false; + } + + DisposableIndexWriter computeIndexWriterIfAbsentForCriteria( + String criteria, + CheckedBiFunction indexWriterSupplier + ) { + return criteriaBasedIndexWriterMap.computeIfAbsent(criteria, (key) -> { + try { + return indexWriterSupplier.apply(criteria, this); + } catch (IOException e) { + throw new OpenSearchException(e); + } + }); + } + + DisposableIndexWriter getIndexWriterForCriteria(String criteria) { + return criteriaBasedIndexWriterMap.get(criteria); + } + + int sizeOfCriteriaBasedIndexWriterMap() { + return criteriaBasedIndexWriterMap.size(); + } + + int sizeOfLastDeleteEntrySet() { + return lastDeleteEntrySet.size(); + } + + void putLastDeleteEntry(BytesRef uid, DeleteEntry deleteEntry) { + lastDeleteEntrySet.put(uid, deleteEntry); + } + + void putCriteriaForDoc(BytesRef key, String criteria) { + this.criteria.put(key, criteria); + } + + String getCriteriaForDoc(BytesRef key) { + return criteria.get(key); + } + + void removeLastDeleteEntry(BytesRef key) { + lastDeleteEntrySet.remove(key); + } + + CriteriaBasedWriterLock getMapReadLock() { + return mapReadLock; + } + + boolean hasNewChanges() { + return !criteriaBasedIndexWriterMap.isEmpty() || !lastDeleteEntrySet.isEmpty(); + } + + @Override + public void close() throws IOException { + this.closed = true; + } + + public boolean isClosed() { + return closed; + } + + private static 
final class CriteriaBasedWriterLock implements Releasable { + private final Lock lock; + // a per-thread count indicating how many times the thread has entered the lock; only works if assertions are enabled + private final ThreadLocal holdingThreads; + private final CriteriaBasedIndexWriterLookup lookup; + + public CriteriaBasedWriterLock(Lock lock, CriteriaBasedIndexWriterLookup lookup) { + this.lock = lock; + if (Assertions.ENABLED) { + holdingThreads = new ThreadLocal<>(); + } else { + holdingThreads = null; + } + + this.lookup = lookup; + } + + @Override + public void close() { + lock.unlock(); + assert removeCurrentThread(); + } + + public CriteriaBasedIndexWriterLookup acquire() throws EngineException { + lock.lock(); + assert addCurrentThread(); + return lookup; + } + + /** + * Try acquiring lock, returning null if unable. + */ + public CriteriaBasedIndexWriterLookup tryAcquire() { + boolean locked = lock.tryLock(); + if (locked) { + assert addCurrentThread(); + return lookup; + } else { + return null; + } + } + + /** + * Try acquiring lock, returning null if unable to acquire lock within timeout. + */ + public CriteriaBasedIndexWriterLookup tryAcquire(TimeValue timeout) throws InterruptedException { + boolean locked = lock.tryLock(timeout.duration(), timeout.timeUnit()); + if (locked) { + assert addCurrentThread(); + return lookup; + } else { + return null; + } + } + + private boolean addCurrentThread() { + final Integer current = holdingThreads.get(); + holdingThreads.set(current == null ? 1 : current + 1); + return true; + } + + private boolean removeCurrentThread() { + final Integer count = holdingThreads.get(); + assert count != null && count > 0; + if (count == 1) { + holdingThreads.remove(); + } else { + holdingThreads.set(count - 1); + } + return true; + } + + public boolean isHeldByCurrentThread() { + if (holdingThreads == null) { + throw new UnsupportedOperationException("asserts must be enabled"); + } + final Integer count = holdingThreads.get(); + return count != null && count > 0; + } + } + } + + private static class DeleteEntry { + private final Term term; + private final long version; + private final long seqNo; + private final long primaryTerm; + + public DeleteEntry(Term term, long version, long seqNo, long primaryTerm) { + this.term = term; + this.version = version; + this.seqNo = seqNo; + this.primaryTerm = primaryTerm; + } + + public Term getTerm() { + return term; + } + } + + /** + * Map used for maintaining CriteriaBasedIndexWriterLookup + * + * @opensearch.internal + */ + final static class LiveIndexWriterDeletesMap { + // All writes (adds and deletes) go into here: + final CriteriaBasedIndexWriterLookup current; + + // Used while refresh is running, and to hold adds/deletes until refresh finishes. 
We read from both current and old on lookup: + final CriteriaBasedIndexWriterLookup old; + + LiveIndexWriterDeletesMap(CriteriaBasedIndexWriterLookup current, CriteriaBasedIndexWriterLookup old) { + this.current = current; + this.old = old; + } + + LiveIndexWriterDeletesMap() { + this( + new CriteriaBasedIndexWriterLookup( + ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(), + ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(), + ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(), + 0 + ), + CriteriaBasedIndexWriterLookup.EMPTY + ); + } + + /** + * Builds a new map for the refresh transition this should be called in beforeRefresh() + */ + LiveIndexWriterDeletesMap buildTransitionMap() { + // This ensures writer map is not rotated during the time when we are obtaining an IndexWriter from map. As + // this may cause updates to go out of sync with current IndexWriter. + return new LiveIndexWriterDeletesMap( + new CriteriaBasedIndexWriterLookup( + ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(current.sizeOfCriteriaBasedIndexWriterMap()), + ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(current.sizeOfLastDeleteEntrySet()), + ConcurrentCollections.newConcurrentMapWithAggressiveConcurrency(current.sizeOfLastDeleteEntrySet()), + current.version + 1 + ), + current + ); + } + + /** + * builds a new map that invalidates the old map but maintains the current. This should be called in afterRefresh() + */ + LiveIndexWriterDeletesMap invalidateOldMap() { + return new LiveIndexWriterDeletesMap(current, CriteriaBasedIndexWriterLookup.EMPTY); + } + + void putLastDeleteEntryInCurrentMap(BytesRef uid, DeleteEntry deleteEntry) { + current.putLastDeleteEntry(uid, deleteEntry); + } + + void putCriteriaForDoc(BytesRef key, String criteria) { + current.putCriteriaForDoc(key, criteria); + } + + String getCriteriaForDoc(BytesRef key) { + return current.getCriteriaForDoc(key); + } + + DisposableIndexWriter computeIndexWriterIfAbsentForCriteria( + String criteria, + CheckedBiFunction indexWriterSupplier, + ShardId shardId + ) { + boolean success = false; + CriteriaBasedIndexWriterLookup current = null; + try { + current = getCurrentMap(); + if (current == null || current.isClosed()) { + throw new LookupMapLockAcquisitionException(shardId, "Unable to obtain lock on the current Lookup map", null); + } + + DisposableIndexWriter writer = current.computeIndexWriterIfAbsentForCriteria(criteria, indexWriterSupplier); + success = true; + return writer; + } finally { + if (success == false && current != null) { + assert current.mapReadLock.isHeldByCurrentThread() == true; + current.mapReadLock.close(); + } + } + } + + // This function acquires a first read lock on a map which does not have any write lock present. Current keeps + // on getting rotated during refresh, so there will be one current on which read lock can be obtained. + // Validate that no write lock is applied on the map and the map is not closed. Idea here is write lock was + // never applied on this map as write lock gets only during closing time. We are doing this instead of acquire, + // because acquire can also apply a read lock in case refresh completed and map is closed. + CriteriaBasedIndexWriterLookup getCurrentMap() { + return current.mapReadLock.tryAcquire(); + } + + // Used for Test Case. 
+ ReleasableLock acquireCurrentWriteLock() { + return current.mapWriteLock.acquire(); + } + + boolean hasNewIndexingOrUpdates() { + return current.hasNewChanges() || old.hasNewChanges(); + } + } + + private volatile LiveIndexWriterDeletesMap liveIndexWriterDeletesMap = new LiveIndexWriterDeletesMap(); + + @Override + public void beforeRefresh() throws IOException { + // Rotate map first so all new writes goes to new generation writers. + liveIndexWriterDeletesMap = liveIndexWriterDeletesMap.buildTransitionMap(); + logger.debug("Trying to acquire write lock during refresh of composite IndexWriter. "); + try ( + Releasable ignore = liveIndexWriterDeletesMap.old.mapWriteLock.acquire(); + CriteriaBasedIndexWriterLookup oldMap = liveIndexWriterDeletesMap.old; + ) { + logger.debug("Acquired write lock during refresh of composite IndexWriter."); + // TODO No more write should happen post this, so that before refresh for syncing writers have all old writers available. + // TODO Or should we do this in Reader before listner where we are syncing data?? + refreshDocumentsForParentDirectory(oldMap); + } catch (Throwable ex) { + rollback(); + throw ex; + } + } + + private void refreshDocumentsForParentDirectory(CriteriaBasedIndexWriterLookup oldMap) throws IOException { + final Map markForRefreshIndexWritersMap = oldMap.criteriaBasedIndexWriterMap; + deletePreviousVersionsForUpdatedDocuments(); + final List directoryToCombine = new ArrayList<>(); + for (CompositeIndexWriter.DisposableIndexWriter childDisposableWriter : markForRefreshIndexWritersMap.values()) { + directoryToCombine.add(childDisposableWriter.getIndexWriter().getDirectory()); + childDisposableWriter.getIndexWriter().close(); + } + + if (!directoryToCombine.isEmpty()) { + accumulatingIndexWriter.addIndexes(directoryToCombine.toArray(new Directory[0])); + IOUtils.closeWhileHandlingException(directoryToCombine); + } + + deleteDummyTombstoneEntry(); + } + + private void deleteDummyTombstoneEntry() throws IOException { + Term uid = new Term(IdFieldMapper.NAME, DUMMY_TOMBSTONE_DOC_ID); + accumulatingIndexWriter.deleteDocuments(uid); + } + + private void deletePreviousVersionsForUpdatedDocuments() throws IOException { + Map deleteEntrySet = getLastDeleteEntrySet(); + for (DeleteEntry deleteEntry : deleteEntrySet.values()) { + // For both updates and deletes do a delete only in parent. For updates, latest writes will be on mark for flush writer, + // do delete entry in parent. For delete, do a delete in parent. This will take care of scenario incase deleteInLucene, + // delete went to mark for refresh. + addDeleteEntryToWriter(deleteEntry, accumulatingIndexWriter); + } + } + + /** + * This function is used for performing partial soft delete (delete without inserting a tombstone entry). This is + * used for maintaining a single version of documents across all IndexWriter in a shard. To do this, we perform a + * soft delete using a dummy temporary document as a tombstone entry during the soft update call. This dummy document + * is hard deleted just before refresh. 
+ * + * @param deleteEntry + * @param currentWriter + * @throws IOException + */ + private void addDeleteEntryToWriter(DeleteEntry deleteEntry, IndexWriter currentWriter) throws IOException { + Document document = new Document(); + document.add(new Field("_id", DUMMY_TOMBSTONE_DOC_ID, IdFieldMapper.Defaults.FIELD_TYPE)); + document.add(new NumericDocValuesField(VersionFieldMapper.NAME, deleteEntry.version)); + document.add(new NumericDocValuesField(SeqNoFieldMapper.PRIMARY_TERM_NAME, deleteEntry.primaryTerm)); + currentWriter.softUpdateDocument(deleteEntry.term, document, softDeletesField); + } + + public ReleasableLock getOldWriteLock() { + return liveIndexWriterDeletesMap.old.mapWriteLock; + } + + public ReleasableLock getNewWriteLock() { + return liveIndexWriterDeletesMap.current.mapWriteLock; + } + + @Override + public void afterRefresh(boolean didRefresh) throws IOException { + liveIndexWriterDeletesMap = liveIndexWriterDeletesMap.invalidateOldMap(); + } + + Releasable acquireLock(BytesRef uid) { + return keyedLock.acquire(uid); + } + + public Map getLastDeleteEntrySet() { + return liveIndexWriterDeletesMap.old.lastDeleteEntrySet; + } + + void putLastDeleteEntryUnderLockInNewMap(BytesRef uid, DeleteEntry entry) { + liveIndexWriterDeletesMap.putLastDeleteEntryInCurrentMap(uid, entry); + } + + void putCriteria(BytesRef uid, String criteria) { + assert assertKeyedLockHeldByCurrentThread(uid); + assert uid.bytes.length == uid.length : "Oversized _uid! UID length: " + uid.length + ", bytes length: " + uid.bytes.length; + liveIndexWriterDeletesMap.putCriteriaForDoc(uid, criteria); + } + + DisposableIndexWriter getIndexWriterForIdFromCurrent(BytesRef uid) { + assert assertKeyedLockHeldByCurrentThread(uid); + assert uid.bytes.length == uid.length : "Oversized _uid! UID length: " + uid.length + ", bytes length: " + uid.bytes.length; + return getIndexWriterForIdFromLookup(uid, liveIndexWriterDeletesMap.current); + } + + DisposableIndexWriter getIndexWriterForIdFromOld(BytesRef uid) { + assert assertKeyedLockHeldByCurrentThread(uid); + assert uid.bytes.length == uid.length : "Oversized _uid! UID length: " + uid.length + ", bytes length: " + uid.bytes.length; + return getIndexWriterForIdFromLookup(uid, liveIndexWriterDeletesMap.old); + } + + // Avoid the issue of write lock getting applied on a separate map due to map getting rotated. 
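The javadoc above describes the trick addDeleteEntryToWriter relies on: the previous version of a document is soft-deleted by "updating" it to a throwaway dummy document, and the dummy itself is hard-deleted just before refresh so no extra tombstone survives. A stand-alone illustration of that shape against a plain Lucene IndexWriter follows; the field names and the placeholder id are hypothetical stand-ins, not the constants used by this patch.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

import java.io.IOException;

final class PartialSoftDeleteSketch {

    // Hypothetical placeholder id; the patch uses its own DUMMY_TOMBSTONE_DOC_ID constant.
    private static final String DUMMY_ID = "__dummy_tombstone__";

    // Soft-delete the previous version of `uid` without indexing a visible tombstone:
    // softUpdateDocument() soft-deletes the old version and indexes the dummy doc in its place.
    static void partialSoftDelete(IndexWriter writer, Term uid, long version, long primaryTerm) throws IOException {
        Document dummy = new Document();
        dummy.add(new StringField("_id", DUMMY_ID, Field.Store.NO));
        dummy.add(new NumericDocValuesField("_version", version));
        dummy.add(new NumericDocValuesField("_primary_term", primaryTerm));
        writer.softUpdateDocument(uid, dummy, new NumericDocValuesField("__soft_deletes", 1));
    }

    // Just before refresh, hard-delete the dummy documents so only real documents remain.
    static void dropDummyTombstones(IndexWriter writer) throws IOException {
        writer.deleteDocuments(new Term("_id", DUMMY_ID));
    }
}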
+ DisposableIndexWriter getIndexWriterForIdFromLookup(BytesRef uid, CriteriaBasedIndexWriterLookup indexWriterLookup) { + boolean isCriteriaNotNull = false; + try { + indexWriterLookup.mapReadLock.acquire(); + String criteria = getCriteriaForDoc(uid); + if (criteria != null) { + DisposableIndexWriter disposableIndexWriter = indexWriterLookup.getIndexWriterForCriteria(criteria); + if (disposableIndexWriter != null) { + isCriteriaNotNull = true; + return disposableIndexWriter; + } + } + + return null; + } finally { + if (isCriteriaNotNull == false) { + indexWriterLookup.mapReadLock.close(); + } + } + } + + public boolean hasNewIndexingOrUpdates() { + return liveIndexWriterDeletesMap.hasNewIndexingOrUpdates(); + } + + String getCriteriaForDoc(BytesRef uid) { + return liveIndexWriterDeletesMap.getCriteriaForDoc(uid); + } + + boolean assertKeyedLockHeldByCurrentThread(BytesRef uid) { + assert keyedLock.isHeldByCurrentThread(uid) : "Thread [" + Thread.currentThread().getName() + "], uid [" + uid.utf8ToString() + "]"; + return true; + } + + DisposableIndexWriter computeIndexWriterIfAbsentForCriteria( + final String criteria, + CheckedBiFunction indexWriterSupplier + ) throws IOException { + return computeIndexWriterIfAbsentForCriteria(criteria, liveIndexWriterDeletesMap, indexWriterSupplier); + } + + DisposableIndexWriter computeIndexWriterIfAbsentForCriteria( + final String criteria, + LiveIndexWriterDeletesMap currentLiveIndexWriterDeletesMap, + CheckedBiFunction indexWriterSupplier + ) { + return currentLiveIndexWriterDeletesMap.computeIndexWriterIfAbsentForCriteria( + criteria, + indexWriterSupplier, + engineConfig.getShardId() + ); + } + + public Map getMarkForRefreshIndexWriterMap() { + return liveIndexWriterDeletesMap.old.criteriaBasedIndexWriterMap; + } + + @Override + public long getFlushingBytes() { + ensureOpen(); + long flushingBytes = 0; + Collection currentWriterSet = liveIndexWriterDeletesMap.current.criteriaBasedIndexWriterMap.values() + .stream() + .map(DisposableIndexWriter::getIndexWriter) + .collect(Collectors.toSet()); + for (IndexWriter currentWriter : currentWriterSet) { + flushingBytes += currentWriter.getFlushingBytes(); + } + + return flushingBytes + accumulatingIndexWriter.getFlushingBytes(); + } + + @Override + public long getPendingNumDocs() { + ensureOpen(); + long pendingNumDocs = 0; + Collection currentWriterSet = liveIndexWriterDeletesMap.current.criteriaBasedIndexWriterMap.values() + .stream() + .map(DisposableIndexWriter::getIndexWriter) + .collect(Collectors.toSet()); + ; + for (IndexWriter currentWriter : currentWriterSet) { + pendingNumDocs += currentWriter.getPendingNumDocs(); + } + + // TODO: Should we add docs for old writer as well? + return pendingNumDocs + accumulatingIndexWriter.getPendingNumDocs(); + } + + @Override + public LiveIndexWriterConfig getConfig() { + ensureOpen(); + return accumulatingIndexWriter.getConfig(); + } + + @Override + public synchronized boolean hasPendingMerges() { + return accumulatingIndexWriter.hasPendingMerges(); + } + + // Since we are doing a commit only on parent IndexWriter, in case there is any child level writers or parent writer + // has uncommited changes, we report it as writer having uncommited changes. Since during add indexes new set of changes will be added. + @Override + public boolean hasUncommittedChanges() { + // TODO: Should we do this for old writer as well? 
+        return hasNewIndexingOrUpdates() || accumulatingIndexWriter.hasUncommittedChanges();
+    }
+
+    @Override
+    public Throwable getTragicException() {
+        Collection<IndexWriter> currentWriterSet = liveIndexWriterDeletesMap.current.criteriaBasedIndexWriterMap.values()
+            .stream()
+            .map(DisposableIndexWriter::getIndexWriter)
+            .collect(Collectors.toSet());
+        for (IndexWriter writer : currentWriterSet) {
+            if (writer.isOpen() == false && writer.getTragicException() != null) {
+                return writer.getTragicException();
+            }
+        }
+
+        Collection<IndexWriter> oldWriterSet = liveIndexWriterDeletesMap.old.criteriaBasedIndexWriterMap.values()
+            .stream()
+            .map(DisposableIndexWriter::getIndexWriter)
+            .collect(Collectors.toSet());
+        for (IndexWriter writer : oldWriterSet) {
+            if (writer.isOpen() == false && writer.getTragicException() != null) {
+                return writer.getTragicException();
+            }
+        }
+
+        if (accumulatingIndexWriter.isOpen() == false) {
+            return accumulatingIndexWriter.getTragicException();
+        }
+
+        return null;
+    }
+
+    @Override
+    public final long ramBytesUsed() {
+        ensureOpen();
+        long ramBytesUsed = 0;
+        Collection<IndexWriter> currentWriterSet = liveIndexWriterDeletesMap.current.criteriaBasedIndexWriterMap.values()
+            .stream()
+            .map(DisposableIndexWriter::getIndexWriter)
+            .collect(Collectors.toSet());
+
+        try (ReleasableLock ignore = liveIndexWriterDeletesMap.current.mapWriteLock.acquire()) {
+            for (IndexWriter indexWriter : currentWriterSet) {
+                if (indexWriter.isOpen() == true) {
+                    ramBytesUsed += indexWriter.ramBytesUsed();
+                }
+            }
+        }
+
+        Collection<IndexWriter> oldWriterSet = liveIndexWriterDeletesMap.old.criteriaBasedIndexWriterMap.values()
+            .stream()
+            .map(DisposableIndexWriter::getIndexWriter)
+            .collect(Collectors.toSet());
+        try (ReleasableLock ignore = liveIndexWriterDeletesMap.old.mapWriteLock.acquire()) {
+            for (IndexWriter indexWriter : oldWriterSet) {
+                if (indexWriter.isOpen() == true) {
+                    ramBytesUsed += indexWriter.ramBytesUsed();
+                }
+            }
+        }
+
+        return ramBytesUsed + accumulatingIndexWriter.ramBytesUsed();
+    }
+
+    // We always set live commit data on the parent writer, as we are committing data only in the parent writer (after refreshing child level
+    // writers).
+ @Override + public final synchronized void setLiveCommitData(Iterable> commitUserData) { + accumulatingIndexWriter.setLiveCommitData(commitUserData); + } + + @Override + public final long commit() throws IOException { + ensureOpen(); + return accumulatingIndexWriter.commit(); + } + + @Override + public final synchronized Iterable> getLiveCommitData() { + return accumulatingIndexWriter.getLiveCommitData(); + } + + public void rollback() throws IOException { + if (shouldClose()) { + Collection currentWriterSet = liveIndexWriterDeletesMap.current.criteriaBasedIndexWriterMap.values() + .stream() + .map(DisposableIndexWriter::getIndexWriter) + .collect(Collectors.toSet()); + + for (IndexWriter indexWriter : currentWriterSet) { + if (indexWriter.isOpen() == true) { + indexWriter.rollback(); + } + } + + Collection oldWriterSet = liveIndexWriterDeletesMap.old.criteriaBasedIndexWriterMap.values() + .stream() + .map(DisposableIndexWriter::getIndexWriter) + .collect(Collectors.toSet()); + for (IndexWriter indexWriter : oldWriterSet) { + if (indexWriter.isOpen() == true) { + indexWriter.rollback(); + } + } + + accumulatingIndexWriter.rollback(); + closed.set(true); + } + } + + private boolean shouldClose() { + return closed.get() == false; + } + + private void ensureOpen() throws AlreadyClosedException { + if (closed.get() == true) { + throw new AlreadyClosedException("CompositeIndexWriter is closed"); + } + } + + public boolean isOpen() { + return closed.get() == false; + } + + public boolean isWriteLockedByCurrentThread() { + return liveIndexWriterDeletesMap.current.mapLock.isWriteLockedByCurrentThread() + || liveIndexWriterDeletesMap.old.mapLock.isWriteLockedByCurrentThread(); + } + + @Override + public Releasable obtainWriteLockOnAllMap() { + ReleasableLock lock1 = this.getOldWriteLock().acquire(); + ReleasableLock lock2 = this.getNewWriteLock().acquire(); + return () -> { + lock1.close(); + lock2.close(); + }; + } + + @Override + public void close() throws IOException { + rollback(); + liveIndexWriterDeletesMap = new LiveIndexWriterDeletesMap(); + } + + @Override + public synchronized void deleteUnusedFiles() throws IOException { + accumulatingIndexWriter.deleteUnusedFiles(); + } + + public IndexWriter getAccumulatingIndexWriter() { + return accumulatingIndexWriter; + } + + @Override + public long addDocuments(Iterable docs, Term uid) throws IOException { + // We obtain a read lock on a child level IndexWriter and then return it. Post Indexing completes, we close this + // IndexWriter. 
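The comment above, and the addDocuments/addDocument/softUpdateDocument* overrides that follow, share one write path: resolve the document's grouping criteria, obtain (or lazily create) the child writer for that criteria, remember the uid-to-criteria mapping so later updates and deletes reach the same writer, and only then index. A much-simplified sketch of that routing, with hypothetical names and without the lookup read lock and per-uid lock the real code holds:

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

final class CriteriaRouterSketch {

    private final Map<String, IndexWriter> writersByCriteria = new ConcurrentHashMap<>();
    private final Map<String, String> criteriaById = new ConcurrentHashMap<>();
    private final Function<String, IndexWriter> childWriterFactory; // opens a writer on a criteria-specific directory

    CriteriaRouterSketch(Function<String, IndexWriter> childWriterFactory) {
        this.childWriterFactory = childWriterFactory;
    }

    long add(String id, String criteria, Document doc) {
        // Lazily create one child writer per criteria, record where this id went, then index.
        IndexWriter writer = writersByCriteria.computeIfAbsent(criteria, childWriterFactory);
        criteriaById.put(id, criteria);
        try {
            return writer.addDocument(doc);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    IndexWriter writerFor(String id) {
        // Updates and deletes are routed to the same child writer the original document went to.
        String criteria = criteriaById.get(id);
        return criteria == null ? null : writersByCriteria.get(criteria);
    }
}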
+ ensureOpen(); + final String criteria = getGroupingCriteriaForDoc(docs.iterator().next()); + DisposableIndexWriter disposableIndexWriter = getAssociatedIndexWriterForCriteria(criteria); + try ( + CriteriaBasedIndexWriterLookup.CriteriaBasedWriterLock ignoreLock = disposableIndexWriter.getLookupMap().getMapReadLock(); + Releasable ignore1 = acquireLock(uid.bytes()) + ) { + putCriteria(uid.bytes(), criteria); + return disposableIndexWriter.getIndexWriter().addDocuments(docs); + } + } + + @Override + public long addDocument(ParseContext.Document doc, Term uid) throws IOException { + ensureOpen(); + final String criteria = getGroupingCriteriaForDoc(doc); + DisposableIndexWriter disposableIndexWriter = getAssociatedIndexWriterForCriteria(criteria); + try ( + CriteriaBasedIndexWriterLookup.CriteriaBasedWriterLock ignoreLock = disposableIndexWriter.getLookupMap().getMapReadLock(); + Releasable ignore1 = acquireLock(uid.bytes()) + ) { + putCriteria(uid.bytes(), criteria); + return disposableIndexWriter.getIndexWriter().addDocument(doc); + } + } + + @Override + public void softUpdateDocuments( + Term uid, + Iterable docs, + long version, + long seqNo, + long primaryTerm, + Field... softDeletesField + ) throws IOException { + ensureOpen(); + final String criteria = getGroupingCriteriaForDoc(docs.iterator().next()); + DisposableIndexWriter disposableIndexWriter = getAssociatedIndexWriterForCriteria(criteria); + try ( + CriteriaBasedIndexWriterLookup.CriteriaBasedWriterLock ignoreLock = disposableIndexWriter.getLookupMap().getMapReadLock(); + Releasable ignore1 = acquireLock(uid.bytes()) + ) { + putCriteria(uid.bytes(), criteria); + disposableIndexWriter.getIndexWriter().softUpdateDocuments(uid, docs, softDeletesField); + // TODO: Do we need to add more info in delete entry like id, seqNo, primaryTerm for debugging?? + // TODO: Entry can be null for first version or if there is term bum up (validate if this is because we need to keep previous + // version). + // Validate if this is going wrong?? Last entry should be checked to handle scenario when there is a indexing post delete. + disposableIndexWriter.getLookupMap().putLastDeleteEntry(uid.bytes(), new DeleteEntry(uid, version, seqNo, primaryTerm)); + } + } + + @Override + public void softUpdateDocument( + Term uid, + ParseContext.Document doc, + long version, + long seqNo, + long primaryTerm, + Field... softDeletesField + ) throws IOException { + ensureOpen(); + final String criteria = getGroupingCriteriaForDoc(doc); + DisposableIndexWriter disposableIndexWriter = getAssociatedIndexWriterForCriteria(criteria); + try ( + CriteriaBasedIndexWriterLookup.CriteriaBasedWriterLock ignoreLock = disposableIndexWriter.getLookupMap().getMapReadLock(); + Releasable ignore1 = acquireLock(uid.bytes()) + ) { + putCriteria(uid.bytes(), criteria); + disposableIndexWriter.getIndexWriter().softUpdateDocument(uid, doc, softDeletesField); + // TODO: Do we need to add more info in delete entry like id, seqNo, primaryTerm for debugging?? + // TODO: Entry can be null for first version or if there is term bum up (validate if this is because we need to keep previous + // version). + // Validate if this is going wrong?? Last entry should be checked to handle scenario when there is a indexing post delete. 
+ disposableIndexWriter.getLookupMap().putLastDeleteEntry(uid.bytes(), new DeleteEntry(uid, version, seqNo, primaryTerm)); + } + } + + /** + * For deleteDocument call, we will take a lock on current writer, do a partial delete of the + * document (delete without indexing tombstone entry). We do a similar thing for old map IndexWriter. For parent, we + * do a full delete (delete doc + tombstone entry). This ensures only a single tombstone entry is made after delete + * operation. Also doing a full delete on parent ensures, that accumulating IndexWriter is never left in an + * inconsistent state (which may become an issue with segrep). + * + * @param uid uid of the document that is getting deleted. + * @param isStaleOperation signify if this is a stale operation (say if document is already deleted). + * @param doc tombstone entry. + * @param softDeletesField the soft delete field + * + * @throws IOException if there is a low-level IO error. + */ + @Override + public void deleteDocument( + Term uid, + boolean isStaleOperation, + ParseContext.Document doc, + long version, + long seqNo, + long primaryTerm, + Field... softDeletesField + ) throws IOException { + ensureOpen(); + try (Releasable ignore1 = acquireLock(uid.bytes())) { + CompositeIndexWriter.DisposableIndexWriter currentDisposableWriter = getIndexWriterForIdFromCurrent(uid.bytes()); + if (currentDisposableWriter != null) { + try ( + CriteriaBasedIndexWriterLookup.CriteriaBasedWriterLock ignore = currentDisposableWriter.getLookupMap().getMapReadLock() + ) { + if (currentDisposableWriter.getLookupMap().isClosed() == false && isStaleOperation == false) { + addDeleteEntryToWriter(new DeleteEntry(uid, version, seqNo, primaryTerm), currentDisposableWriter.getIndexWriter()); + } + } + } + + CompositeIndexWriter.DisposableIndexWriter oldDisposableWriter = getIndexWriterForIdFromOld(uid.bytes()); + if (oldDisposableWriter != null) { + try (CriteriaBasedIndexWriterLookup.CriteriaBasedWriterLock ignore = oldDisposableWriter.getLookupMap().getMapReadLock()) { + if (oldDisposableWriter.getLookupMap().isClosed() == false && isStaleOperation == false) { + addDeleteEntryToWriter(new DeleteEntry(uid, version, seqNo, primaryTerm), oldDisposableWriter.getIndexWriter()); + } + } + } + + deleteInLucene(uid, isStaleOperation, accumulatingIndexWriter, doc, softDeletesField); + } + } + + private void deleteInLucene( + Term uid, + boolean isStaleOperation, + IndexWriter currentWriter, + Iterable doc, + Field... softDeletesField + ) throws IOException { + if (isStaleOperation) { + currentWriter.addDocument(doc); + } else { + currentWriter.softUpdateDocument(uid, doc, softDeletesField); + } + } + + private DisposableIndexWriter getAssociatedIndexWriterForCriteria(final String criteria) throws IOException { + return computeIndexWriterIfAbsentForCriteria(criteria, childIndexWriterFactory); + } + + private String getGroupingCriteriaForDoc(final ParseContext.Document doc) { + return doc == null ? 
null : doc.getGroupingCriteria(); + } + + @Override + public void forceMergeDeletes(boolean doWait) throws IOException { + accumulatingIndexWriter.forceMergeDeletes(doWait); + } + + @Override + public final void maybeMerge() throws IOException { + ensureOpen(); + accumulatingIndexWriter.maybeMerge(); + } + + @Override + public void forceMerge(int maxNumSegments, boolean doWait) throws IOException { + ensureOpen(); + accumulatingIndexWriter.forceMerge(maxNumSegments, doWait); + } + + CompositeIndexWriter.DisposableIndexWriter createChildWriterUtil( + String associatedCriteria, + CompositeIndexWriter.CriteriaBasedIndexWriterLookup lookup + ) throws IOException { + return new CompositeIndexWriter.DisposableIndexWriter( + nativeIndexWriterFactory.createWriter( + store.newTempDirectory(CHILD_DIRECTORY_PREFIX + associatedCriteria + "_" + UUID.randomUUID()), + new OpenSearchConcurrentMergeScheduler( + engineConfig.getShardId(), + engineConfig.getIndexSettings(), + engineConfig.getMergedSegmentTransferTracker() + ), + true, + IndexWriterConfig.OpenMode.CREATE, + null, + softDeletesPolicy, + config(), + logger, + associatedCriteria + ), + lookup + ); + } + + private EngineConfig config() { + return engineConfig; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/DocumentIndexWriter.java b/server/src/main/java/org/opensearch/index/engine/DocumentIndexWriter.java new file mode 100644 index 0000000000000..47bdf7b2b5073 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/DocumentIndexWriter.java @@ -0,0 +1,94 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LiveIndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.lease.Releasable; +import org.opensearch.index.mapper.ParseContext; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Map; + +/** + * Primary interface for document indexing operation in OpenSearch. This interface is mapped after Lucene's IndexWriter. + * + */ +public interface DocumentIndexWriter extends Closeable, ReferenceManager.RefreshListener { + + long getFlushingBytes(); + + long getPendingNumDocs(); + + LiveIndexWriterConfig getConfig(); + + boolean hasPendingMerges(); + + boolean hasUncommittedChanges(); + + Throwable getTragicException(); + + long ramBytesUsed(); + + void setLiveCommitData(Iterable> commitUserData); + + long commit() throws IOException; + + Iterable> getLiveCommitData(); + + void rollback() throws IOException; + + void close() throws IOException; + + void deleteUnusedFiles() throws IOException; + + long addDocuments(Iterable docs, Term uid) throws IOException; + + long addDocument(ParseContext.Document doc, Term uid) throws IOException; + + void softUpdateDocuments( + Term uid, + Iterable docs, + long version, + long seqNo, + long primaryTerm, + Field... softDeletesField + ) throws IOException; + + void softUpdateDocument(Term uid, ParseContext.Document doc, long version, long seqNo, long primaryTerm, Field... softDeletesField) + throws IOException; + + void deleteDocument( + Term uid, + boolean isStaleOperation, + ParseContext.Document doc, + long version, + long seqNo, + long primaryTerm, + Field... 
softDeletesField + ) throws IOException; + + void forceMergeDeletes(boolean doWait) throws IOException; + + void maybeMerge() throws IOException; + + void forceMerge(int maxNumSegments, boolean doWait) throws IOException; + + IndexWriter getAccumulatingIndexWriter(); + + boolean hasNewIndexingOrUpdates(); + + boolean isWriteLockedByCurrentThread(); + + Releasable obtainWriteLockOnAllMap(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index d6aa38814a14d..2c42a72005c95 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -152,7 +152,7 @@ public abstract class Engine implements LifecycleAware, Closeable { protected final Store store; protected final AtomicBoolean isClosed = new AtomicBoolean(false); private final CounterMetric totalUnreferencedFileCleanUpsPerformed = new CounterMetric(); - private final CountDownLatch closedLatch = new CountDownLatch(1); + protected final CountDownLatch closedLatch = new CountDownLatch(1); protected final EventListener eventListener; protected final ReentrantLock failEngineLock = new ReentrantLock(); protected final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock(); @@ -2115,7 +2115,7 @@ public void close() throws IOException { awaitPendingClose(); } - private void awaitPendingClose() { + protected void awaitPendingClose() { try { closedLatch.await(); } catch (InterruptedException e) { diff --git a/server/src/main/java/org/opensearch/index/engine/IndexWriterFactory.java b/server/src/main/java/org/opensearch/index/engine/IndexWriterFactory.java new file mode 100644 index 0000000000000..f37abb79401c1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/IndexWriterFactory.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.store.Directory; + +import java.io.IOException; + +/** + * Base IndexWriter factory. 
+ * + * @opensearch.api + */ +public interface IndexWriterFactory { + IndexWriter createWriter(Directory directory, IndexWriterConfig config) throws IOException; + + IndexWriter createWriter( + Directory directory, + MergeScheduler mergeScheduler, + Boolean commitOnClose, + IndexWriterConfig.OpenMode openMode, + CombinedDeletionPolicy deletionPolicy, + SoftDeletesPolicy softDeletesPolicy, + EngineConfig engineConfig, + Logger logger, + String associatedCriteria + ) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java b/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java index 6393a91a8d671..2b643bde26caf 100644 --- a/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/IngestionEngine.java @@ -8,7 +8,6 @@ package org.opensearch.index.engine; -import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; @@ -94,7 +93,7 @@ private void initializeStreamPoller( + "-" + engineConfig.getShardId().getId(); - Map commitData = commitDataAsMap(indexWriter); + Map commitData = commitDataAsMap(documentIndexWriter); StreamPoller.ResetState resetState = ingestionSource.getPointerInitReset().getType(); String resetValue = ingestionSource.getPointerInitReset().getValue(); IngestionShardPointer startPointer = null; @@ -224,26 +223,33 @@ public void indexInternal(Index index, boolean isCreateMode) throws IOException private IndexResult indexIntoLucene(Index index, boolean isCreateMode) throws IOException { if (isCreateMode || index.getAutoGeneratedIdTimestamp() != UNSET_AUTO_GENERATED_TIMESTAMP) { - addDocs(index.docs(), indexWriter); + addDocs(index.docs(), documentIndexWriter, index.uid()); } else { - updateDocs(index.uid(), index.docs(), indexWriter); + updateDocs(index.uid(), index.docs(), documentIndexWriter, index.version(), index.seqNo(), index.primaryTerm()); } return new IndexResult(index.version(), index.primaryTerm(), index.seqNo(), true); } - private void addDocs(final List docs, final IndexWriter indexWriter) throws IOException { + private void addDocs(final List docs, final DocumentIndexWriter indexWriter, Term uid) throws IOException { if (docs.size() > 1) { - indexWriter.addDocuments(docs); + indexWriter.addDocuments(docs, uid); } else { - indexWriter.addDocument(docs.get(0)); + indexWriter.addDocument(docs.get(0), uid); } } - private void updateDocs(final Term uid, final List docs, final IndexWriter indexWriter) throws IOException { + private void updateDocs( + final Term uid, + final List docs, + final DocumentIndexWriter indexWriter, + long version, + long seqNo, + long primaryTerm + ) throws IOException { if (docs.size() > 1) { - indexWriter.softUpdateDocuments(uid, docs, softDeletesField); + indexWriter.softUpdateDocuments(uid, docs, version, seqNo, primaryTerm, softDeletesField); } else { - indexWriter.softUpdateDocument(uid, docs.get(0), softDeletesField); + indexWriter.softUpdateDocument(uid, docs.get(0), version, seqNo, primaryTerm, softDeletesField); } } @@ -278,8 +284,15 @@ public void deleteInternal(Delete delete) throws IOException { + doc + " ]"; doc.add(softDeletesField); - - indexWriter.softUpdateDocument(delete.uid(), doc, softDeletesField); + documentIndexWriter.deleteDocument( + delete.uid(), + false, + doc, + delete.version(), + delete.seqNo(), + delete.primaryTerm(), + softDeletesField + ); if (isExternalVersioning) { 
versionMap.putDeleteUnderLock( delete.uid().bytes(), @@ -343,7 +356,7 @@ public Translog.Snapshot newChangesSnapshot( * source. */ @Override - protected void commitIndexWriter(final IndexWriter writer, final String translogUUID) throws IOException { + protected void commitIndexWriter(final DocumentIndexWriter writer, final String translogUUID) throws IOException { try { final long localCheckpoint = localCheckpointTracker.getProcessedCheckpoint(); final IngestionShardPointer batchStartPointer = streamPoller.getBatchStartPointer(); @@ -480,7 +493,7 @@ protected TranslogManager createTranslogManager( } protected Map commitDataAsMap() { - return commitDataAsMap(indexWriter); + return commitDataAsMap(documentIndexWriter); } @Override diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index 3dd0f0b10cb8b..3438ae319b197 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -37,16 +37,13 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexCommit; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LiveIndexWriterConfig; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfos; -import org.apache.lucene.index.SoftDeletesRetentionMergePolicy; import org.apache.lucene.index.StandardDirectoryReader; import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.Term; @@ -64,18 +61,14 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.InfoStream; import org.opensearch.ExceptionsHelper; import org.opensearch.OpenSearchException; -import org.opensearch.Version; import org.opensearch.action.index.IndexRequest; -import org.opensearch.common.Booleans; import org.opensearch.common.Nullable; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lease.Releasable; import org.opensearch.common.lease.Releasables; -import org.opensearch.common.lucene.LoggerInfoStream; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.index.DerivedSourceDirectoryReader; import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; @@ -100,7 +93,6 @@ import org.opensearch.index.mapper.ParseContext; import org.opensearch.index.mapper.ParsedDocument; import org.opensearch.index.mapper.SeqNoFieldMapper; -import org.opensearch.index.mapper.SourceFieldMapper; import org.opensearch.index.mapper.Uid; import org.opensearch.index.merge.MergeStats; import org.opensearch.index.merge.MergedSegmentTransferTracker; @@ -140,7 +132,6 @@ import java.util.concurrent.locks.ReentrantLock; import java.util.function.BiConsumer; import java.util.function.BiFunction; -import java.util.function.UnaryOperator; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -163,7 +154,7 @@ public class InternalEngine extends Engine { protected volatile long lastDeleteVersionPruneTimeMSec; protected final 
TranslogManager translogManager; - protected final IndexWriter indexWriter; + protected final DocumentIndexWriter documentIndexWriter; protected final LocalCheckpointTracker localCheckpointTracker; protected final AtomicLong maxUnsafeAutoIdTimestamp = new AtomicLong(-1); protected final SoftDeletesPolicy softDeletesPolicy; @@ -206,6 +197,7 @@ public class InternalEngine extends Engine { private final CounterMetric numDocAppends = new CounterMetric(); private final CounterMetric numDocUpdates = new CounterMetric(); private final LastRefreshedCheckpointListener lastRefreshedCheckpointListener; + private final boolean isContextAwareEnabled; private final CompletionStatsCache completionStatsCache; @@ -223,6 +215,7 @@ public class InternalEngine extends Engine { private final AtomicLong inFlightDocCount = new AtomicLong(); private final int maxDocs; + private final IndexWriterFactory nativeIndexWriterFactory; public InternalEngine(EngineConfig engineConfig) { this(engineConfig, IndexWriter.MAX_DOCS, LocalCheckpointTracker::new, TranslogEventListener.NOOP_TRANSLOG_EVENT_LISTENER); @@ -250,7 +243,7 @@ public TranslogManager translogManager() { } final TranslogDeletionPolicy translogDeletionPolicy = getTranslogDeletionPolicy(engineConfig); store.incRef(); - IndexWriter writer = null; + DocumentIndexWriter writer = null; ExternalReaderManager externalReaderManager = null; OpenSearchReaderManager internalReaderManager = null; EngineMergeScheduler scheduler = null; @@ -303,12 +296,14 @@ public void onFailure(String reason, Exception ex) { translogManager::getLastSyncedGlobalCheckpoint ); this.localCheckpointTracker = createLocalCheckpointTracker(localCheckpointTrackerSupplier); - writer = createWriter(); + this.isContextAwareEnabled = engineConfig.getIndexSettings().isContextAwareEnabled(); + this.nativeIndexWriterFactory = new NativeLuceneIndexWriterFactory(); + writer = getDocumentIndexWriter(); bootstrapAppendOnlyInfoFromWriter(writer); final Map commitData = commitDataAsMap(writer); historyUUID = loadHistoryUUID(commitData); forceMergeUUID = commitData.get(FORCE_MERGE_UUID_KEY); - indexWriter = writer; + documentIndexWriter = writer; } catch (IOException | TranslogCorruptedException e) { throw new EngineCreationFailureException(shardId, "failed to create engine", e); } catch (AssertionError e) { @@ -331,8 +326,12 @@ public void onFailure(String reason, Exception ex) { for (ReferenceManager.RefreshListener listener : engineConfig.getInternalRefreshListener()) { this.internalReaderManager.addListener(listener); } + + // Set the Refresh checkpoint first and then sync child with parent to ensure parent Checkpoint is grater than Refresh + // checkpoint. 
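The ordering note above is about refresh-listener registration: the composite writer is itself registered as a listener on the internal reader manager (see the addListener calls just below), and its beforeRefresh() folds the sealed child segments into the parent writer, so the refresh-checkpoint listener has to be added first. As a rough, stand-alone illustration of such a fold-on-refresh listener (hypothetical names, no locking or error handling, not the patch's actual implementation):

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.store.Directory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Fold "child" writers into the parent writer when a refresh starts, so the refreshed
// reader (which is opened on the parent) sees the documents they hold.
final class FoldOnRefreshSketch implements ReferenceManager.RefreshListener {

    private final IndexWriter parent;
    private final List<IndexWriter> children;

    FoldOnRefreshSketch(IndexWriter parent, List<IndexWriter> children) {
        this.parent = parent;
        this.children = children;
    }

    @Override
    public void beforeRefresh() throws IOException {
        List<Directory> sealed = new ArrayList<>();
        for (IndexWriter child : children) {
            Directory directory = child.getDirectory();
            child.close();          // seal the child's segments
            sealed.add(directory);
        }
        if (sealed.isEmpty() == false) {
            parent.addIndexes(sealed.toArray(new Directory[0])); // import the sealed segments
        }
    }

    @Override
    public void afterRefresh(boolean didRefresh) {
        // Once the refreshed reader is live, the drained child writers can be discarded.
    }
}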
this.lastRefreshedCheckpointListener = new LastRefreshedCheckpointListener(localCheckpointTracker.getProcessedCheckpoint()); this.internalReaderManager.addListener(lastRefreshedCheckpointListener); + internalReaderManager.addListener(documentIndexWriter); maxSeqNoOfUpdatesOrDeletes = new AtomicLong( SequenceNumbers.max(localCheckpointTracker.getMaxSeqNo(), translogManager.getMaxSeqNo()) ); @@ -362,6 +361,17 @@ public void onFailure(String reason, Exception ex) { logger.trace("created new InternalEngine"); } + private DocumentIndexWriter getDocumentIndexWriter() throws IOException { + DocumentIndexWriter writer; + if (isContextAwareEnabled) { + writer = new CompositeIndexWriter(engineConfig, createWriter(), softDeletesPolicy, softDeletesField, nativeIndexWriterFactory); + } else { + writer = new LuceneIndexWriter(createWriter()); + } + + return writer; + } + protected TranslogManager createTranslogManager( String translogUUID, TranslogDeletionPolicy translogDeletionPolicy, @@ -535,7 +545,7 @@ public int fillSeqNoGaps(long primaryTerm) throws IOException { } } - private void bootstrapAppendOnlyInfoFromWriter(IndexWriter writer) { + private void bootstrapAppendOnlyInfoFromWriter(DocumentIndexWriter writer) { for (Map.Entry entry : writer.getLiveCommitData()) { if (MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID.equals(entry.getKey())) { assert maxUnsafeAutoIdTimestamp.get() == -1 : "max unsafe timestamp was assigned already [" @@ -554,7 +564,7 @@ boolean hasSnapshottedCommits() { private void revisitIndexDeletionPolicyOnTranslogSynced() { try { if (combinedDeletionPolicy.hasUnreferencedCommits()) { - indexWriter.deleteUnusedFiles(); + documentIndexWriter.deleteUnusedFiles(); } translogManager.trimUnreferencedReaders(); } catch (IOException ex) { @@ -576,7 +586,7 @@ public String getForceMergeUUID() { /** Returns how many bytes we are currently moving from indexing buffer to segments on disk */ @Override public long getWritingBytes() { - return indexWriter.getFlushingBytes() + versionMap.getRefreshingBytes(); + return documentIndexWriter.getFlushingBytes() + versionMap.getRefreshingBytes(); } private ExternalReaderManager createReaderManager(RefreshWarmerListener externalRefreshListener) throws EngineException { @@ -584,8 +594,9 @@ private ExternalReaderManager createReaderManager(RefreshWarmerListener external OpenSearchReaderManager internalReaderManager = null; try { try { + // We always open reader on parent IndexWriter. 
final OpenSearchDirectoryReader directoryReader = OpenSearchDirectoryReader.wrap( - DirectoryReader.open(indexWriter), + DirectoryReader.open(documentIndexWriter.getAccumulatingIndexWriter()), shardId ); internalReaderManager = new OpenSearchReaderManager(directoryReader); @@ -596,7 +607,7 @@ private ExternalReaderManager createReaderManager(RefreshWarmerListener external } catch (IOException e) { maybeFailEngine("start", e); try { - indexWriter.rollback(); + documentIndexWriter.rollback(); } catch (IOException inner) { // iw is closed below e.addSuppressed(inner); } @@ -604,7 +615,7 @@ private ExternalReaderManager createReaderManager(RefreshWarmerListener external } } finally { if (success == false) { // release everything we created on a failure - IOUtils.closeWhileHandlingException(internalReaderManager, indexWriter); + IOUtils.closeWhileHandlingException(internalReaderManager, documentIndexWriter); } } } @@ -763,6 +774,11 @@ protected VersionValue resolveDocVersion(final Operation op, boolean loadSeqNo) if (versionValue == null) { assert incrementIndexVersionLookup(); // used for asserting in tests final VersionsAndSeqNoResolver.DocIdAndVersion docIdAndVersion; + + // Doc version will be maintained in version map till afterRefresh of parent ReaderManager. Since before refresh we sync the + // data of child level + // IndexWriters with parent writers, version will be either present in version map or in parent IndexWriter. So we do not need + // to resolve version from child level IndexWriters (both from mark for refresh and active IndexWriter). try (Searcher searcher = acquireSearcher("load_version", SearcherScope.INTERNAL)) { docIdAndVersion = VersionsAndSeqNoResolver.loadDocIdAndVersion(searcher.getIndexReader(), op.uid(), loadSeqNo); } @@ -1151,19 +1167,21 @@ private IndexResult indexIntoLucene(Index index, IndexingStrategy plan) throws I index.parsedDoc().version().setLongValue(plan.versionForIndexing); try { if (plan.addStaleOpToLucene) { - addStaleDocs(index.docs(), indexWriter); + addStaleDocs(index.docs(), documentIndexWriter, index.uid()); } else if (plan.useLuceneUpdateDocument) { assert assertMaxSeqNoOfUpdatesIsAdvanced(index.uid(), index.seqNo(), true, true); - updateDocs(index.uid(), index.docs(), indexWriter); + updateDocs(index.uid(), index.docs(), documentIndexWriter, plan.versionForIndexing, index.seqNo(), index.primaryTerm()); } else { // document does not exists, we can optimize for create, but double check if assertions are running assert assertDocDoesNotExist(index, canOptimizeAddDocument(index) == false); - addDocs(index.docs(), indexWriter); + addDocs(index.docs(), documentIndexWriter, index.uid()); } + return new IndexResult(plan.versionForIndexing, index.primaryTerm(), index.seqNo(), plan.currentNotFoundOrDeleted); } catch (Exception ex) { if (ex instanceof AlreadyClosedException == false - && indexWriter.getTragicException() == null + // TODO: Check if isClose check in getTragicException will cause any issue here + && documentIndexWriter.getTragicException() == null && treatDocumentFailureAsTragicError(index) == false) { /* There is no tragic event recorded so this must be a document failure. 
* @@ -1218,23 +1236,23 @@ private boolean mayHaveBeenIndexedBefore(Index index) { return mayHaveBeenIndexBefore; } - private void addDocs(final List docs, final IndexWriter indexWriter) throws IOException { + private void addDocs(final List docs, final DocumentIndexWriter indexWriter, Term uid) throws IOException { if (docs.size() > 1) { - indexWriter.addDocuments(docs); + indexWriter.addDocuments(docs, uid); } else { - indexWriter.addDocument(docs.get(0)); + indexWriter.addDocument(docs.get(0), uid); } numDocAppends.inc(docs.size()); } - private void addStaleDocs(final List docs, final IndexWriter indexWriter) throws IOException { + private void addStaleDocs(final List docs, final DocumentIndexWriter indexWriter, Term uid) throws IOException { for (ParseContext.Document doc : docs) { doc.add(softDeletesField); // soft-deleted every document before adding to Lucene } if (docs.size() > 1) { - indexWriter.addDocuments(docs); + indexWriter.addDocuments(docs, uid); } else { - indexWriter.addDocument(docs.get(0)); + indexWriter.addDocument(docs.get(0), uid); } } @@ -1347,7 +1365,14 @@ private boolean assertDocDoesNotExist(final Index index, final boolean allowDele return true; } - private void updateDocs(final Term uid, final List docs, final IndexWriter indexWriter) throws IOException { + private void updateDocs( + final Term uid, + final List docs, + final DocumentIndexWriter indexWriter, + long version, + long seqNo, + long primaryTerm + ) throws IOException { if (engineConfig.getIndexSettings().getIndexMetadata().isAppendOnlyIndex()) { failEngine( "Failing shard as update operation is not allowed for append only index ", @@ -1356,9 +1381,9 @@ private void updateDocs(final Term uid, final List docs, } if (docs.size() > 1) { - indexWriter.softUpdateDocuments(uid, docs, softDeletesField); + indexWriter.softUpdateDocuments(uid, docs, version, seqNo, primaryTerm, softDeletesField); } else { - indexWriter.softUpdateDocument(uid, docs.get(0), softDeletesField); + indexWriter.softUpdateDocument(uid, docs.get(0), version, seqNo, primaryTerm, softDeletesField); } numDocUpdates.inc(docs.size()); } @@ -1455,7 +1480,7 @@ private Exception tryAcquireInFlightDocs(Operation operation, int addingDocs) { assert operation.origin() == Operation.Origin.PRIMARY : operation; assert operation.seqNo() == SequenceNumbers.UNASSIGNED_SEQ_NO : operation; assert addingDocs > 0 : addingDocs; - final long totalDocs = indexWriter.getPendingNumDocs() + inFlightDocCount.addAndGet(addingDocs); + long totalDocs = inFlightDocCount.addAndGet(addingDocs) + documentIndexWriter.getPendingNumDocs(); if (totalDocs > maxDocs) { releaseInFlightDocs(addingDocs); return new IllegalArgumentException( @@ -1589,18 +1614,22 @@ private DeleteResult deleteInLucene(Delete delete, DeletionStrategy plan) throws + doc + " ]"; doc.add(softDeletesField); - if (plan.addStaleOpToLucene || plan.currentlyDeleted) { - indexWriter.addDocument(doc); - } else { - indexWriter.softUpdateDocument(delete.uid(), doc, softDeletesField); - } + documentIndexWriter.deleteDocument( + delete.uid(), + plan.addStaleOpToLucene || plan.currentlyDeleted, + doc, + plan.versionOfDeletion, + delete.seqNo(), + delete.primaryTerm(), + softDeletesField + ); return new DeleteResult(plan.versionOfDeletion, delete.primaryTerm(), delete.seqNo(), plan.currentlyDeleted == false); } catch (final Exception ex) { /* * Document level failures when deleting are unexpected, we likely hit something fatal such as the Lucene index being corrupt, * or the Lucene document limit. 
We have already issued a sequence number here so this is fatal, fail the engine. */ - if (ex instanceof AlreadyClosedException == false && indexWriter.getTragicException() == null) { + if (ex instanceof AlreadyClosedException == false && documentIndexWriter.getTragicException() == null) { final String reason = String.format( Locale.ROOT, "delete id[%s] origin [%s] seq#[%d] failed at the document level", @@ -1720,6 +1749,33 @@ public NoOpResult noOp(final NoOp noOp) throws IOException { return noOpResult; } + @Override + public void flushAndClose() throws IOException { + if (isClosed.get() == false) { + logger.trace("flushAndClose now acquire writeLock"); + try (ReleasableLock lock = writeLock.acquire(); Releasable ignored = documentIndexWriter.obtainWriteLockOnAllMap();) { + flushAndCloseInternal(); + } + } + awaitPendingClose(); + } + + private void flushAndCloseInternal() throws IOException { + logger.trace("flushAndClose now acquired writeLock"); + try { + logger.debug("flushing shard on close - this might take some time to sync files to disk"); + try { + // TODO we might force a flush in the future since we have the write lock already even though recoveries + // are running. + flush(); + } catch (AlreadyClosedException ex) { + logger.debug("engine already closed - skipping flushAndClose"); + } + } finally { + close(); // double close is not a problem + } + } + private NoOpResult innerNoOp(final NoOp noOp) throws IOException { assert readLock.isHeldByCurrentThread() || writeLock.isHeldByCurrentThread(); assert noOp.seqNo() > SequenceNumbers.NO_OPS_PERFORMED; @@ -1748,14 +1804,15 @@ private NoOpResult innerNoOp(final NoOp noOp) throws IOException { assert doc.getField(SeqNoFieldMapper.TOMBSTONE_NAME) != null : "Noop tombstone document but _tombstone field is not set [" + doc + " ]"; doc.add(softDeletesField); - indexWriter.addDocument(doc); + // We add Stale entry only on parent IndexWriter. + documentIndexWriter.getAccumulatingIndexWriter().addDocument(doc); } catch (final Exception ex) { /* * Document level failures when adding a no-op are unexpected, we likely hit something fatal such as the Lucene * index being corrupt, or the Lucene document limit. We have already issued a sequence number here so this is * fatal, fail the engine. */ - if (ex instanceof AlreadyClosedException == false && indexWriter.getTragicException() == null) { + if (ex instanceof AlreadyClosedException == false && documentIndexWriter.getTragicException() == null) { failEngine("no-op origin[" + noOp.origin() + "] seq#[" + noOp.seqNo() + "] failed at document level", ex); } throw ex; @@ -1813,7 +1870,8 @@ final boolean refresh(String source, SearcherScope scope, boolean block) throws // the second refresh will only do the extra work we have to do for warming caches etc. ReferenceManager referenceManager = getReferenceManager(scope); // it is intentional that we never refresh both internal / external together - if (block) { + // When context aware is enabled, we are always doing a blocking refresh. + if (block || isContextAwareEnabled) { referenceManager.maybeRefreshBlocking(); refreshed = true; } else { @@ -1898,7 +1956,7 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { // Only flush if (1) Lucene has uncommitted docs, or (2) forced by caller, or (3) the // newly created commit points to a different translog generation (can free translog), // or (4) the local checkpoint information in the last commit is stale, which slows down future recoveries. 
- boolean hasUncommittedChanges = indexWriter.hasUncommittedChanges(); + boolean hasUncommittedChanges = documentIndexWriter.hasUncommittedChanges(); boolean shouldPeriodicallyFlush = shouldPeriodicallyFlush(); if (hasUncommittedChanges || force @@ -1917,7 +1975,7 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { final GatedCloseable latestCommit = engineConfig.getIndexSettings().isSegRepEnabledOrRemoteNode() ? acquireLastIndexCommit(false) : null; - commitIndexWriter(indexWriter, translogManager.getTranslogUUID()); + commitIndexWriter(documentIndexWriter, translogManager.getTranslogUUID()); logger.trace("finished commit for flush"); // a temporary debugging to investigate test failure - issue#32827. Remove when the issue is resolved @@ -1930,7 +1988,6 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { // we need to refresh in order to clear older version values refresh("version_table_flush", SearcherScope.INTERNAL, true); - if (latestCommit != null) { latestCommit.close(); } @@ -2045,9 +2102,9 @@ public void forceMerge( * thread for optimize, and the 'optimizeLock' guarding this code, and (3) ConcurrentMergeScheduler * syncs calls to findForcedMerges. */ - assert indexWriter.getConfig().getMergePolicy() instanceof OpenSearchMergePolicy : "MergePolicy is " - + indexWriter.getConfig().getMergePolicy().getClass().getName(); - OpenSearchMergePolicy mp = (OpenSearchMergePolicy) indexWriter.getConfig().getMergePolicy(); + assert documentIndexWriter.getConfig().getMergePolicy() instanceof OpenSearchMergePolicy : "MergePolicy is " + + documentIndexWriter.getConfig().getMergePolicy().getClass().getName(); + OpenSearchMergePolicy mp = (OpenSearchMergePolicy) documentIndexWriter.getConfig().getMergePolicy(); optimizeLock.lock(); try { ensureOpen(); @@ -2057,14 +2114,20 @@ public void forceMerge( } store.incRef(); // increment the ref just to ensure nobody closes the store while we optimize try { + if (isContextAwareEnabled) { + // This ensures all segments in memory are available to parent writer before triggering a force merge. + // This will be needed as in prior scenario, we call a flush before triggering force merge in lucene. + refresh("force merge"); + } + if (onlyExpungeDeletes) { assert upgrade == false; - indexWriter.forceMergeDeletes(true /* blocks and waits for merges*/); + documentIndexWriter.forceMergeDeletes(true /* blocks and waits for merges*/); } else if (maxNumSegments <= 0) { assert upgrade == false; - indexWriter.maybeMerge(); + documentIndexWriter.maybeMerge(); } else { - indexWriter.forceMerge(maxNumSegments, true /* blocks and waits for merges*/); + documentIndexWriter.forceMerge(maxNumSegments, true /* blocks and waits for merges*/); this.forceMergeUUID = forceMergeUUID; } if (flush) { @@ -2126,7 +2189,7 @@ private void releaseIndexCommit(IndexCommit snapshot) throws IOException { try { // Here we don't have to trim translog because snapshotting an index commit // does not lock translog or prevents unreferenced files from trimming. - indexWriter.deleteUnusedFiles(); + documentIndexWriter.deleteUnusedFiles(); } catch (AlreadyClosedException ignored) { // That's ok, we'll clean up unused files the next time it's opened. } @@ -2143,12 +2206,14 @@ private boolean failOnTragicEvent(AlreadyClosedException ex) { // if we are already closed due to some tragic exception // we need to fail the engine. 
it might have already been failed before // but we are double-checking it's failed and closed - if (indexWriter.isOpen() == false && indexWriter.getTragicException() != null) { + final Throwable writerTragicException = documentIndexWriter.getTragicException(); + // TODO Check if need to check for isOpen for other IndexWriters as well + if (writerTragicException != null) { final Exception tragicException; - if (indexWriter.getTragicException() instanceof Exception exception) { - tragicException = exception; + if (writerTragicException instanceof Exception) { + tragicException = (Exception) writerTragicException; } else { - tragicException = new RuntimeException(indexWriter.getTragicException()); + tragicException = new RuntimeException(writerTragicException); } failEngine("already closed by tragic event on the index writer", tragicException); engineFailed = true; @@ -2176,14 +2241,12 @@ protected boolean maybeFailEngine(String source, Exception e) { // throw and AssertionError if the tragic event condition is not met. if (e instanceof AlreadyClosedException) { return failOnTragicEvent((AlreadyClosedException) e); - } else if (e != null - && ((indexWriter.isOpen() == false && indexWriter.getTragicException() == e) - || (translogManager.getTragicExceptionIfClosed() == e))) { - // this spot on - we are handling the tragic event exception here so we have to fail the engine - // right away - failEngine(source, e); - return true; - } + } else if (e != null && ((documentIndexWriter.getTragicException() == e) || (translogManager.getTragicExceptionIfClosed() == e))) { + // this spot on - we are handling the tragic event exception here so we have to fail the engine + // right away + failEngine(source, e); + return true; + } return false; } @@ -2225,14 +2288,14 @@ public GatedCloseable getSegmentInfosSnapshot() { @Override protected final void writerSegmentStats(SegmentsStats stats) { stats.addVersionMapMemoryInBytes(versionMap.ramBytesUsed()); - stats.addIndexWriterMemoryInBytes(indexWriter.ramBytesUsed()); + stats.addIndexWriterMemoryInBytes(documentIndexWriter.ramBytesUsed()); stats.updateMaxUnsafeAutoIdTimestamp(maxUnsafeAutoIdTimestamp.get()); } @Override public long getIndexBufferRAMBytesUsed() { // We don't guard w/ readLock here, so we could throw AlreadyClosedException - return indexWriter.ramBytesUsed() + versionMap.ramBytesUsedForRefresh(); + return documentIndexWriter.ramBytesUsed() + versionMap.ramBytesUsedForRefresh(); } @Override @@ -2256,6 +2319,23 @@ public List segments(boolean verbose) { } } + @Override + public void close() throws IOException { + // The logic for closing writer is same as Engine except we are taking additional locks on child level writers. + if (isClosed.get() == false) { + logger.debug("close now acquiring writeLock"); + try (ReleasableLock lock = writeLock.acquire(); Releasable ignored = documentIndexWriter.obtainWriteLockOnAllMap();) { + closeInternal(); + } + } + awaitPendingClose(); + } + + private void closeInternal() throws IOException { + logger.debug("close acquired writeLock"); + closeNoLock("api", closedLatch); + } + /** * Closes the engine without acquiring the write lock. This should only be * called while the write lock is hold or in a disaster condition ie. 
if the engine @@ -2264,7 +2344,14 @@ public List segments(boolean verbose) { @Override protected final void closeNoLock(String reason, CountDownLatch closedLatch) { if (isClosed.compareAndSet(false, true)) { - assert rwl.isWriteLockedByCurrentThread() || failEngineLock.isHeldByCurrentThread() + // For composite IndexWriter, we need to validate that lock is on either the new map or old map. This is because, + // map may rotate in between the time when lock was taken on composite IndexWriter and assertion is made. In + // this case, write lock may not be present on the new map, but lock maybe present on the old map. In this + // scenario, no new entry will be created in the new map as write on new map will be blocked due to an active + // write lock on rwl lock taken in the close function call. Once close call completes, new entry in new map + // cannot be created due to ensureOpen call in Composite IndexWriter. + // TODO: Simulate this with a unit test. + assert (isWriteLockHeld()) || failEngineLock.isHeldByCurrentThread() : "Either the write lock must be held or the engine must be currently be failing itself"; try { this.versionMap.clear(); @@ -2284,7 +2371,7 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) { // no need to commit in this case!, we snapshot before we close the shard, so translog and all sync'ed logger.trace("rollback indexWriter"); try { - indexWriter.rollback(); + documentIndexWriter.rollback(); } catch (AlreadyClosedException ex) { failOnTragicEvent(ex); throw ex; @@ -2303,6 +2390,10 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) { } } + private boolean isWriteLockHeld() { + return rwl.isWriteLockedByCurrentThread() && documentIndexWriter.isWriteLockedByCurrentThread(); + } + @Override protected final ReferenceManager getReferenceManager(SearcherScope scope) { switch (scope) { @@ -2315,9 +2406,26 @@ protected final ReferenceManager getReferenceManager( } } + /** + * We should only take care of reopening parent writer here as we will not be concerned on child level writer which + * will be rollback. Since after rollback we do care about any new writes that came in and in the last commit, we would + * have synced files as well, we should not care about child level writer or updates map as they will belong to new + * write post last commit. 
+ * @return + * @throws IOException + */ private IndexWriter createWriter() throws IOException { try { - final IndexWriterConfig iwc = getIndexWriterConfig(); + IndexWriterConfig iwc = NativeLuceneIndexWriterFactory.IndexWriterConfigBuilder.builder() + .mergeScheduler(mergeScheduler) + .commitOnClose(false) + .openMode(IndexWriterConfig.OpenMode.APPEND) + .deletionPolicy(combinedDeletionPolicy) + .softDeletesPolicy(softDeletesPolicy) + .engineConfig(engineConfig) + .logger(logger) + .buildIndexWriterConfig(); + return createWriter(store.directory(), iwc); } catch (LockObtainFailedException ex) { logger.warn("could not lock IndexWriter", ex); @@ -2327,83 +2435,12 @@ private IndexWriter createWriter() throws IOException { // pkg-private for testing IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOException { - if (Assertions.ENABLED) { - return new AssertingIndexWriter(directory, iwc); - } else { - return new IndexWriter(directory, iwc); - } + return nativeIndexWriterFactory.createWriter(directory, iwc); } - private IndexWriterConfig getIndexWriterConfig() { - final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); - iwc.setCommitOnClose(false); // we by default don't commit on close - iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); - iwc.setIndexDeletionPolicy(combinedDeletionPolicy); - // with tests.verbose, lucene sets this up: plumb to align with filesystem stream - boolean verbose = false; - try { - verbose = Boolean.parseBoolean(System.getProperty("tests.verbose")); - } catch (Exception ignore) {} - iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger)); - iwc.setMergeScheduler(mergeScheduler); - // Give us the opportunity to upgrade old segments while performing - // background merges - MergePolicy mergePolicy = config().getMergePolicy(); - // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes. - iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD); - mergePolicy = new RecoverySourcePruneMergePolicy( - SourceFieldMapper.RECOVERY_SOURCE_NAME, - softDeletesPolicy::getRetentionQuery, - new SoftDeletesRetentionMergePolicy( - Lucene.SOFT_DELETES_FIELD, - softDeletesPolicy::getRetentionQuery, - new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME) - ) - ); - boolean shuffleForcedMerge = Booleans.parseBoolean(System.getProperty("opensearch.shuffle_forced_merge", Boolean.TRUE.toString())); - if (shuffleForcedMerge) { - // We wrap the merge policy for all indices even though it is mostly useful for time-based indices - // but there should be no overhead for other type of indices so it's simpler than adding a setting - // to enable it. 
- mergePolicy = new ShuffleForcedMergePolicy(mergePolicy); - } - - if (config().getIndexSettings().isMergeOnFlushEnabled()) { - final long maxFullFlushMergeWaitMillis = config().getIndexSettings().getMaxFullFlushMergeWaitTime().millis(); - if (maxFullFlushMergeWaitMillis > 0) { - iwc.setMaxFullFlushMergeWaitMillis(maxFullFlushMergeWaitMillis); - final Optional> mergeOnFlushPolicy = config().getIndexSettings().getMergeOnFlushPolicy(); - if (mergeOnFlushPolicy.isPresent()) { - mergePolicy = mergeOnFlushPolicy.get().apply(mergePolicy); - } - } - } else { - // Disable merge on refresh - iwc.setMaxFullFlushMergeWaitMillis(0); - } - - iwc.setCheckPendingFlushUpdate(config().getIndexSettings().isCheckPendingFlushEnabled()); - iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy)); - iwc.setSimilarity(engineConfig.getSimilarity()); - iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); - iwc.setCodec(engineConfig.getCodec()); - iwc.setUseCompoundFile(engineConfig.useCompoundFile()); - if (config().getIndexSort() != null) { - iwc.setIndexSort(config().getIndexSort()); - if (config().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_3_2_0)) { - iwc.setParentField(Lucene.PARENT_FIELD); - } - } - if (config().getLeafSorter() != null) { - iwc.setLeafSorter(config().getLeafSorter()); // The default segment search order - } - IndexSettings indexSettings = config().getIndexSettings(); - if (indexSettings.isDocumentReplication() == false - && (indexSettings.isSegRepLocalEnabled() || indexSettings.isRemoteStoreEnabled())) { - assert null != config().getIndexReaderWarmer(); - iwc.setMergedSegmentWarmer(config().getIndexReaderWarmer()); - } - return iwc; + @Override + public boolean refreshNeeded() { + return documentIndexWriter.hasNewIndexingOrUpdates() || super.refreshNeeded(); } /** @@ -2473,7 +2510,7 @@ long getGcDeletesInMillis() { } LiveIndexWriterConfig getCurrentIndexWriterConfig() { - return indexWriter.getConfig(); + return documentIndexWriter.getConfig(); } private final class EngineMergeScheduler extends OpenSearchConcurrentMergeScheduler { @@ -2504,7 +2541,7 @@ public synchronized void afterMerge(OnGoingMerge merge) { deactivateThrottling(); } } - if (indexWriter.hasPendingMerges() == false + if (documentIndexWriter.hasPendingMerges() == false && System.nanoTime() - lastWriteNanos >= engineConfig.getFlushMergesAfter().nanos()) { // NEVER do this on a merge thread since we acquire some locks blocking here and if we concurrently rollback the writer // we deadlock on engine#close for instance. @@ -2558,7 +2595,7 @@ protected void doRun() throws Exception { * @param writer the index writer to commit * @param translogUUID the translogUUID */ - protected void commitIndexWriter(final IndexWriter writer, final String translogUUID) throws IOException { + protected void commitIndexWriter(final DocumentIndexWriter writer, final String translogUUID) throws IOException { translogManager.ensureCanFlush(); try { final long localCheckpoint = localCheckpointTracker.getProcessedCheckpoint(); @@ -2587,6 +2624,11 @@ protected void commitIndexWriter(final IndexWriter writer, final String translog return commitData.entrySet().iterator(); }); shouldPeriodicallyFlushAfterBigMerge.set(false); + if (isContextAwareEnabled) { + // To sync document during commit. This keeps documents during commit always ahead of checkpoint. 
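+                // The internal refresh below moves any documents still buffered in the per-criteria child writers into the
+                // parent writer, so that the parent-level commit which follows covers every operation up to the processed checkpoint.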
+ refresh("commit", SearcherScope.INTERNAL, true); + } + writer.commit(); } catch (final Exception ex) { try { @@ -2832,7 +2874,7 @@ public Closeable acquireHistoryRetentionLock() { /** * Gets the commit data from {@link IndexWriter} as a map. */ - protected static Map commitDataAsMap(final IndexWriter indexWriter) { + protected static Map commitDataAsMap(final DocumentIndexWriter indexWriter) { final Map commitData = new HashMap<>(8); for (Map.Entry entry : indexWriter.getLiveCommitData()) { commitData.put(entry.getKey(), entry.getValue()); @@ -2840,28 +2882,6 @@ protected static Map commitDataAsMap(final IndexWriter indexWrit return commitData; } - /** - * Internal Asserting Index Writer - * - * @opensearch.internal - */ - private static class AssertingIndexWriter extends IndexWriter { - AssertingIndexWriter(Directory d, IndexWriterConfig conf) throws IOException { - super(d, conf); - } - - @Override - public long updateDocuments(Term delTerm, Iterable> docs) { - throw new AssertionError("must not hard update documents"); - } - - @Override - public long tryDeleteDocument(IndexReader readerIn, int docID) { - assert false : "#tryDeleteDocument is not supported. See Lucene#DirectoryReaderWithAllLiveDocs"; - throw new UnsupportedOperationException(); - } - } - /** * Returned the last local checkpoint value has been refreshed internally. */ diff --git a/server/src/main/java/org/opensearch/index/engine/LookupMapLockAcquisitionException.java b/server/src/main/java/org/opensearch/index/engine/LookupMapLockAcquisitionException.java new file mode 100644 index 0000000000000..1571b9893789d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/LookupMapLockAcquisitionException.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.index.shard.ShardId; + +import java.io.IOException; + +/** + * This exception indicates that CompositeIndexWriter was unable to obtain lock on CriteriaBasedIndexWriterLookup map + * during indexing. + * indexing request contains this Exception in the response, we do not need to add a translog entry for this request. + * + */ +public class LookupMapLockAcquisitionException extends EngineException { + public LookupMapLockAcquisitionException(StreamInput in) throws IOException { + super(in); + } + + @Override + public Throwable fillInStackTrace() { + // This is on the hot path for updates; stack traces are expensive to compute and not very useful for VCEEs, so don't fill it in. + return this; + } + + public LookupMapLockAcquisitionException(ShardId shardId, String msg, Throwable cause, Object... params) { + super(shardId, msg, cause, params); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/LuceneIndexWriter.java b/server/src/main/java/org/opensearch/index/engine/LuceneIndexWriter.java new file mode 100644 index 0000000000000..ef349d8dfdd5f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/LuceneIndexWriter.java @@ -0,0 +1,228 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine; + +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LiveIndexWriterConfig; +import org.apache.lucene.index.Term; +import org.opensearch.common.lease.Releasable; +import org.opensearch.index.mapper.ParseContext; + +import java.io.IOException; +import java.util.Map; + +/** + * Wrapper class for IndexWriter. It delegates all calls to underlying IndexWriter. + * + */ +public class LuceneIndexWriter implements DocumentIndexWriter { + private final IndexWriter indexWriter; + + /** + * Constructor for LuceneIndexWriter. + * + * @param indexWriter the underlying IndexWriter to which all function calls are delegated to. + */ + public LuceneIndexWriter(IndexWriter indexWriter) { + this.indexWriter = indexWriter; + } + + /** + * Wrapper function over IndexWriter.getFlushingBytes. + * + * @return the number of bytes currently being flushed by underlying IndexWriter. + */ + @Override + public long getFlushingBytes() { + return indexWriter.getFlushingBytes(); + } + + /** + * Wrapper function over IndexWriter.getPendingNumDocs. + * + * @return Returns the number of documents in the index including documents are being added (i. e., reserved) for underlying IndexWriter. + */ + @Override + public long getPendingNumDocs() { + return indexWriter.getPendingNumDocs(); + } + + /** + * Wrapper function for IndexWriter.getConfig. + * + * @return Returns a LiveIndexWriterConfig, which can be used to query the underlying IndexWriter current settings, + * as well as modify "live" ones. + */ + @Override + public LiveIndexWriterConfig getConfig() { + return indexWriter.getConfig(); + } + + /** + * Wrapper function for IndexWriter.hasPendingMerges. + * + * @return returns true if there are merges waiting to be scheduled for underlying IndexWriter. + */ + @Override + public boolean hasPendingMerges() { + return indexWriter.hasPendingMerges(); + } + + /** + * Wrapper function for IndexWriter.hasUncommittedChanges + * + * @return Returns true if there may be changes that have not been committed for underlying IndexWriter. + */ + @Override + public boolean hasUncommittedChanges() { + return indexWriter.hasUncommittedChanges(); + } + + /** + * Wrapper function for IndexWriter.getTragicException + * + * @return Associated tragic exception for underlying IndexWriter. 
+ */ + @Override + public Throwable getTragicException() { + return indexWriter.getTragicException(); + } + + @Override + public long ramBytesUsed() { + return indexWriter.ramBytesUsed(); + } + + @Override + public void setLiveCommitData(Iterable> commitUserData) { + indexWriter.setLiveCommitData(commitUserData); + } + + @Override + public long commit() throws IOException { + return indexWriter.commit(); + } + + @Override + public Iterable> getLiveCommitData() { + return indexWriter.getLiveCommitData(); + } + + @Override + public void rollback() throws IOException { + indexWriter.rollback(); + } + + @Override + public void close() throws IOException { + indexWriter.close(); + } + + @Override + public void deleteUnusedFiles() throws IOException { + indexWriter.deleteUnusedFiles(); + } + + @Override + public long addDocuments(Iterable docs, Term uid) throws IOException { + return indexWriter.addDocuments(docs); + } + + @Override + public long addDocument(ParseContext.Document doc, Term uid) throws IOException { + return indexWriter.addDocument(doc); + } + + @Override + public void softUpdateDocuments( + Term uid, + Iterable docs, + long version, + long seqNo, + long primaryTerm, + Field... softDeletesField + ) throws IOException { + indexWriter.softUpdateDocuments(uid, docs, softDeletesField); + } + + @Override + public void softUpdateDocument( + Term uid, + ParseContext.Document doc, + long version, + long seqNo, + long primaryTerm, + Field... softDeletesField + ) throws IOException { + indexWriter.softUpdateDocument(uid, doc, softDeletesField); + } + + @Override + public void deleteDocument( + Term uid, + boolean isStaleOperation, + ParseContext.Document doc, + long version, + long seqNo, + long primaryTerm, + Field... softDeletesField + ) throws IOException { + if (isStaleOperation) { + indexWriter.addDocument(doc); + } else { + indexWriter.softUpdateDocument(uid, doc, softDeletesField); + } + } + + @Override + public void forceMergeDeletes(boolean doWait) throws IOException { + indexWriter.forceMergeDeletes(doWait); + } + + @Override + public void maybeMerge() throws IOException { + indexWriter.maybeMerge(); + } + + @Override + public void forceMerge(int maxNumSegments, boolean doWait) throws IOException { + indexWriter.forceMerge(maxNumSegments, doWait); + } + + @Override + public IndexWriter getAccumulatingIndexWriter() { + return indexWriter; + } + + // Always return false here so that result of refreshNeeded is always equal to super.refreshNeeded() + @Override + public boolean hasNewIndexingOrUpdates() { + return false; + } + + public boolean isWriteLockedByCurrentThread() { + return true; + } + + @Override + public void beforeRefresh() throws IOException { + // Keep this no ops for Lucene IndexWriter. + } + + @Override + public void afterRefresh(boolean b) throws IOException { + // Keep this no ops. + } + + @Override + public Releasable obtainWriteLockOnAllMap() { + return () -> {}; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/NativeLuceneIndexWriterFactory.java b/server/src/main/java/org/opensearch/index/engine/NativeLuceneIndexWriterFactory.java new file mode 100644 index 0000000000000..d117fb1bdd5b2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/NativeLuceneIndexWriterFactory.java @@ -0,0 +1,245 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.SoftDeletesRetentionMergePolicy; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.InfoStream; +import org.opensearch.Version; +import org.opensearch.common.Booleans; +import org.opensearch.common.lucene.LoggerInfoStream; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.core.Assertions; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.codec.CriteriaBasedCodec; +import org.opensearch.index.mapper.IdFieldMapper; +import org.opensearch.index.mapper.SourceFieldMapper; +import org.opensearch.index.shard.OpenSearchMergePolicy; + +import java.io.IOException; +import java.util.Optional; +import java.util.function.UnaryOperator; + +/** + * Factory class used to create Lucene's IndexWriter. + * + */ +public class NativeLuceneIndexWriterFactory implements IndexWriterFactory { + + @Override + public IndexWriter createWriter(Directory directory, IndexWriterConfig config) throws IOException { + if (Assertions.ENABLED) { + return new AssertingIndexWriter(directory, config); + } else { + return new IndexWriter(directory, config); + } + } + + @Override + public IndexWriter createWriter( + Directory directory, + MergeScheduler mergeScheduler, + Boolean commitOnClose, + IndexWriterConfig.OpenMode openMode, + CombinedDeletionPolicy deletionPolicy, + SoftDeletesPolicy softDeletesPolicy, + EngineConfig engineConfig, + Logger logger, + String associatedCriteria + ) throws IOException { + IndexWriterConfig config = IndexWriterConfigBuilder.builder() + .mergeScheduler(mergeScheduler) + .commitOnClose(commitOnClose) + .openMode(openMode) + .deletionPolicy(deletionPolicy) + .softDeletesPolicy(softDeletesPolicy) + .engineConfig(engineConfig) + .logger(logger) + .associatedCriteria(associatedCriteria) + .buildIndexWriterConfig(); + + return createWriter(directory, config); + } + + /** + * Factory class used to create Lucene's IndexWriter config. 
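+ * <p>Typical usage mirrors the {@code createWriter} change above; a minimal sketch (the exact arguments depend on the
+ * engine configuration and, for child writers, on the associated criteria):
+ * <pre>
+ * IndexWriterConfig iwc = NativeLuceneIndexWriterFactory.IndexWriterConfigBuilder.builder()
+ *     .mergeScheduler(mergeScheduler)
+ *     .commitOnClose(false)
+ *     .openMode(IndexWriterConfig.OpenMode.APPEND)
+ *     .deletionPolicy(combinedDeletionPolicy)
+ *     .softDeletesPolicy(softDeletesPolicy)
+ *     .engineConfig(engineConfig)
+ *     .logger(logger)
+ *     .buildIndexWriterConfig();
+ * </pre>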
+ * + */ + public static final class IndexWriterConfigBuilder { + private MergeScheduler mergeScheduler; + private Boolean commitOnClose; + private IndexWriterConfig.OpenMode openMode; + private CombinedDeletionPolicy deletionPolicy; + private SoftDeletesPolicy softDeletesPolicy; + private EngineConfig engineConfig; + private Logger logger; + private String associatedCriteria; + + private IndexWriterConfigBuilder() {} + + public static IndexWriterConfigBuilder builder() { + return new IndexWriterConfigBuilder(); + } + + public IndexWriterConfigBuilder mergeScheduler(MergeScheduler mergeScheduler) { + this.mergeScheduler = mergeScheduler; + return this; + } + + public IndexWriterConfigBuilder commitOnClose(Boolean commitOnClose) { + this.commitOnClose = commitOnClose; + return this; + } + + public IndexWriterConfigBuilder openMode(IndexWriterConfig.OpenMode openMode) { + this.openMode = openMode; + return this; + } + + public IndexWriterConfigBuilder deletionPolicy(CombinedDeletionPolicy deletionPolicy) { + this.deletionPolicy = deletionPolicy; + return this; + } + + public IndexWriterConfigBuilder softDeletesPolicy(SoftDeletesPolicy softDeletesPolicy) { + this.softDeletesPolicy = softDeletesPolicy; + return this; + } + + public IndexWriterConfigBuilder engineConfig(EngineConfig engineConfig) { + this.engineConfig = engineConfig; + return this; + } + + public IndexWriterConfigBuilder logger(Logger logger) { + this.logger = logger; + return this; + } + + public IndexWriterConfigBuilder associatedCriteria(String associatedCriteria) { + this.associatedCriteria = associatedCriteria; + return this; + } + + public IndexWriterConfig buildIndexWriterConfig() { + final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); + iwc.setCommitOnClose(commitOnClose); + iwc.setOpenMode(openMode); + if (openMode == IndexWriterConfig.OpenMode.CREATE) { + iwc.setIndexCreatedVersionMajor(engineConfig.getIndexSettings().getIndexVersionCreated().luceneVersion.major); + } + + if (deletionPolicy != null) { + // For child IndexWriter, we are not setting deletion policy. + iwc.setIndexDeletionPolicy(deletionPolicy); + } + + // with tests.verbose, lucene sets this up: plumb to align with filesystem stream + boolean verbose = false; + try { + verbose = Boolean.parseBoolean(System.getProperty("tests.verbose")); + } catch (Exception ignore) {} + iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger)); + iwc.setMergeScheduler(mergeScheduler); + // Give us the opportunity to upgrade old segments while performing + // background merges + MergePolicy mergePolicy = engineConfig.getMergePolicy(); + // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes. + iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD); + mergePolicy = new RecoverySourcePruneMergePolicy( + SourceFieldMapper.RECOVERY_SOURCE_NAME, + softDeletesPolicy::getRetentionQuery, + new SoftDeletesRetentionMergePolicy( + Lucene.SOFT_DELETES_FIELD, + softDeletesPolicy::getRetentionQuery, + new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME) + ) + ); + boolean shuffleForcedMerge = Booleans.parseBoolean( + System.getProperty("opensearch.shuffle_forced_merge", Boolean.TRUE.toString()) + ); + if (shuffleForcedMerge) { + // We wrap the merge policy for all indices even though it is mostly useful for time-based indices + // but there should be no overhead for other type of indices so it's simpler than adding a setting + // to enable it. 
+ mergePolicy = new ShuffleForcedMergePolicy(mergePolicy); + } + if (engineConfig.getIndexSettings().isMergeOnFlushEnabled()) { + final long maxFullFlushMergeWaitMillis = engineConfig.getIndexSettings().getMaxFullFlushMergeWaitTime().millis(); + if (maxFullFlushMergeWaitMillis > 0) { + iwc.setMaxFullFlushMergeWaitMillis(maxFullFlushMergeWaitMillis); + final Optional> mergeOnFlushPolicy = engineConfig.getIndexSettings().getMergeOnFlushPolicy(); + if (mergeOnFlushPolicy.isPresent()) { + mergePolicy = mergeOnFlushPolicy.get().apply(mergePolicy); + } + } + } else { + // Disable merge on refresh + iwc.setMaxFullFlushMergeWaitMillis(0); + } + iwc.setCheckPendingFlushUpdate(engineConfig.getIndexSettings().isCheckPendingFlushEnabled()); + iwc.setMergePolicy(new OpenSearchMergePolicy(mergePolicy)); + iwc.setSimilarity(engineConfig.getSimilarity()); + iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac()); + if (engineConfig.getIndexSettings().isContextAwareEnabled()) { + iwc.setCodec(new CriteriaBasedCodec(engineConfig.getCodec(), associatedCriteria)); + } else { + iwc.setCodec(engineConfig.getCodec()); + } + + iwc.setUseCompoundFile(engineConfig.useCompoundFile()); + if (engineConfig.getIndexSort() != null) { + iwc.setIndexSort(engineConfig.getIndexSort()); + if (engineConfig.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_3_2_0)) { + iwc.setParentField(Lucene.PARENT_FIELD); + } + } + if (engineConfig.getLeafSorter() != null) { + iwc.setLeafSorter(engineConfig.getLeafSorter()); // The default segment search order + } + IndexSettings indexSettings = engineConfig.getIndexSettings(); + if (indexSettings.isDocumentReplication() == false + && (indexSettings.isSegRepLocalEnabled() || indexSettings.isRemoteStoreEnabled())) { + assert null != engineConfig.getIndexReaderWarmer(); + iwc.setMergedSegmentWarmer(engineConfig.getIndexReaderWarmer()); + } + return iwc; + } + } + + /** + * Internal Asserting Index Writer + * + * @opensearch.internal + */ + private static class AssertingIndexWriter extends IndexWriter { + AssertingIndexWriter(Directory d, IndexWriterConfig conf) throws IOException { + super(d, conf); + } + + @Override + public long updateDocuments(Term delTerm, Iterable> docs) { + throw new AssertionError("must not hard update documents"); + } + + @Override + public long tryDeleteDocument(IndexReader readerIn, int docID) { + assert false : "#tryDeleteDocument is not supported. See Lucene#DirectoryReaderWithAllLiveDocs"; + throw new UnsupportedOperationException(); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index dbed65eecce47..72be6d79aabee 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4978,10 +4978,13 @@ public final boolean isSearchIdle() { * after a refresh, so we don't want to wait for a search to trigger that cycle. Replicas will only refresh after receiving * a new set of segments. */ + // TODO: Should we disable this, as data will never get in sync if we use search idle for context aware segments? public final boolean isSearchIdleSupported() { - // If the index is remote store backed, then search idle is not supported. This is to ensure that async refresh + // If the index is remote store backed, then search idle is not supported. If the index is context aware enabled, + // search idle is not supported either, so that the periodic sync between the child and parent IndexWriter keeps + // running. In the remote store case, disabling search idle ensures that the async refresh // task continues to upload to remote store periodically. - if (isRemoteTranslogEnabled() || indexSettings.isAssignedOnRemoteNode()) { + if (isRemoteTranslogEnabled() || indexSettings.isAssignedOnRemoteNode() || indexSettings.isContextAwareEnabled()) { return false; } return indexSettings.isSegRepEnabledOrRemoteNode() == false || indexSettings.getNumberOfReplicas() == 0; diff --git a/server/src/main/java/org/opensearch/index/store/CompositeDirectory.java b/server/src/main/java/org/opensearch/index/store/CompositeDirectory.java index cb95c31941a0e..51aec1c7045e3 100644 --- a/server/src/main/java/org/opensearch/index/store/CompositeDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/CompositeDirectory.java @@ -56,7 +56,7 @@ @ExperimentalApi public class CompositeDirectory extends FilterDirectory { private static final Logger logger = LogManager.getLogger(CompositeDirectory.class); - protected final FSDirectory localDirectory; + protected final Directory localDirectory; protected final RemoteSegmentStoreDirectory remoteDirectory; protected final FileCache fileCache; protected final TransferManager transferManager; @@ -71,7 +71,7 @@ public class CompositeDirectory extends FilterDirectory { public CompositeDirectory(Directory localDirectory, Directory remoteDirectory, FileCache fileCache, ThreadPool threadPool) { super(localDirectory); validate(localDirectory, remoteDirectory, fileCache); - this.localDirectory = (FSDirectory) localDirectory; + this.localDirectory = localDirectory; this.remoteDirectory = (RemoteSegmentStoreDirectory) remoteDirectory; this.fileCache = fileCache; this.threadPool = threadPool; @@ -345,7 +345,7 @@ public IndexInput openInput(String name, IOContext context) throws IOException { new StoreFileMetadata(name, uploadedSegmentMetadata.getLength(), uploadedSegmentMetadata.getChecksum(), Version.LATEST), null ); - return new OnDemandBlockSnapshotIndexInput(fileInfo, localDirectory, transferManager); + return new OnDemandBlockSnapshotIndexInput(fileInfo, getLocalFSDirectory(), transferManager); } } @@ -393,7 +393,19 @@ public void afterSyncToRemote(String file) { // Visibility public since we need it in IT tests public Path getFilePath(String name) { - return localDirectory.getDirectory().resolve(name); + return getLocalFSDirectory().getDirectory().resolve(name); + } + + private FSDirectory getLocalFSDirectory() { + FSDirectory localFSDirectory; + if (localDirectory instanceof FSDirectory) { + localFSDirectory = (FSDirectory) localDirectory; + } else { + // In this case it must be a FilterDirectory wrapping an FSDirectory, as enforced by the validation below.
+ localFSDirectory = (FSDirectory) (((FilterDirectory) localDirectory).getDelegate()); + } + + return localFSDirectory; } /** @@ -411,9 +423,9 @@ private void validate(Directory localDirectory, Directory remoteDirectory, FileC if (fileCache == null) throw new IllegalStateException( "File Cache not initialized on this Node, cannot create Composite Directory without FileCache" ); - if (localDirectory instanceof FSDirectory == false) throw new IllegalStateException( - "For Composite Directory, local directory must be of type FSDirectory" - ); + if (localDirectory instanceof FSDirectory == false + && !(localDirectory instanceof FilterDirectory && ((FilterDirectory) localDirectory).getDelegate() instanceof FSDirectory)) + throw new IllegalStateException("For Composite Directory, local directory must be of type FSDirectory"); if (remoteDirectory instanceof RemoteSegmentStoreDirectory == false) throw new IllegalStateException( "For Composite Directory, remote directory must be of type RemoteSegmentStoreDirectory" ); diff --git a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java index f1c44a1815709..4b23ca6b19d7c 100644 --- a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java @@ -84,7 +84,7 @@ public Directory newDirectory(IndexSettings indexSettings, ShardPath path) throw return newFSDirectory(location, lockFactory, indexSettings); } - protected Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { final String storeType = indexSettings.getSettings() .get(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.FS.getSettingsKey()); IndexModule.Type type; diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java index 57455d3bbdd7c..4c4e0d141a2cc 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java @@ -9,6 +9,7 @@ package org.opensearch.index.store; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockFactory; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.core.index.shard.ShardId; @@ -26,6 +27,7 @@ import org.opensearch.threadpool.ThreadPool; import java.io.IOException; +import java.nio.file.Path; import java.util.Map; import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; @@ -57,6 +59,11 @@ public RemoteSegmentStoreDirectoryFactory( this.threadPool = threadPool; } + @Override + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + return null; + } + @Override public Directory newDirectory(IndexSettings indexSettings, ShardPath path) throws IOException { String repositoryName = indexSettings.getRemoteStoreRepository(); diff --git a/server/src/main/java/org/opensearch/index/store/Store.java b/server/src/main/java/org/opensearch/index/store/Store.java index b8f06b1d70a79..efa1026baba53 100644 --- a/server/src/main/java/org/opensearch/index/store/Store.java +++ 
b/server/src/main/java/org/opensearch/index/store/Store.java @@ -54,6 +54,7 @@ import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -87,6 +88,7 @@ import org.opensearch.env.NodeEnvironment; import org.opensearch.env.ShardLock; import org.opensearch.env.ShardLockObtainFailedException; +import org.opensearch.index.BucketedCompositeDirectory; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.CombinedDeletionPolicy; import org.opensearch.index.engine.Engine; @@ -95,6 +97,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.ShardPath; import org.opensearch.index.translog.Translog; +import org.opensearch.plugins.IndexStorePlugin; import java.io.Closeable; import java.io.EOFException; @@ -123,6 +126,7 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.index.seqno.SequenceNumbers.LOCAL_CHECKPOINT_KEY; +import static org.opensearch.index.store.FsDirectoryFactory.INDEX_LOCK_FACTOR_SETTING; import static org.opensearch.index.store.Store.MetadataSnapshot.loadMetadata; /** @@ -179,6 +183,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref private final ShardPath shardPath; private final boolean isParentFieldEnabledVersion; private final boolean isIndexSortEnabled; + private final IndexStorePlugin.DirectoryFactory directoryFactory; // used to ref count files when a new Reader is opened for PIT/Scroll queries // prevents segment files deletion until the PIT/Scroll expires or is discarded @@ -192,7 +197,7 @@ protected void closeInternal() { }; public Store(ShardId shardId, IndexSettings indexSettings, Directory directory, ShardLock shardLock) { - this(shardId, indexSettings, directory, shardLock, OnClose.EMPTY, null); + this(shardId, indexSettings, directory, shardLock, OnClose.EMPTY, null, null); } public Store( @@ -202,6 +207,18 @@ public Store( ShardLock shardLock, OnClose onClose, ShardPath shardPath + ) { + this(shardId, indexSettings, directory, shardLock, onClose, shardPath, null); + } + + public Store( + ShardId shardId, + IndexSettings indexSettings, + Directory directory, + ShardLock shardLock, + OnClose onClose, + ShardPath shardPath, + IndexStorePlugin.DirectoryFactory directoryFactory ) { super(shardId, indexSettings); final TimeValue refreshInterval = indexSettings.getValue(INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING); @@ -213,6 +230,7 @@ public Store( this.shardPath = shardPath; this.isIndexSortEnabled = indexSettings.getIndexSortConfig().hasIndexSort(); this.isParentFieldEnabledVersion = indexSettings.getIndexVersionCreated().onOrAfter(org.opensearch.Version.V_3_2_0); + this.directoryFactory = directoryFactory; assert onClose != null; assert shardLock != null; assert shardLock.getShardId().equals(shardId); @@ -223,6 +241,14 @@ public Directory directory() { return directory; } + public Directory newTempDirectory(String pathString) throws IOException { + return directoryFactory.newFSDirectory( + shardPath.resolveIndex().resolve(pathString), + this.indexSettings.getValue(INDEX_LOCK_FACTOR_SETTING), + this.indexSettings + ); + } + public ShardPath shardPath() { return shardPath; } @@ -944,13 +970,22 @@ public DirectoryFileTransferTracker 
getDirectoryFileTransferTracker() { @Override public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException { long fileSize = from.fileLength(src); - beforeDownload(fileSize); + boolean isCopyingFromRemoteDirectory = !((from instanceof FSDirectory) + && ((((FSDirectory) from).getDirectory().toString().contains(BucketedCompositeDirectory.CHILD_DIRECTORY_PREFIX)))); + if (isCopyingFromRemoteDirectory) { + // Update the stats only when we are copying from remote to local directory. As this function gets called + // from addIndexes as well when data from child level writer is synced from parent level writer. + beforeDownload(fileSize); + } + boolean success = false; long startTime = System.currentTimeMillis(); try { super.copyFrom(from, src, dest, context); success = true; - afterDownload(fileSize, startTime); + if (isCopyingFromRemoteDirectory) { + afterDownload(fileSize, startTime); + } } finally { if (!success) { downloadFailed(fileSize, startTime); diff --git a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java index 10d85520c97d1..a442811a0cf84 100644 --- a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java @@ -10,6 +10,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.LockFactory; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.index.IndexSettings; import org.opensearch.index.remote.RemoteStoreEnums.PathType; @@ -57,6 +58,11 @@ public RemoteSnapshotDirectoryFactory( this.remoteStoreFileCache = remoteStoreFileCache; } + @Override + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + return null; + } + @Override public Directory newDirectory(IndexSettings indexSettings, ShardPath localShardPath) throws IOException { final String repositoryName = IndexSettings.SEARCHABLE_SNAPSHOT_REPOSITORY.get(indexSettings.getSettings()); diff --git a/server/src/main/java/org/opensearch/plugins/IndexStorePlugin.java b/server/src/main/java/org/opensearch/plugins/IndexStorePlugin.java index 052026c6b9c0d..b83880eac7533 100644 --- a/server/src/main/java/org/opensearch/plugins/IndexStorePlugin.java +++ b/server/src/main/java/org/opensearch/plugins/IndexStorePlugin.java @@ -33,6 +33,7 @@ package org.opensearch.plugins; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockFactory; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.Nullable; @@ -49,6 +50,7 @@ import org.opensearch.threadpool.ThreadPool; import java.io.IOException; +import java.nio.file.Path; import java.util.Collections; import java.util.Map; import java.util.Optional; @@ -66,7 +68,6 @@ public interface IndexStorePlugin { * * @opensearch.api */ - @FunctionalInterface @PublicApi(since = "1.0.0") interface DirectoryFactory { /** @@ -77,6 +78,8 @@ interface DirectoryFactory { * @throws IOException if an IOException occurs while opening the directory */ Directory newDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException; + + Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings 
indexSettings) throws IOException; } /** @@ -164,7 +167,6 @@ default Optional getIndexStoreListener() { * * @opensearch.api */ - @FunctionalInterface @ExperimentalApi interface StoreFactory { /** @@ -185,6 +187,27 @@ Store newStore( Store.OnClose onClose, ShardPath shardPath ) throws IOException; + + /** + * Creates a new Store per shard. This method is called once per shard on shard creation. + * @param shardId the shard id + * @param indexSettings the shard's index settings + * @param directory the Lucene directory selected for this shard + * @param shardLock the shard lock to associate with the store + * @param onClose listener invoked on store close + * @param shardPath the shard path + * @param directoryFactory the directory path. + * @return a new Store instance + */ + Store newStore( + ShardId shardId, + IndexSettings indexSettings, + Directory directory, + ShardLock shardLock, + Store.OnClose onClose, + ShardPath shardPath, + DirectoryFactory directoryFactory + ) throws IOException; } /** diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 90f4d20ad07a5..b148e5415e168 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -1,3 +1,4 @@ org.opensearch.index.codec.composite.composite912.Composite912Codec org.opensearch.index.codec.composite.composite103.Composite103Codec org.opensearch.index.codec.composite.backward_codecs.composite101.Composite101Codec +org.opensearch.index.codec.CriteriaBasedCodec diff --git a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java index d011826e81af4..5ec2da27d0272 100644 --- a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java @@ -88,6 +88,7 @@ import org.opensearch.crypto.CryptoRegistryException; import org.opensearch.env.ShardLockObtainFailedException; import org.opensearch.index.engine.IngestionEngineException; +import org.opensearch.index.engine.LookupMapLockAcquisitionException; import org.opensearch.index.engine.RecoveryEngineException; import org.opensearch.index.query.QueryShardException; import org.opensearch.index.seqno.RetentionLeaseAlreadyExistsException; @@ -903,6 +904,7 @@ public void testIds() { ids.put(176, IngestionEngineException.class); ids.put(177, StreamException.class); ids.put(10001, IndexCreateBlockException.class); + ids.put(10002, LookupMapLockAcquisitionException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java index 00bd99dd4b349..6340cfe145272 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java @@ -74,6 +74,7 @@ import org.opensearch.index.SegmentReplicationPressureService; import org.opensearch.index.VersionType; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.LookupMapLockAcquisitionException; import org.opensearch.index.engine.VersionConflictEngineException; import org.opensearch.index.mapper.MapperService; import 
org.opensearch.index.mapper.Mapping; @@ -108,6 +109,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.LongSupplier; +import static org.opensearch.common.util.FeatureFlags.CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG; +import static org.opensearch.index.IndexSettingsTests.newIndexMeta; import static org.opensearch.index.remote.RemoteStoreTestsHelper.createIndexSettings; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; @@ -1218,6 +1221,72 @@ public void testRetries() throws Exception { latch.await(); } + @LockFeatureFlag(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG) + public void testRetriesWithLookupMapLockAcquisitionException() throws Exception { + Settings settings = Settings.builder().put(IndexSettings.INDEX_CONTEXT_AWARE_ENABLED_SETTING.getKey(), true).build(); + IndexSettings indexSettings = new IndexSettings(newIndexMeta("test", settings), Settings.EMPTY); + UpdateRequest writeRequest = new UpdateRequest("index", "id").doc(Requests.INDEX_CONTENT_TYPE, "field", "value"); + BulkItemRequest primaryRequest = new BulkItemRequest(0, writeRequest); + + IndexRequest updateResponse = new IndexRequest("index").id("id").source(Requests.INDEX_CONTENT_TYPE, "field", "value"); + + Exception err = new LookupMapLockAcquisitionException(shardId, "Unable to obtain lock on the current Lookup map", null); + Engine.IndexResult lookupMapExceptionResult = new Engine.IndexResult(err, 0); + Engine.IndexResult mappingUpdate = new Engine.IndexResult( + new Mapping(null, mock(RootObjectMapper.class), new MetadataFieldMapper[0], Collections.emptyMap()) + ); + Translog.Location resultLocation = new Translog.Location(42, 42, 42); + Engine.IndexResult success = new FakeIndexResult(1, 1, 13, true, resultLocation); + + IndexShard shard = mock(IndexShard.class); + when(shard.applyIndexOperationOnPrimary(anyLong(), any(), any(), anyLong(), anyLong(), anyLong(), anyBoolean())).thenAnswer(ir -> { + if (randomBoolean()) { + return lookupMapExceptionResult; + } else { + return success; + } + }); + when(shard.indexSettings()).thenReturn(indexSettings); + when(shard.shardId()).thenReturn(shardId); + when(shard.mapperService()).thenReturn(mock(MapperService.class)); + + UpdateHelper updateHelper = mock(UpdateHelper.class); + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( + new UpdateHelper.Result( + updateResponse, + randomBoolean() ? 
DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, + Collections.singletonMap("field", "value"), + Requests.INDEX_CONTENT_TYPE + ) + ); + + BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest }; + BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items); + + final CountDownLatch latch = new CountDownLatch(1); + TransportShardBulkAction.performOnPrimary( + bulkShardRequest, + shard, + updateHelper, + threadPool::absoluteTimeInMillis, + new NoopMappingUpdatePerformer(), + listener -> listener.onResponse(null), + new LatchedActionListener<>(ActionTestUtils.assertNoFailureListener(result -> { + assertThat(((WritePrimaryResult) result).location, equalTo(resultLocation)); + BulkItemResponse primaryResponse = result.replicaRequest().items()[0].getPrimaryResponse(); + assertThat(primaryResponse.getItemId(), equalTo(0)); + assertThat(primaryResponse.getId(), equalTo("id")); + assertThat(primaryResponse.getOpType(), equalTo(DocWriteRequest.OpType.UPDATE)); + DocWriteResponse response = primaryResponse.getResponse(); + assertThat(response.status(), equalTo(RestStatus.CREATED)); + assertThat(response.getSeqNo(), equalTo(13L)); + }), latch), + threadPool, + Names.WRITE + ); + latch.await(); + } + public void testUpdateWithRetryOnConflict() throws IOException, InterruptedException { IndexSettings indexSettings = new IndexSettings(indexMetadata(), Settings.EMPTY); @@ -1294,6 +1363,81 @@ public void testUpdateWithRetryOnConflict() throws IOException, InterruptedExcep }); } + @LockFeatureFlag(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG) + public void testRetriesWithLookupMapLockAcquisitionExceptionWithMaxRetry() throws IOException, InterruptedException { + int retryCount = randomIntBetween(6, 10); + Settings settings = Settings.builder() + .put(IndexSettings.INDEX_CONTEXT_AWARE_ENABLED_SETTING.getKey(), true) + .put(IndexSettings.INDEX_MAX_RETRY_ON_LOOKUP_MAP_LOCK_ACQUISITION_EXCEPTION.getKey(), retryCount) + .build(); + IndexSettings indexSettings = new IndexSettings(newIndexMeta("test", settings), Settings.EMPTY); + + int nItems = randomIntBetween(2, 5); + List items = new ArrayList<>(nItems); + for (int i = 0; i < nItems; i++) { + UpdateRequest updateRequest = new UpdateRequest("index", "id").doc(Requests.INDEX_CONTENT_TYPE, "field", "value"); + items.add(new BulkItemRequest(i, updateRequest)); + } + + IndexRequest updateResponse = new IndexRequest("index").id("id").source(Requests.INDEX_CONTENT_TYPE, "field", "value"); + + Exception err = new LookupMapLockAcquisitionException(shardId, "Unable to obtain lock on the current Lookup map", null); + Engine.IndexResult lookupMapExceptionResult = new Engine.IndexResult(err, 0); + + IndexShard shard = mock(IndexShard.class); + when(shard.applyIndexOperationOnPrimary(anyLong(), any(), any(), anyLong(), anyLong(), anyLong(), anyBoolean())).thenAnswer( + ir -> lookupMapExceptionResult + ); + when(shard.indexSettings()).thenReturn(indexSettings); + when(shard.shardId()).thenReturn(shardId); + when(shard.mapperService()).thenReturn(mock(MapperService.class)); + + UpdateHelper updateHelper = mock(UpdateHelper.class); + when(updateHelper.prepare(any(), eq(shard), any())).thenReturn( + new UpdateHelper.Result( + updateResponse, + randomBoolean() ? 
DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED, + Collections.singletonMap("field", "value"), + Requests.INDEX_CONTENT_TYPE + ) + ); + + BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items.toArray(BulkItemRequest[]::new)); + + final CountDownLatch latch = new CountDownLatch(1); + Runnable runnable = () -> TransportShardBulkAction.performOnPrimary( + bulkShardRequest, + shard, + updateHelper, + threadPool::absoluteTimeInMillis, + new NoopMappingUpdatePerformer(), + listener -> listener.onResponse(null), + new LatchedActionListener<>(ActionTestUtils.assertNoFailureListener(result -> { + assertEquals(nItems, result.replicaRequest().items().length); + for (BulkItemRequest item : result.replicaRequest().items()) { + assertEquals(LookupMapLockAcquisitionException.class, item.getPrimaryResponse().getFailure().getCause().getClass()); + } + }), latch), + threadPool, + Names.WRITE + ); + + // execute the runnable on a separate thread so that the infinite loop can be detected + new Thread(runnable).start(); + + // timeout the request in 10 seconds if there is an infinite loop + assertTrue(latch.await(10, TimeUnit.SECONDS)); + + items.forEach(item -> { + assertEquals(item.getPrimaryResponse().getFailure().getCause().getClass(), LookupMapLockAcquisitionException.class); + + // this assertion is based on the assumption that all bulk item requests are updates and are hence calling + // UpdateRequest::prepareRequest + UpdateRequest updateRequest = (UpdateRequest) item.request(); + verify(updateHelper, times(retryCount + 1)).prepare(eq(updateRequest), any(IndexShard.class), any(LongSupplier.class)); + }); + } + public void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception { TestThreadPool rejectingThreadPool = new TestThreadPool( "TransportShardBulkActionTests#testForceExecutionOnRejectionAfterMappingUpdate", diff --git a/server/src/test/java/org/opensearch/index/CriteriaBasedMergePolicyTests.java b/server/src/test/java/org/opensearch/index/CriteriaBasedMergePolicyTests.java new file mode 100644 index 0000000000000..a2cb711269656 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/CriteriaBasedMergePolicyTests.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index; + +import org.apache.lucene.codecs.lucene103.Lucene103Codec; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeTrigger; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.TieredMergePolicy; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.tests.index.BaseMergePolicyTestCase; +import org.apache.lucene.util.Version; +import org.opensearch.index.codec.CriteriaBasedCodec; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class CriteriaBasedMergePolicyTests extends OpenSearchTestCase { + + public void testFindMerges_EmptySegments() throws IOException { + CriteriaBasedMergePolicy mergePolicy = new CriteriaBasedMergePolicy(new TieredMergePolicy()); + SegmentInfos infos = createSegmentInfos(); + MergePolicy.MergeSpecification result = mergePolicy.findMerges( + MergeTrigger.SEGMENT_FLUSH, + infos, + new BaseMergePolicyTestCase.MockMergeContext(SegmentCommitInfo::getDelCount) + ); + + assertNull(result); + } + + public void testFindMerges_AllSegmentsMerging() throws IOException { + CriteriaBasedMergePolicy mergePolicy = new CriteriaBasedMergePolicy(new TieredMergePolicy()); + SegmentInfos infos = createSegmentInfos(); + SegmentCommitInfo sci1 = createSegmentCommitInfo("_1", "bucket1"); + SegmentCommitInfo sci2 = createSegmentCommitInfo("_2", "bucket1"); + infos.add(sci1); + infos.add(sci2); + + try { + Set mergingSegments = new HashSet<>(Arrays.asList(sci1, sci2)); + BaseMergePolicyTestCase.MockMergeContext mergeContext = new BaseMergePolicyTestCase.MockMergeContext( + SegmentCommitInfo::getDelCount + ); + mergeContext.setMergingSegments(mergingSegments); + + MergePolicy.MergeSpecification result = mergePolicy.findMerges(MergeTrigger.FULL_FLUSH, infos, mergeContext); + + assertNull(result); + } finally { + sci1.info.dir.close(); + sci2.info.dir.close(); + } + } + + public void testFindMerges_MultipleSegmentsInSingleGroup() throws IOException { + CriteriaBasedMergePolicy mergePolicy = new CriteriaBasedMergePolicy(new TieredMergePolicy()); + SegmentInfos infos = createSegmentInfos(); + SegmentCommitInfo sci1 = createSegmentCommitInfo("_1", "bucket1"); + SegmentCommitInfo sci2 = createSegmentCommitInfo("_2", "bucket1"); + infos.add(sci1); + infos.add(sci2); + BaseMergePolicyTestCase.MockMergeContext mergeContext = new BaseMergePolicyTestCase.MockMergeContext( + SegmentCommitInfo::getDelCount + ); + mergeContext.setMergingSegments(Collections.emptySet()); + MergePolicy.MergeSpecification result = mergePolicy.findMerges(MergeTrigger.FULL_FLUSH, infos, mergeContext); + + assertNotNull(result); + assertEquals(1, result.merges.size()); + } + + private SegmentInfos createSegmentInfos() { + return new SegmentInfos(Version.LATEST.major); + } + + private SegmentCommitInfo createSegmentCommitInfo(String segmentName, String bucketName) throws IOException { + Directory directory = mock(FSDirectory.class); + when(directory.fileLength(any())).thenReturn(5368709120L); + 
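+        // Build a minimal SegmentInfo against the mocked directory; the CriteriaBasedCodec.BUCKET_NAME attribute set below
+        // is what CriteriaBasedMergePolicy groups segments by when it builds per-bucket merge specifications.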
SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_10_1_0, + segmentName, + 5, + false, + false, + new Lucene103Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + segmentInfo.putAttribute(CriteriaBasedCodec.BUCKET_NAME, bucketName); + segmentInfo.setFiles(List.of(segmentName + ".cfe")); + return new SegmentCommitInfo(segmentInfo, 5, 10, 1, 0, 0, null); + } +} diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index 2f8d3cb9ad3d2..d3637aac98ae6 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -43,6 +43,7 @@ import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockFactory; import org.apache.lucene.tests.index.AssertingDirectoryReader; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; @@ -127,6 +128,7 @@ import org.hamcrest.Matchers; import java.io.IOException; +import java.nio.file.Path; import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -323,7 +325,7 @@ public void testRegisterIndexStore() throws IOException { ); final IndexService indexService = newIndexService(module); - assertThat(indexService.getDirectoryFactory(), instanceOf(FooFunction.class)); + assertThat(indexService.getDirectoryFactory(), instanceOf(IndexStorePlugin.DirectoryFactory.class)); indexService.close("simon says", false); } @@ -830,6 +832,11 @@ public static final class FooFunction implements IndexStorePlugin.DirectoryFacto public Directory newDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException { return new FsDirectoryFactory().newDirectory(indexSettings, shardPath); } + + @Override + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + return null; + } } public static final class Wrapper implements CheckedFunction { @@ -840,6 +847,19 @@ public DirectoryReader apply(DirectoryReader reader) { } public static final class TestStoreFactory implements IndexStorePlugin.StoreFactory { + @Override + public Store newStore( + ShardId shardId, + IndexSettings indexSettings, + Directory directory, + ShardLock shardLock, + Store.OnClose onClose, + ShardPath shardPath, + IndexStorePlugin.DirectoryFactory directoryFactory + ) throws IOException { + return new Store(shardId, indexSettings, directory, shardLock, onClose, shardPath, directoryFactory); + } + @Override public Store newStore( ShardId shardId, diff --git a/server/src/test/java/org/opensearch/index/MergeSchedulerSettingsTests.java b/server/src/test/java/org/opensearch/index/MergeSchedulerSettingsTests.java index baaf584702f78..576e509f240de 100644 --- a/server/src/test/java/org/opensearch/index/MergeSchedulerSettingsTests.java +++ b/server/src/test/java/org/opensearch/index/MergeSchedulerSettingsTests.java @@ -157,6 +157,7 @@ private static IndexMetadata createMetadata(int maxThreadCount, int maxMergeCoun return newIndexMeta("index", builder.build()); } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/19866") public void testMaxThreadAndMergeCount() { IllegalArgumentException exc = expectThrows( IllegalArgumentException.class, 
diff --git a/server/src/test/java/org/opensearch/index/engine/CompositeIndexWriterForAppendTests.java b/server/src/test/java/org/opensearch/index/engine/CompositeIndexWriterForAppendTests.java new file mode 100644 index 0000000000000..7625bc5945e5a --- /dev/null +++ b/server/src/test/java/org/opensearch/index/engine/CompositeIndexWriterForAppendTests.java @@ -0,0 +1,631 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LogByteSizeMergePolicy; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.FilterIndexOutput; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.common.CheckedBiFunction; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.lucene.uid.Versions; +import org.opensearch.common.util.concurrent.ReleasableLock; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.BucketedCompositeDirectory; +import org.opensearch.index.VersionType; +import org.opensearch.index.mapper.ParsedDocument; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.opensearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; +import static org.mockito.Mockito.mock; + +public class CompositeIndexWriterForAppendTests extends CriteriaBasedCompositeIndexWriterBaseTests { + + // For refresh + public void testGetIndexWriterWithRotatingMapAlwaysPutWriterInCurrentMap() throws IOException, InterruptedException { + AtomicReference liveIndexWriterDeletesMap = new AtomicReference<>( + new CompositeIndexWriter.LiveIndexWriterDeletesMap() + ); + CountDownLatch latch = new CountDownLatch(1); + AtomicBoolean run = new AtomicBoolean(true); + Thread refresher = new Thread(() -> { + while (run.get()) { + latch.countDown(); + liveIndexWriterDeletesMap.set(liveIndexWriterDeletesMap.get().buildTransitionMap()); + liveIndexWriterDeletesMap.set(liveIndexWriterDeletesMap.get().invalidateOldMap()); + } + }); + + refresher.start(); + try { + latch.await(); + int numOps = 100; + CompositeIndexWriter.DisposableIndexWriter disposableIndexWriter; + while (numOps > 0) { + disposableIndexWriter = liveIndexWriterDeletesMap.get() + .computeIndexWriterIfAbsentForCriteria("200", this::createChildWriterFactory, new ShardId("foo", "_na_", 1)); + assertNotNull(disposableIndexWriter); + assertFalse(disposableIndexWriter.getLookupMap().isClosed()); + disposableIndexWriter.getIndexWriter().close(); + disposableIndexWriter.getIndexWriter().getDirectory().close(); + numOps--; + } + } finally { + run.set(false); + refresher.join(); + } + } + + public void testConcurrentBuildTransitionAndInvalidateForIndexWriterDeleteMap() throws InterruptedException { + 
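+        // Repeatedly rotate the lookup map on a background thread (buildTransitionMap followed by invalidateOldMap) for a
+        // short period, then verify that the resulting map still exposes non-null current and old generations.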
CompositeIndexWriter.LiveIndexWriterDeletesMap map = new CompositeIndexWriter.LiveIndexWriterDeletesMap(); + AtomicReference currentMapRef = new AtomicReference<>(map); + AtomicBoolean running = new AtomicBoolean(true); + int iterations = 100; + + // Thread that continuously builds transition maps and invalidates old ones + Thread refreshThread = new Thread(() -> { + int count = 0; + while (running.get() && count < iterations) { + CompositeIndexWriter.LiveIndexWriterDeletesMap current = currentMapRef.get(); + CompositeIndexWriter.LiveIndexWriterDeletesMap transition = current.buildTransitionMap(); + CompositeIndexWriter.LiveIndexWriterDeletesMap invalidated = transition.invalidateOldMap(); + currentMapRef.set(invalidated); + count++; + Thread.yield(); + } + }); + + refreshThread.start(); + + // Let it run for a bit + Thread.sleep(100); + running.set(false); + refreshThread.join(1000); + + // Verify the final map is in a valid state + CompositeIndexWriter.LiveIndexWriterDeletesMap finalMap = currentMapRef.get(); + assertNotNull(finalMap.current); + assertNotNull(finalMap.old); + } + + public void testConcurrentComputeIndexWriterWithMapRotation() throws Exception { + AtomicBoolean stopped = new AtomicBoolean(); + Semaphore indexedDocs = new Semaphore(0); + AtomicInteger computeCount = new AtomicInteger(0); + AtomicInteger rotationCount = new AtomicInteger(0); + AtomicReference mapRef = new AtomicReference<>( + new CompositeIndexWriter.LiveIndexWriterDeletesMap() + ); + CheckedBiFunction< + String, + CompositeIndexWriter.CriteriaBasedIndexWriterLookup, + CompositeIndexWriter.DisposableIndexWriter, + IOException> supplier = (crit, lookup) -> mock(CompositeIndexWriter.DisposableIndexWriter.class); + + // Compute thread + Thread computeThread = new Thread(() -> { + while (stopped.get() == false) { + try { + CompositeIndexWriter.LiveIndexWriterDeletesMap currentMap = mapRef.get(); + currentMap.computeIndexWriterIfAbsentForCriteria("test-criteria", supplier, new ShardId("foo", "_na_", 1)); + computeCount.incrementAndGet(); + indexedDocs.release(); + } catch (Exception e) { + + } + } + }); + + Thread rotationThread = new Thread(() -> { + while (stopped.get() == false) { + CompositeIndexWriter.LiveIndexWriterDeletesMap current = mapRef.get(); + CompositeIndexWriter.LiveIndexWriterDeletesMap transition = current.buildTransitionMap(); + CompositeIndexWriter.LiveIndexWriterDeletesMap invalidated = transition.invalidateOldMap(); + mapRef.set(invalidated); + rotationCount.incrementAndGet(); + } + }); + + try { + rotationThread.start(); + computeThread.start(); + indexedDocs.acquire(100); + } finally { + stopped.set(true); + computeThread.join(); + rotationThread.join(); + } + + assertTrue("Compute operations completed: " + computeCount.get(), computeCount.get() >= 100); + assertTrue("Rotation operations completed: " + rotationCount.get(), rotationCount.get() >= 0); + } + + public void testUnableToObtainLockOnActiveLookupWhenWriteLockDuringIndexing() throws IOException, InterruptedException { + CompositeIndexWriter.LiveIndexWriterDeletesMap map = new CompositeIndexWriter.LiveIndexWriterDeletesMap(); + CountDownLatch writeLockAcquiredLatch = new CountDownLatch(1); + CountDownLatch releaseWriteLockLatch = new CountDownLatch(1); + Thread writer = new Thread(() -> { + try (ReleasableLock ignore = map.acquireCurrentWriteLock()) { + writeLockAcquiredLatch.countDown(); + releaseWriteLockLatch.await(); + } catch (InterruptedException ignored) { + + } + }); + + writer.start(); + writeLockAcquiredLatch.await(1, 
TimeUnit.SECONDS); + + expectThrows( + LookupMapLockAcquisitionException.class, + () -> map.computeIndexWriterIfAbsentForCriteria("200", this::createChildWriterFactory, new ShardId("foo", "_na_", 1)) + ); + releaseWriteLockLatch.countDown(); + writer.join(); + } + + public void testConcurrentIndexingDuringRefresh() throws IOException, InterruptedException { + + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + try { + AtomicBoolean run = new AtomicBoolean(true); + Thread indexer = new Thread(() -> { + while (run.get()) { + String id = Integer.toString(randomIntBetween(1, 100)); + try { + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } catch (IOException e) { + throw new AssertionError(e); + } catch (AlreadyClosedException e) { + return; + } + } + }); + + Thread refresher = new Thread(() -> { + while (run.get()) { + try { + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + } catch (IOException e) {} + } + }); + indexer.start(); + refresher.start(); + run.set(false); + indexer.join(); + refresher.join(); + } finally { + IOUtils.close(compositeIndexWriter); + } + } + + public void testConcurrentIndexAndDeleteDuringRefresh() throws IOException, InterruptedException { + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + int numDocs = scaledRandomIntBetween(100, 1000); + CountDownLatch latch = new CountDownLatch(2); + AtomicBoolean done = new AtomicBoolean(false); + AtomicInteger numDeletes = new AtomicInteger(); + + Thread indexer = new Thread(() -> { + try { + latch.countDown(); + latch.await(); + for (int i = 0; i < numDocs; i++) { + String id = Integer.toString(i); + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + if (rarely()) { + compositeIndexWriter.deleteDocument( + operation.uid(), + false, + newDeleteTombstoneDoc(id), + 1, + 2, + primaryTerm.get(), + softDeletesField + ); + + numDeletes.incrementAndGet(); + } + } + } catch (Exception e) { + throw new AssertionError(e); + } finally { + done.set(true); + } + }); + + indexer.start(); + latch.countDown(); + latch.await(); + while (done.get() == false) { + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + Thread.sleep(100); + } + + indexer.join(); + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + try (DirectoryReader directoryReader = DirectoryReader.open(compositeIndexWriter.getAccumulatingIndexWriter())) { + assertEquals(numDocs - numDeletes.get(), directoryReader.numDocs()); + } finally { + IOUtils.close(compositeIndexWriter); + } + } + + public void testTreatDocumentFailureAsFatalErrorOnGroupSpecificIndexWriter() throws IOException { + AtomicReference addDocException = new AtomicReference<>(); + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ) { + @Override + DisposableIndexWriter createChildWriterUtil(String associatedCriteria, CriteriaBasedIndexWriterLookup lookup) + throws IOException { + return new CompositeIndexWriter.DisposableIndexWriter( + new 
IndexWriter( + store.newTempDirectory( + BucketedCompositeDirectory.CHILD_DIRECTORY_PREFIX + associatedCriteria + "_" + UUID.randomUUID() + ), + newIndexWriterConfig() + ) { + @Override + public long addDocuments(Iterable> docs) throws IOException { + final IOException ex = addDocException.getAndSet(null); + if (ex != null) { + throw ex; + } + return super.addDocuments(docs); + } + }, + lookup + ); + } + }; + + String id = Integer.toString(randomIntBetween(1, 100)); + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + addDocException.set(new IOException("simulated")); + expectThrows(IOException.class, () -> compositeIndexWriter.addDocuments(operation.docs(), operation.uid())); + } finally { + IOUtils.close(compositeIndexWriter); + } + } + + public void testGetFlushingBytesAfterSmallDocuments() throws IOException { + IndexWriter parentWriter = createWriter(); + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + parentWriter, + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + compositeIndexWriter.getConfig().setRAMBufferSizeMB(128); + + try { + // Add a few small documents + for (int i = 0; i < 10; i++) { + Engine.Index operation = indexForDoc(createParsedDoc(String.valueOf(i), null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + // Should still be 0 or small since we haven't triggered a flush + long flushingBytes = compositeIndexWriter.getFlushingBytes(); + assertEquals("Flushing bytes should be non-negative", 0, flushingBytes); + } finally { + IOUtils.close(compositeIndexWriter); + } + } + + public void testHasPendingMergesInitiallyFalse() throws IOException { + IndexWriter parentWriter = createWriter(); + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + parentWriter, + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + assertFalse("Should have no pending merges initially", compositeIndexWriter.hasPendingMerges()); + } + + public void testHasPendingMergesDuringForceMerge() throws IOException, InterruptedException { + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy(); + mergePolicy.setMergeFactor(2); + compositeIndexWriter.getConfig().setMergePolicy(mergePolicy); + + try { + for (int i = 0; i < 4; i++) { + Engine.Index operation = indexForDoc(createParsedDoc(String.valueOf(i), null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + } + + final CountDownLatch mergeLatch = new CountDownLatch(1); + final AtomicBoolean hadPendingMerges = new AtomicBoolean(false); + + Thread mergeThread = new Thread(() -> { + try { + mergeLatch.countDown(); + compositeIndexWriter.forceMerge(1, true); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + mergeThread.start(); + mergeLatch.await(); + + // Check for pending merges during merge + Thread.sleep(50); // Give merge time to start + mergeThread.join(); + // After merge completes, should have no pending merges + assertFalse("Should have no pending merges after force merge completes", compositeIndexWriter.hasPendingMerges()); + } finally 
{ + IOUtils.close(compositeIndexWriter); + } + } + + public void testGetTragicExceptionWithOutOfMemoryError() throws Exception { + AtomicBoolean shouldFail = new AtomicBoolean(false); + AtomicReference thrownError = new AtomicReference<>(); + + Directory dir = new FilterDirectory(newDirectory()) { + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { + IndexOutput out = super.createOutput(name, context); + return new FilterIndexOutput("failing output", "test", out) { + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + if (shouldFail.get() && name.endsWith(".cfe")) { + Error ex = new OutOfMemoryError("Simulated write failure"); + thrownError.set(ex); + throw ex; + } + super.writeBytes(b, offset, length); + } + }; + } + }; + + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(dir), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + compositeIndexWriter.getConfig().setMaxBufferedDocs(2); + // Add a document successfully + Engine.Index operation = indexForDoc(createParsedDoc(String.valueOf("-1"), null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + + // Enable failure + shouldFail.set(true); + + boolean hitError = false; + try { + // This should trigger the failure + for (int i = 0; i < 10; i++) { + operation = indexForDoc(createParsedDoc(String.valueOf(i), null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + compositeIndexWriter.commit(); + } catch (Error e) { + hitError = true; + } + + if (hitError && thrownError.get() != null) { + Throwable tragic = compositeIndexWriter.getTragicException(); + if (tragic != null) { + assertFalse("Writer should be closed after tragic exception", compositeIndexWriter.isOpen()); + } + } + + IOUtils.closeWhileHandlingException(compositeIndexWriter, dir); + } + + public void testRAMBytesUsedWithOldAndCurrentWriters() throws Exception { + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + try { + // Create documents in first criteria group + for (int i = 0; i < 10; i++) { + Engine.Index operation = indexForDoc(createParsedDoc(String.valueOf(i), null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + long ramAfterGroup1 = compositeIndexWriter.ramBytesUsed(); + for (int i = 0; i < 10; i++) { + Engine.Index operation = indexForDoc(createParsedDoc(String.valueOf(i), null, "testGroupingCriteria2")); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + long ramAfterGroup2 = compositeIndexWriter.ramBytesUsed(); + // RAM should account for both groups + assertTrue("RAM should account for multiple groups", ramAfterGroup2 >= ramAfterGroup1); + } finally { + IOUtils.close(compositeIndexWriter); + } + + } + + public void testSetLiveCommitDataWithRollback() throws Exception { + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + try { + // Create documents in first criteria group + for (int i = 0; i < 10; i++) { + Engine.Index operation = indexForDoc(createParsedDoc(String.valueOf(i), null, DEFAULT_CRITERIA)); + 
compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + Map data = new HashMap<>(); + data.put("status", "beforeCommit"); + compositeIndexWriter.setLiveCommitData(data.entrySet()); + compositeIndexWriter.commit(); + + data = new HashMap<>(); + data.put("status", "beforeRollback"); + // Rollback without committing + compositeIndexWriter.rollback(); + Engine.Index operation = indexForDoc(createParsedDoc(String.valueOf(13), null, "testGroupingCriteria1")); + expectThrows(AlreadyClosedException.class, () -> compositeIndexWriter.addDocuments(operation.docs(), operation.uid())); + + // Reopen writer + try ( + CompositeIndexWriter compositeIndexWriterForRollback = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ) + ) { + for (Map.Entry entry : compositeIndexWriterForRollback.getLiveCommitData()) { + if (entry.getKey().equals("status")) { + assertEquals("beforeCommit", entry.getValue()); + } + } + } + + } finally { + IOUtils.close(compositeIndexWriter); + } + } + + public void testObtainLock() throws Exception { + try ( + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ) + ) { + try (Releasable lock = compositeIndexWriter.obtainWriteLockOnAllMap()) { + assertTrue(compositeIndexWriter.isWriteLockedByCurrentThread()); + } + } + } + + public void testHasBlocksMergeFullyDelSegments() throws Exception { + CompositeIndexWriter compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + + try { + Engine.Index operation = indexForDoc(createParsedDoc("foo", null, DEFAULT_CRITERIA)); + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + compositeIndexWriter.softUpdateDocuments(operation.uid(), operation.docs(), 2, 2, primaryTerm.get(), softDeletesField); + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + compositeIndexWriter.commit(); + compositeIndexWriter.softUpdateDocuments(operation.uid(), operation.docs(), 2, 2, primaryTerm.get(), softDeletesField); + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + compositeIndexWriter.forceMergeDeletes(true); + compositeIndexWriter.commit(); + try (DirectoryReader directoryReader = DirectoryReader.open(compositeIndexWriter.getAccumulatingIndexWriter())) { + assertEquals(1, directoryReader.leaves().size()); + assertFalse("hasBlocks should be cleared", directoryReader.leaves().get(0).reader().getMetaData().hasBlocks()); + } + } finally { + IOUtils.close(compositeIndexWriter); + } + } + + public Engine.Index appendOnlyPrimary(ParsedDocument doc, boolean retry, final long autoGeneratedIdTimestamp, boolean create) { + return new Engine.Index( + newUid(doc), + doc, + UNASSIGNED_SEQ_NO, + 1, + create ? 
Versions.MATCH_DELETED : Versions.MATCH_ANY, + VersionType.INTERNAL, + Engine.Operation.Origin.PRIMARY, + System.nanoTime(), + autoGeneratedIdTimestamp, + retry, + UNASSIGNED_SEQ_NO, + 0 + ); + } +} diff --git a/server/src/test/java/org/opensearch/index/engine/CompositeIndexWriterForUpdateAndDeletesTests.java b/server/src/test/java/org/opensearch/index/engine/CompositeIndexWriterForUpdateAndDeletesTests.java new file mode 100644 index 0000000000000..bb407092af7bd --- /dev/null +++ b/server/src/test/java/org/opensearch/index/engine/CompositeIndexWriterForUpdateAndDeletesTests.java @@ -0,0 +1,213 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.index.DirectoryReader; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.util.io.IOUtils; + +import java.io.IOException; + +public class CompositeIndexWriterForUpdateAndDeletesTests extends CriteriaBasedCompositeIndexWriterBaseTests { + + public void testDeleteWithDocumentInParentWriter() throws IOException { + final String id = "test"; + CompositeIndexWriter compositeIndexWriter = null; + try { + compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.deleteDocument( + operation.uid(), + false, + newDeleteTombstoneDoc(id), + 1, + 2, + primaryTerm.get(), + softDeletesField + ); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + try (DirectoryReader directoryReader = DirectoryReader.open(compositeIndexWriter.getAccumulatingIndexWriter())) { + assertEquals(0, directoryReader.numDocs()); + } + } finally { + if (compositeIndexWriter != null) { + IOUtils.closeWhileHandlingException(compositeIndexWriter); + } + } + } + + public void testDeleteWithDocumentInChildWriter() throws IOException { + final String id = "test"; + CompositeIndexWriter compositeIndexWriter = null; + try { + compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + compositeIndexWriter.deleteDocument( + operation.uid(), + false, + newDeleteTombstoneDoc(id), + 1, + 2, + primaryTerm.get(), + softDeletesField + ); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + + try (DirectoryReader directoryReader = DirectoryReader.open(compositeIndexWriter.getAccumulatingIndexWriter())) { + assertEquals(0, directoryReader.numDocs()); + } + } finally { + if (compositeIndexWriter != null) { + IOUtils.closeWhileHandlingException(compositeIndexWriter); + } + } + } + + public void 
testDeleteWithDocumentInBothChildAndParentWriter() throws IOException { + final String id = "test"; + CompositeIndexWriter compositeIndexWriter = null; + try { + compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + + operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.softUpdateDocuments(operation.uid(), operation.docs(), 2, 2, primaryTerm.get(), softDeletesField); + compositeIndexWriter.deleteDocument( + operation.uid(), + false, + newDeleteTombstoneDoc(id), + 1, + 2, + primaryTerm.get(), + softDeletesField + ); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + try (DirectoryReader directoryReader = DirectoryReader.open(compositeIndexWriter.getAccumulatingIndexWriter())) { + assertEquals(0, directoryReader.numDocs()); + } + } finally { + if (compositeIndexWriter != null) { + IOUtils.closeWhileHandlingException(compositeIndexWriter); + } + } + } + + public void testUpdateWithDocumentInParentIndexWriter() throws IOException { + final String id = "test"; + CompositeIndexWriter compositeIndexWriter = null; + try { + compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.softUpdateDocuments(operation.uid(), operation.docs(), 2, 2, primaryTerm.get(), softDeletesField); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + try (DirectoryReader directoryReader = DirectoryReader.open(compositeIndexWriter.getAccumulatingIndexWriter())) { + assertEquals(1, directoryReader.numDocs()); + } + } finally { + if (compositeIndexWriter != null) { + IOUtils.closeWhileHandlingException(compositeIndexWriter); + } + } + } + + public void testUpdateWithDocumentInChildIndexWriter() throws IOException { + final String id = "test"; + CompositeIndexWriter compositeIndexWriter = null; + try { + compositeIndexWriter = new CompositeIndexWriter( + config(), + createWriter(), + newSoftDeletesPolicy(), + softDeletesField, + indexWriterFactory + ); + Engine.Index operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + compositeIndexWriter.addDocuments(operation.docs(), operation.uid()); + } + + operation = indexForDoc(createParsedDoc(id, null, DEFAULT_CRITERIA)); + try (Releasable ignore1 = compositeIndexWriter.acquireLock(operation.uid().bytes())) { + 
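+ // At this point the live copy of the document still sits in a criteria-specific child writer
+ // (no refresh has run since the add), so the soft update below has to be resolved against the
+ // child writer rather than the accumulating parent writer. The neighbouring tests cover the
+ // other placements of a live document: parent only, and both child and parent. The common
+ // pattern exercised in this class, shown here as a sketch rather than additional verification:
+ //
+ //     try (Releasable uidLock = writer.acquireLock(uid.bytes())) {
+ //         writer.softUpdateDocuments(uid, docs, 2, 2, primaryTerm.get(), softDeletesField);
+ //     }
+ //     writer.beforeRefresh();
+ //     writer.afterRefresh(true);  // child-writer contents are carried into the accumulating writer
+ //     // DirectoryReader.open(writer.getAccumulatingIndexWriter()) now reflects the update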
compositeIndexWriter.softUpdateDocuments(operation.uid(), operation.docs(), 2, 2, primaryTerm.get(), softDeletesField); + } + + compositeIndexWriter.beforeRefresh(); + compositeIndexWriter.afterRefresh(true); + try (DirectoryReader directoryReader = DirectoryReader.open(compositeIndexWriter.getAccumulatingIndexWriter())) { + assertEquals(1, directoryReader.numDocs()); + } + } finally { + if (compositeIndexWriter != null) { + IOUtils.close(compositeIndexWriter); + } + } + } + +} diff --git a/server/src/test/java/org/opensearch/index/engine/CriteriaBasedCompositeIndexWriterBaseTests.java b/server/src/test/java/org/opensearch/index/engine/CriteriaBasedCompositeIndexWriterBaseTests.java new file mode 100644 index 0000000000000..1742f4e481398 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/engine/CriteriaBasedCompositeIndexWriterBaseTests.java @@ -0,0 +1,489 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ReferenceManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.BytesRef; +import org.opensearch.Version; +import org.opensearch.action.support.replication.ReplicationResponse; +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.AllocationId; +import org.opensearch.common.CheckedBiFunction; +import org.opensearch.common.Nullable; +import org.opensearch.common.Randomness; +import org.opensearch.common.compress.CompressedXContent; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.BigArrays; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.indices.breaker.CircuitBreakerService; +import org.opensearch.core.indices.breaker.NoneCircuitBreakerService; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.BucketedCompositeDirectory; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.MapperTestUtils; +import org.opensearch.index.codec.CodecService; +import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.IdFieldMapper; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.Mapping; +import org.opensearch.index.mapper.ParseContext; +import 
org.opensearch.index.mapper.ParsedDocument; +import org.opensearch.index.mapper.SeqNoFieldMapper; +import org.opensearch.index.mapper.SourceFieldMapper; +import org.opensearch.index.mapper.SourceToParse; +import org.opensearch.index.mapper.Uid; +import org.opensearch.index.mapper.VersionFieldMapper; +import org.opensearch.index.seqno.ReplicationTracker; +import org.opensearch.index.seqno.RetentionLeases; +import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.store.FsDirectoryFactory; +import org.opensearch.index.store.Store; +import org.opensearch.index.translog.TranslogConfig; +import org.opensearch.test.DummyShardLock; +import org.opensearch.test.IndexSettingsModule; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.LongSupplier; +import java.util.function.Supplier; + +import static java.util.Collections.emptyList; +import static org.opensearch.index.engine.Engine.Operation.Origin.PEER_RECOVERY; +import static org.opensearch.index.engine.Engine.Operation.Origin.REPLICA; +import static org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED; + +public class CriteriaBasedCompositeIndexWriterBaseTests extends OpenSearchTestCase { + protected static final IndexSettings INDEX_SETTINGS = IndexSettingsModule.newIndexSettings("index", Settings.EMPTY); + protected final EngineTestCase.PrimaryTermSupplier primaryTerm = new EngineTestCase.PrimaryTermSupplier(1L); + protected Store store; + protected Path primaryTranslogDir; + protected ThreadPool threadPool; + protected final ShardId shardId = new ShardId(new Index("index", "_na_"), 0); + protected final AllocationId allocationId = AllocationId.newInitializing(); + protected final NumericDocValuesField softDeletesField = Lucene.newSoftDeletesField(); + public static final String DEFAULT_CRITERIA = "testGroupingCriteria"; + protected IndexWriterFactory indexWriterFactory; + + protected static ParsedDocument createParsedDoc(String id, String routing, String groupingCriteria) { + return testParsedDocument( + id, + routing, + testDocumentWithTextField(), + new BytesArray("{ \"value\" : \"test\" }"), + null, + groupingCriteria + ); + } + + protected static ParsedDocument testParsedDocument( + String id, + String routing, + ParseContext.Document document, + BytesReference source, + Mapping mappingUpdate, + String groupingCriteria + ) { + return testParsedDocument(id, routing, document, source, mappingUpdate, false, groupingCriteria); + } + + protected static ParsedDocument testParsedDocument( + String id, + String routing, + ParseContext.Document document, + BytesReference source, + Mapping mappingUpdate, + boolean recoverySource, + String groupingCriteria + ) { + Field uidField = new Field("_id", Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE); + Field versionField = new NumericDocValuesField("_version", 0); + SeqNoFieldMapper.SequenceIDFields seqID = SeqNoFieldMapper.SequenceIDFields.emptySeqID(); + document.add(uidField); + document.add(versionField); + document.add(seqID.seqNo); + document.add(seqID.seqNoDocValue); + document.add(seqID.primaryTerm); + 
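+ // The grouping criteria attached below is what CompositeIndexWriter uses to pick the
+ // criteria-specific child writer for this document (see createChildWriterFactory further down,
+ // which backs each criteria with its own temp directory). For example, documents created with
+ // createParsedDoc(id, null, DEFAULT_CRITERIA) and createParsedDoc(id, null, "testGroupingCriteria2")
+ // are routed to different child writers.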
document.setGroupingCriteria(groupingCriteria); + if (source != null) { + BytesRef ref = source.toBytesRef(); + if (recoverySource) { + document.add(new StoredField(SourceFieldMapper.RECOVERY_SOURCE_NAME, ref.bytes, ref.offset, ref.length)); + document.add(new NumericDocValuesField(SourceFieldMapper.RECOVERY_SOURCE_NAME, 1)); + } else { + document.add(new StoredField(SourceFieldMapper.NAME, ref.bytes, ref.offset, ref.length)); + } + } + return new ParsedDocument(versionField, seqID, id, routing, Arrays.asList(document), source, MediaTypeRegistry.JSON, mappingUpdate); + } + + protected static ParseContext.Document testDocumentWithTextField() { + return testDocumentWithTextField("test"); + } + + protected static ParseContext.Document testDocumentWithTextField(String value) { + ParseContext.Document document = testDocument(); + document.add(new TextField("value", value, Field.Store.YES)); + return document; + } + + protected static ParseContext.Document testDocument() { + return new ParseContext.Document(); + } + + protected Engine.Index indexForDoc(ParsedDocument doc) { + return new Engine.Index(newUid(doc), primaryTerm.get(), doc); + } + + protected static Term newUid(String id) { + return new Term("_id", Uid.encodeId(id)); + } + + protected static Term newUid(ParsedDocument doc) { + return newUid(doc.id()); + } + + protected IndexWriter createWriter() throws IOException { + return createWriter(store.directory()); + } + + protected IndexWriter createWriter(Directory directory) throws IOException { + try { + return new IndexWriter(directory, newIndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETES_FIELD).setCommitOnClose(true)); + } catch (LockObtainFailedException ex) { + logger.warn("could not lock IndexWriter", ex); + throw ex; + } + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + store = createStore(INDEX_SETTINGS, newDirectory()); + primaryTranslogDir = createTempDir("translog-primary"); + threadPool = new TestThreadPool(getClass().getName()); + primaryTranslogDir = createTempDir("translog-primary"); + indexWriterFactory = new NativeLuceneIndexWriterFactory(); + } + + protected Store createStore(final IndexSettings indexSettings, final Directory directory) throws IOException { + final Path path = createTempDir().resolve(shardId.getIndex().getUUID()).resolve(String.valueOf(shardId.id())); + final ShardPath shardPath = new ShardPath(false, path, path, shardId); + return new Store( + shardId, + indexSettings, + directory, + new DummyShardLock(shardId), + Store.OnClose.EMPTY, + shardPath, + new FsDirectoryFactory() + ); + } + + public EngineConfig config() { + return config( + INDEX_SETTINGS, + store, + primaryTranslogDir, + NoMergePolicy.INSTANCE, + null, + null, + null, + null, + null, + new NoneCircuitBreakerService(), + null + ); + } + + public EngineConfig config( + final IndexSettings indexSettings, + final Store store, + final Path translogPath, + final MergePolicy mergePolicy, + final ReferenceManager.RefreshListener externalRefreshListener, + final ReferenceManager.RefreshListener internalRefreshListener, + final Sort indexSort, + final @Nullable LongSupplier maybeGlobalCheckpointSupplier, + final @Nullable Supplier maybeRetentionLeasesSupplier, + final CircuitBreakerService breakerService, + final Engine.EventListener eventListener + ) { + final IndexWriterConfig iwc = newIndexWriterConfig(); + final TranslogConfig translogConfig = new TranslogConfig( + shardId, + translogPath, + indexSettings, + BigArrays.NON_RECYCLING_INSTANCE, + "", + 
false + ); + final List extRefreshListenerList = externalRefreshListener == null + ? emptyList() + : Collections.singletonList(externalRefreshListener); + final List intRefreshListenerList = internalRefreshListener == null + ? emptyList() + : Collections.singletonList(internalRefreshListener); + final LongSupplier globalCheckpointSupplier; + final Supplier retentionLeasesSupplier; + if (maybeGlobalCheckpointSupplier == null) { + assert maybeRetentionLeasesSupplier == null; + final ReplicationTracker replicationTracker = new ReplicationTracker( + shardId, + allocationId.getId(), + indexSettings, + randomNonNegativeLong(), + SequenceNumbers.NO_OPS_PERFORMED, + update -> {}, + () -> 0L, + (leases, listener) -> listener.onResponse(new ReplicationResponse()), + () -> SafeCommitInfo.EMPTY, + sId -> false + ); + globalCheckpointSupplier = replicationTracker; + retentionLeasesSupplier = replicationTracker::getRetentionLeases; + } else { + assert maybeRetentionLeasesSupplier != null; + globalCheckpointSupplier = maybeGlobalCheckpointSupplier; + retentionLeasesSupplier = maybeRetentionLeasesSupplier; + } + return new EngineConfig.Builder().shardId(shardId) + .threadPool(threadPool) + .indexSettings(indexSettings) + .warmer(null) + .store(store) + .mergePolicy(mergePolicy) + .analyzer(iwc.getAnalyzer()) + .similarity(iwc.getSimilarity()) + .codecService(new CodecService(null, indexSettings, logger)) + .eventListener(eventListener) + .queryCache(IndexSearcher.getDefaultQueryCache()) + .queryCachingPolicy(IndexSearcher.getDefaultQueryCachingPolicy()) + .translogConfig(translogConfig) + .flushMergesAfter(TimeValue.timeValueMinutes(5)) + .externalRefreshListener(extRefreshListenerList) + .internalRefreshListener(intRefreshListenerList) + .indexSort(indexSort) + .circuitBreakerService(breakerService) + .globalCheckpointSupplier(globalCheckpointSupplier) + .retentionLeasesSupplier(retentionLeasesSupplier) + .primaryTermSupplier(primaryTerm) + .build(); + } + + protected ParseContext.Document newDeleteTombstoneDoc(String id) { + final ParseContext.Document doc = new ParseContext.Document(); + Field uidField = new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE); + doc.add(uidField); + Field versionField = new NumericDocValuesField(VersionFieldMapper.NAME, 0); + doc.add(versionField); + SeqNoFieldMapper.SequenceIDFields seqID = SeqNoFieldMapper.SequenceIDFields.emptySeqID(); + doc.add(seqID.seqNo); + doc.add(seqID.seqNoDocValue); + doc.add(seqID.primaryTerm); + seqID.tombstoneField.setLongValue(1); + doc.add(seqID.tombstoneField); + doc.add(softDeletesField); + return doc; + } + + public List generateHistoryOnReplica( + int numOps, + boolean allowGapInSeqNo, + boolean allowDuplicate, + boolean includeNestedDocs, + Engine.Operation.TYPE opType + ) throws Exception { + long seqNo = 0; + final int maxIdValue = randomInt(numOps * 2); + final List operations = new ArrayList<>(numOps); + CheckedBiFunction nestedParsedDocFactory = nestedParsedDocFactory(); + for (int i = 0; i < numOps; i++) { + final String id = Integer.toString(randomInt(maxIdValue)); + final boolean isNestedDoc = includeNestedDocs && opType == Engine.Operation.TYPE.INDEX && randomBoolean(); + final int nestedValues = between(0, 3); + final long startTime = threadPool.relativeTimeInNanos(); + final int copies = allowDuplicate && rarely() ? between(2, 4) : 1; + for (int copy = 0; copy < copies; copy++) { + final ParsedDocument doc = isNestedDoc + ? 
nestedParsedDocFactory.apply(id, nestedValues) + : createParsedDoc(id, null, DEFAULT_CRITERIA); + switch (opType) { + case INDEX: + operations.add( + new Engine.Index( + EngineTestCase.newUid(doc), + doc, + seqNo, + primaryTerm.get(), + i, + null, + randomFrom(REPLICA, PEER_RECOVERY), + startTime, + -1, + true, + SequenceNumbers.UNASSIGNED_SEQ_NO, + 0 + ) + ); + break; + case DELETE: + operations.add( + new Engine.Delete( + doc.id(), + EngineTestCase.newUid(doc), + seqNo, + primaryTerm.get(), + i, + null, + randomFrom(REPLICA, PEER_RECOVERY), + startTime, + SequenceNumbers.UNASSIGNED_SEQ_NO, + 0 + ) + ); + break; + case NO_OP: + operations.add( + new Engine.NoOp(seqNo, primaryTerm.get(), randomFrom(REPLICA, PEER_RECOVERY), startTime, "test-" + i) + ); + break; + default: + throw new IllegalStateException("Unknown operation type [" + opType + "]"); + } + } + seqNo++; + if (allowGapInSeqNo && rarely()) { + seqNo++; + } + } + Randomness.shuffle(operations); + return operations; + } + + public static CheckedBiFunction nestedParsedDocFactory() throws Exception { + final MapperService mapperService = createMapperService(); + final String nestedMapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type") + .startObject("properties") + .startObject("nested_field") + .field("type", "nested") + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + final DocumentMapper nestedMapper = mapperService.documentMapperParser().parse("type", new CompressedXContent(nestedMapping)); + return (docId, nestedFieldValues) -> { + final XContentBuilder source = XContentFactory.jsonBuilder().startObject().field("field", "value"); + if (nestedFieldValues > 0) { + XContentBuilder nestedField = source.startObject("nested_field"); + for (int i = 0; i < nestedFieldValues; i++) { + nestedField.field("field-" + i, "value-" + i); + } + source.endObject(); + } + source.endObject(); + return nestedMapper.parse(new SourceToParse("test", docId, BytesReference.bytes(source), MediaTypeRegistry.JSON)); + }; + } + + public static MapperService createMapperService() throws IOException { + return createMapperService("{\"properties\": {}}"); + } + + public static MapperService createMapperService(String mapping) throws IOException { + IndexMetadata indexMetadata = IndexMetadata.builder("test") + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + ) + .putMapping(mapping) + .build(); + MapperService mapperService = MapperTestUtils.newMapperService( + new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), + createTempDir(), + Settings.EMPTY, + "test" + ); + mapperService.merge(indexMetadata, MapperService.MergeReason.MAPPING_UPDATE); + return mapperService; + } + + protected CompositeIndexWriter.DisposableIndexWriter createChildWriterFactory( + String criteria, + CompositeIndexWriter.CriteriaBasedIndexWriterLookup lookup + ) throws IOException { + return new CompositeIndexWriter.DisposableIndexWriter( + new IndexWriter( + store.newTempDirectory(BucketedCompositeDirectory.CHILD_DIRECTORY_PREFIX + criteria + "_" + UUID.randomUUID()), + newIndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETES_FIELD).setCommitOnClose(true) + ), + lookup + ); + } + + @Override + @After + public void tearDown() throws Exception { + super.tearDown(); + IOUtils.close(store, () -> terminate(threadPool)); + } + + public SoftDeletesPolicy newSoftDeletesPolicy() { + 
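+ // Builds a soft-deletes retention policy driven by a test-local global checkpoint (which never
+ // advances here), a random number of extra retained operations, and no retention leases, so the
+ // tests can exercise varied retention settings without depending on a real replication tracker.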
final AtomicLong globalCheckpoint = new AtomicLong(); + final int extraRetainedOps = between(0, 100); + final SoftDeletesPolicy softDeletesPolicy = new SoftDeletesPolicy( + globalCheckpoint::get, + NO_OPS_PERFORMED, + extraRetainedOps, + () -> RetentionLeases.EMPTY + ); + + return softDeletesPolicy; + } +} diff --git a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java index bb5a1eb568108..a81149538be02 100644 --- a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java @@ -209,6 +209,7 @@ import java.util.stream.LongStream; import static java.util.Collections.shuffle; +import static org.opensearch.common.util.FeatureFlags.CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG; import static org.opensearch.index.engine.Engine.Operation.Origin.LOCAL_RESET; import static org.opensearch.index.engine.Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY; import static org.opensearch.index.engine.Engine.Operation.Origin.PEER_RECOVERY; @@ -805,6 +806,9 @@ public void testMergeSegmentsOnCommitDefault() throws Exception { engine.refresh("test"); segments = engine.segments(true); + // This works for regular scenario because merges are triggered by preparePointInTimeMerge by refresh, which is a blocking + // merge. + // In context aware scenario, addIndexes triggers a non blocking merge before refresh triggers it, so this test case fails assertThat(segments.size(), equalTo(1)); } } @@ -1029,7 +1033,7 @@ public void testTranslogRecoveryDoesNotReplayIntoTranslog() throws IOException { recoveringEngine = new InternalEngine(initialEngine.config()) { @Override - protected void commitIndexWriter(IndexWriter writer, String translogUUID) throws IOException { + protected void commitIndexWriter(DocumentIndexWriter writer, String translogUUID) throws IOException { committed.set(true); super.commitIndexWriter(writer, translogUUID); } @@ -3616,7 +3620,7 @@ public void testUnreferencedFileCleanUpFailsOnSegmentMergeFailureWhenDirectoryCl @Override public void eval(MockDirectoryWrapper dir) throws IOException { - if (callStackContainsAnyOf("mergeTerms")) { + if (callStackContainsAnyOf("mergeWithLogging")) { throw new IOException("No space left on device"); } } @@ -3964,7 +3968,7 @@ public void testTranslogCleanUpPostCommitCrash() throws Exception { ) { @Override - protected void commitIndexWriter(IndexWriter writer, String translogUUID) throws IOException { + protected void commitIndexWriter(DocumentIndexWriter writer, String translogUUID) throws IOException { super.commitIndexWriter(writer, translogUUID); if (throwErrorOnCommit.get()) { throw new RuntimeException("power's out"); @@ -6296,7 +6300,7 @@ public void testKeepTranslogAfterGlobalCheckpoint() throws Exception { final AtomicLong lastSyncedGlobalCheckpointBeforeCommit = new AtomicLong(Translog.readGlobalCheckpoint(translogPath, translogUUID)); try (InternalEngine engine = new InternalEngine(engineConfig) { @Override - protected void commitIndexWriter(IndexWriter writer, String translogUUID) throws IOException { + protected void commitIndexWriter(DocumentIndexWriter writer, String translogUUID) throws IOException { lastSyncedGlobalCheckpointBeforeCommit.set(Translog.readGlobalCheckpoint(translogPath, translogUUID)); // Advance the global checkpoint during the flush to create a lag between a persisted global checkpoint in the translog // (this value is visible to the deletion policy) 
and an in memory global checkpoint in the SequenceNumbersService. @@ -8532,6 +8536,147 @@ public void testNewChangesSnapshotWithDeleteAndUpdateWithDerivedSource() throws } } + private static class AddIndexesFailingIndexWriter extends IndexWriter { + + private AtomicReference> failureToThrow = new AtomicReference<>(); + + /** + * Constructs a new IndexWriter per the settings given in conf. If you want to make + * "live" changes to this writer instance, use {@link #getConfig()}. + * + *
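+ * The writer constructed here additionally lets a test inject a one-shot failure into
+ * addIndexes(Directory...): setThrowFailure(() -> new IOException("simulated")) arms the
+ * failure, the next addIndexes call throws it, and clearFailure() runs automatically so
+ * later calls succeed again.
+ *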

NOTE: after ths writer is created, the given configuration instance cannot be passed + * to another writer. + * + * @param d the index directory. The index is either created or appended according + * conf.getOpenMode(). + * @param conf the configuration settings according to which IndexWriter should be initialized. + * @throws IOException if the directory cannot be read/written to, or if it does not exist and + * conf.getOpenMode() is OpenMode.APPEND or if there is any other + * low-level IO error + */ + public AddIndexesFailingIndexWriter(Directory d, IndexWriterConfig conf) throws IOException { + super(d, conf); + } + + @Override + public long addIndexes(Directory... dirs) throws IOException { + maybeThrowFailure(); + return super.addIndexes(dirs); + } + + private void maybeThrowFailure() throws IOException { + if (failureToThrow.get() != null) { + Exception failure = failureToThrow.get().get(); + clearFailure(); // one shot + if (failure instanceof RuntimeException) { + throw (RuntimeException) failure; + } else if (failure instanceof IOException) { + throw (IOException) failure; + } else { + assert false : "unsupported failure class: " + failure.getClass().getCanonicalName(); + } + } + } + + public void setThrowFailure(Supplier failureSupplier) { + failureToThrow.set(failureSupplier); + } + + public void clearFailure() { + failureToThrow.set(null); + } + } + + @LockFeatureFlag(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG) + public void testShardFailsForCompositeIndexWriterInCaseAddIndexesThrewExceptionWithAppend() throws IOException, InterruptedException { + MockDirectoryWrapper wrapper = newMockDirectory(); + final Path translogPath = createTempDir("testFailEngineOnRandomIO"); + try (Store store = createStore(wrapper)) { + final ParsedDocument doc1 = testParsedDocument("1", null, testContextSpecificDocument(), B_1, null); + final ParsedDocument doc2 = testParsedDocument("2", null, testContextSpecificDocument(), B_1, null); + final ParsedDocument doc3 = testParsedDocument("3", null, testContextSpecificDocument(), B_1, null); + + AtomicReference throwingIndexWriter = new AtomicReference<>(); + final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings( + "test", + Settings.builder() + .put(defaultSettings.getSettings()) + .put(IndexSettings.INDEX_CONTEXT_AWARE_ENABLED_SETTING.getKey(), true) + .build() + ); + try (InternalEngine engine = createEngine(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, (directory, iwc) -> { + throwingIndexWriter.set(new AddIndexesFailingIndexWriter(directory, iwc)); + return throwingIndexWriter.get(); + })) { + // test document failure while indexing + if (randomBoolean()) { + throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated")); + } else { + throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length")); + } + // test index with document failure + engine.index(indexForDoc(doc1)); + engine.index(indexForDoc(doc2)); + engine.index(indexForDoc(doc3)); + try { + engine.refresh("testing"); + fail(); + } catch (AlreadyClosedException ex) { + if (ex.getCause() != null) { + assertTrue(ex.toString(), ex.getCause() instanceof MockDirectoryWrapper.FakeIOException); + } + } catch (RefreshFailedEngineException ex) { + // fine + } + } + } + } + + @LockFeatureFlag(CONTEXT_AWARE_MIGRATION_EXPERIMENTAL_FLAG) + public void testShardFailsForCompositeIndexWriterInCaseAddIndexesThrewExceptionWithUpdate() throws IOException, InterruptedException { + MockDirectoryWrapper wrapper = 
newMockDirectory(); + final Path translogPath = createTempDir("testFailEngineOnRandomIO"); + try (Store store = createStore(wrapper)) { + final ParsedDocument doc1 = testParsedDocument("1", null, testContextSpecificDocument(), B_1, null); + final ParsedDocument doc2 = testParsedDocument("2", null, testContextSpecificDocument(), B_1, null); + final ParsedDocument doc3 = testParsedDocument("1", null, testContextSpecificDocument(), B_1, null); + final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings( + "test", + Settings.builder() + .put(defaultSettings.getSettings()) + .put(IndexSettings.INDEX_CONTEXT_AWARE_ENABLED_SETTING.getKey(), true) + .build() + ); + + AtomicReference throwingIndexWriter = new AtomicReference<>(); + try (InternalEngine engine = createEngine(indexSettings, store, createTempDir(), NoMergePolicy.INSTANCE, (directory, iwc) -> { + throwingIndexWriter.set(new AddIndexesFailingIndexWriter(directory, iwc)); + return throwingIndexWriter.get(); + })) { + // test document failure while indexing + if (randomBoolean()) { + throwingIndexWriter.get().setThrowFailure(() -> new IOException("simulated")); + } else { + throwingIndexWriter.get().setThrowFailure(() -> new IllegalArgumentException("simulated max token length")); + } + // test index with document failure + engine.index(indexForDoc(doc1)); + engine.index(indexForDoc(doc2)); + engine.index(indexForDoc(doc3)); + try { + engine.refresh("testing"); + fail(); + } catch (AlreadyClosedException ex) { + if (ex.getCause() != null) { + assertTrue(ex.toString(), ex.getCause() instanceof MockDirectoryWrapper.FakeIOException); + } + } catch (RefreshFailedEngineException ex) { + // fine + } + } + } + } + private EngineConfig createEngineConfigWithMapperSupplierForDerivedSource(Store store) throws IOException { // Setup with derived source enabled Settings settings = Settings.builder() diff --git a/server/src/test/java/org/opensearch/index/store/StoreTests.java b/server/src/test/java/org/opensearch/index/store/StoreTests.java index ae9d179b291f0..a0f5dc6c3ec39 100644 --- a/server/src/test/java/org/opensearch/index/store/StoreTests.java +++ b/server/src/test/java/org/opensearch/index/store/StoreTests.java @@ -796,7 +796,7 @@ public void testOnCloseCallback() throws IOException { assertEquals(shardId, theLock.getShardId()); assertEquals(lock, theLock); count.incrementAndGet(); - }, null); + }, null, null); assertEquals(count.get(), 0); final int iters = randomIntBetween(1, 10); @@ -821,7 +821,8 @@ public void testStoreShardPath() { StoreTests.newDirectory(random()), new DummyShardLock(shardId), Store.OnClose.EMPTY, - shardPath + shardPath, + null ); assertEquals(shardPath, store.shardPath()); store.close(); diff --git a/server/src/test/java/org/opensearch/plugins/IndexStorePluginTests.java b/server/src/test/java/org/opensearch/plugins/IndexStorePluginTests.java index 0bb6d4d74f6da..c43b5592f1be1 100644 --- a/server/src/test/java/org/opensearch/plugins/IndexStorePluginTests.java +++ b/server/src/test/java/org/opensearch/plugins/IndexStorePluginTests.java @@ -129,6 +129,19 @@ public Map getStoreFactories() { } public static class TestStoreFactory implements IndexStorePlugin.StoreFactory { + @Override + public Store newStore( + ShardId shardId, + IndexSettings indexSettings, + org.apache.lucene.store.Directory directory, + ShardLock shardLock, + Store.OnClose onClose, + ShardPath shardPath, + IndexStorePlugin.DirectoryFactory directoryFactory + ) throws IOException { + return new Store(shardId, indexSettings, directory, 
shardLock, onClose, shardPath, directoryFactory); + } + @Override public Store newStore( ShardId shardId, diff --git a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java index b700a6bf4bb67..e4e56cddd21d2 100644 --- a/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/engine/EngineTestCase.java @@ -112,6 +112,8 @@ import org.opensearch.index.seqno.ReplicationTracker; import org.opensearch.index.seqno.RetentionLeases; import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.store.FsDirectoryFactory; import org.opensearch.index.store.Store; import org.opensearch.index.translog.InternalTranslogManager; import org.opensearch.index.translog.LocalTranslog; @@ -364,6 +366,12 @@ protected void assertEngineCleanedUp(Engine engine, TranslogDeletionPolicy trans } } + protected static ParseContext.Document testContextSpecificDocument() { + ParseContext.Document doc = testDocumentWithTextField("criteria"); + doc.setGroupingCriteria("grouping_criteria"); + return doc; + } + protected static ParseContext.Document testDocumentWithTextField() { return testDocumentWithTextField("test"); } @@ -525,7 +533,17 @@ protected Store createStore(final Directory directory) throws IOException { } protected Store createStore(final IndexSettings indexSettings, final Directory directory) throws IOException { - return new Store(shardId, indexSettings, directory, new DummyShardLock(shardId)); + final Path path = createTempDir().resolve(shardId.getIndex().getUUID()).resolve(String.valueOf(shardId.id())); + final ShardPath shardPath = new ShardPath(false, path, path, shardId); + return new Store( + shardId, + indexSettings, + directory, + new DummyShardLock(shardId), + Store.OnClose.EMPTY, + shardPath, + new FsDirectoryFactory() + ); } protected Translog createTranslog(LongSupplier primaryTermSupplier) throws IOException { diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 36f4e77228325..71e23c4c26787 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -110,6 +110,7 @@ import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.similarity.SimilarityService; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; +import org.opensearch.index.store.FsDirectoryFactory; import org.opensearch.index.store.RemoteBufferedOutputDirectory; import org.opensearch.index.store.RemoteDirectory; import org.opensearch.index.store.RemoteSegmentStoreDirectory; @@ -290,7 +291,15 @@ protected Store createStore(IndexSettings indexSettings, ShardPath shardPath) th } protected Store createStore(ShardId shardId, IndexSettings indexSettings, Directory directory, ShardPath shardPath) throws IOException { - return new Store(shardId, indexSettings, directory, new DummyShardLock(shardId), Store.OnClose.EMPTY, shardPath); + return new Store( + shardId, + indexSettings, + directory, + new DummyShardLock(shardId), + Store.OnClose.EMPTY, + shardPath, + new FsDirectoryFactory() + ); } protected Releasable acquirePrimaryOperationPermitBlockingly(IndexShard indexShard) throws ExecutionException, InterruptedException { diff --git 
a/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java b/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java index 12817ec1ab6cd..8c69e1450a920 100644 --- a/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java +++ b/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java @@ -42,6 +42,7 @@ import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.opensearch.index.BucketedCompositeDirectory; import java.io.IOException; import java.nio.ByteBuffer; @@ -64,12 +65,18 @@ private CorruptionUtils() {} public static void corruptIndex(Random random, Path indexPath, boolean corruptSegments) throws IOException { // corrupt files + // TODO: Till the time we remove sub directories during refresh, we also filter out tem directories files here as + // them will not make sense. final Path[] filesToCorrupt = Files.walk(indexPath).filter(p -> { final String name = p.getFileName().toString(); boolean segmentFile = name.startsWith("segments_") || name.endsWith(".si"); return Files.isRegularFile(p) && name.startsWith("extra") == false // Skip files added by Lucene's ExtrasFS + && !p.toAbsolutePath().getParent().toString().contains(BucketedCompositeDirectory.CHILD_DIRECTORY_PREFIX) // Skip child + // level + // directories && IndexWriter.WRITE_LOCK_NAME.equals(name) == false + && (corruptSegments ? segmentFile : segmentFile == false); }).toArray(Path[]::new); corruptFile(random, filesToCorrupt); diff --git a/test/framework/src/main/java/org/opensearch/test/store/MockFSDirectoryFactory.java b/test/framework/src/main/java/org/opensearch/test/store/MockFSDirectoryFactory.java index d8279170ddd92..6d021e3b18ed9 100644 --- a/test/framework/src/main/java/org/opensearch/test/store/MockFSDirectoryFactory.java +++ b/test/framework/src/main/java/org/opensearch/test/store/MockFSDirectoryFactory.java @@ -40,7 +40,9 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.LockFactory; import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.tests.store.BaseDirectoryWrapper; import org.apache.lucene.tests.store.MockDirectoryWrapper; import org.apache.lucene.tests.util.LuceneTestCase; @@ -66,6 +68,7 @@ import java.io.IOException; import java.io.PrintStream; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.util.Arrays; import java.util.List; import java.util.Random; @@ -105,6 +108,11 @@ public Directory newDirectory(IndexSettings idxSettings, ShardPath path) throws return wrap(randomDirectoryService(random, idxSettings, path), random, indexSettings, path.getShardId()); } + @Override + public Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { + return new NIOFSDirectory(location, lockFactory); + } + public static void checkIndex(Logger logger, Store store, ShardId shardId) { if (store.tryIncRef()) { logger.info("start check index");