opensearch-project
diff --git a/‎CHANGELOG.md
+1 b/‎CHANGELOG.md
+1
diff --git a/‎src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java
+66-4 b/‎src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java
+66-4
diff --git a/‎src/main/java/org/opensearch/knn/index/memory/NativeMemoryEntryContext.java
+81-1 b/‎src/main/java/org/opensearch/knn/index/memory/NativeMemoryEntryContext.java
+81-1
diff --git a/‎src/main/java/org/opensearch/knn/index/memory/NativeMemoryLoadStrategy.java
+10-7 b/‎src/main/java/org/opensearch/knn/index/memory/NativeMemoryLoadStrategy.java
+10-7
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 * [Remote Vector Index Build] Introduce Remote Native Index Build feature flag, settings, and initial skeleton [#2525](https://github.com/opensearch-project/k-NN/pull/2525)
 * [Remote Vector Index Build] Implement vector data upload and vector data size threshold setting [#2550](https://github.com/opensearch-project/k-NN/pull/2550)
 * [Remote Vector Index Build] Implement data download and IndexOutput write functionality [#2554](https://github.com/opensearch-project/k-NN/pull/2554)
+* Add concurrency optimizations with native memory graph loading and force eviction (#2265) [#2345](https://github.com/opensearch-project/k-NN/pull/2345)
 ### Enhancements
 * Introduce node level circuit breakers for k-NN [#2509](https://github.com/opensearch-project/k-NN/pull/2509)
 ### Bug Fixes
 
@@ -35,12 +35,14 @@
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedDeque;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantLock;
 
 /**
  * Manages native memory allocations made by JNI.
@@ -56,6 +58,7 @@ public class NativeMemoryCacheManager implements Closeable {
 
     private Cache<String, NativeMemoryAllocation> cache;
     private Deque<String> accessRecencyQueue;
+    private final ConcurrentHashMap<String, ReentrantLock> indexLocks = new ConcurrentHashMap<>();
     private final ExecutorService executor;
     private AtomicBoolean cacheCapacityReached;
     private long maxWeight;
@@ -306,6 +309,55 @@ public CacheStats getCacheStats() {
         return cache.stats();
     }
 
+    /**
+     * Opens the resources of the given entry context while holding a per-key lock, so that
+     * callers sharing the same lock for a key run {@code nativeMemoryEntryContext.open()} one at a time.
+     * The lock is created on demand and removed from {@code indexLocks} after this thread has
+     * released it, provided no other thread is queued on it.
+     *
+     * @param key the unique identifier for the index
+     * @param nativeMemoryEntryContext the context containing vector index information
+     */
+    private void open(String key, NativeMemoryEntryContext<?> nativeMemoryEntryContext) {
+        ReentrantLock indexFileLock = indexLocks.computeIfAbsent(key, k -> new ReentrantLock());
+        // Acquire before entering the try block: if lock() itself fails, the finally block must
+        // not call unlock() on a lock this thread does not hold.
+        indexFileLock.lock();
+        try {
+            nativeMemoryEntryContext.open();
+        } finally {
+            indexFileLock.unlock();
+            // NOTE(review): a thread that already fetched this lock from the map but has not yet
+            // called lock() is not reported by hasQueuedThreads(), so it may end up using a lock
+            // that was removed here - confirm this is acceptable.
+            if (!indexFileLock.hasQueuedThreads()) {
+                indexLocks.remove(key, indexFileLock);
+            }
+        }
+    }
+
+    /**
+     * Retrieves an entry from the cache and updates its access recency if found.
+     * This method combines cache access with recency queue management to maintain
+     * the least recently used (LRU) order of cached entries.
+     *
+     * @param key the unique identifier for the cached entry
+     * @return the cached NativeMemoryAllocation if present, null otherwise
+     */
+    private NativeMemoryAllocation getFromCacheAndUpdateRecency(String key) {
+        NativeMemoryAllocation result = cache.getIfPresent(key);
+        if (result != null) {
+            updateAccessRecency(key);
+        }
+        return result;
+    }
+
+    /**
+     * Marks {@code key} as the most recently accessed entry by removing it from
+     * {@code accessRecencyQueue} and appending it at the tail.
+     * If the key is not in the queue yet, it is simply appended.
+     *
+     * @param key the unique identifier for the cached entry whose recency needs to be updated
+     */
+    private void updateAccessRecency(String key) {
+        // NOTE(review): remove + addLast are two separate queue operations - confirm callers
+        // tolerate the key being briefly absent from the queue.
+        accessRecencyQueue.remove(key);
+        accessRecencyQueue.addLast(key);
+    }
+
     /**
      * Retrieves NativeMemoryAllocation associated with the nativeMemoryEntryContext.
      *
@@ -338,23 +390,28 @@ public NativeMemoryAllocation get(NativeMemoryEntryContext<?> nativeMemoryEntryC
             // In case of a cache miss, least recently accessed entries are evicted in a blocking manner
             // before the new entry can be added to the cache.
             String key = nativeMemoryEntryContext.getKey();
-            NativeMemoryAllocation result = cache.getIfPresent(key);
 
             // Cache Hit
             // In case of a cache hit, moving the item to the end of the recency queue adds
             // some overhead to the get operation. This can be optimized further to make this operation
             // as lightweight as possible. Multiple approaches and their outcomes were documented
             // before moving forward with the current solution.
             // The details are outlined here: https://github.com/opensearch-project/k-NN/pull/2015#issuecomment-2327064680
+            NativeMemoryAllocation result = getFromCacheAndUpdateRecency(key);
             if (result != null) {
-                accessRecencyQueue.remove(key);
-                accessRecencyQueue.addLast(key);
                 return result;
             }
 
             // Cache Miss
             // Evict before put
+            // open the graph file before proceeding to load the graph into memory
+            open(key, nativeMemoryEntryContext);
             synchronized (this) {
+                // recheck if another thread already loaded this entry into the cache
+                result = getFromCacheAndUpdateRecency(key);
+                if (result != null) {
+                    return result;
+                }
                 if (getCacheSizeInKilobytes() + nativeMemoryEntryContext.calculateSizeInKB() >= maxWeight) {
                     Iterator<String> lruIterator = accessRecencyQueue.iterator();
                     while (lruIterator.hasNext()
@@ -376,7 +433,12 @@ public NativeMemoryAllocation get(NativeMemoryEntryContext<?> nativeMemoryEntryC
                 return result;
             }
         } else {
-            return cache.get(nativeMemoryEntryContext.getKey(), nativeMemoryEntryContext::load);
+            // open graphFile before load
+            try (nativeMemoryEntryContext) {
+                String key = nativeMemoryEntryContext.getKey();
+                open(key, nativeMemoryEntryContext);
+                return cache.get(key, nativeMemoryEntryContext::load);
+            }
         }
     }
 
 
@@ -12,12 +12,16 @@
 package org.opensearch.knn.index.memory;
 
 import lombok.Getter;
+import lombok.extern.log4j.Log4j2;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
 import org.opensearch.cluster.service.ClusterService;
 import org.opensearch.common.Nullable;
 import org.opensearch.knn.index.codec.util.NativeMemoryCacheKeyHelper;
 import org.opensearch.knn.index.engine.qframe.QuantizationConfig;
 import org.opensearch.knn.index.VectorDataType;
+import org.opensearch.knn.index.store.IndexInputWithBuffer;
 
 import java.io.IOException;
 import java.util.Map;
@@ -26,7 +30,7 @@
 /**
  * Encapsulates all information needed to load a component into native memory.
  */
-public abstract class NativeMemoryEntryContext<T extends NativeMemoryAllocation> {
+public abstract class NativeMemoryEntryContext<T extends NativeMemoryAllocation> implements AutoCloseable {
 
     protected final String key;
 
@@ -55,13 +59,27 @@ public String getKey() {
      */
     public abstract Integer calculateSizeInKB();
 
+    /**
+     * Hook for opening whatever resources an entry needs before {@link #load()} is called.
+     * The default implementation does nothing; subclasses that read from a file (for example
+     * the graph file's index input) override it.
+     */
+
+    public void open() {}
+
+    /**
+     * Releases any closeable resources held by this {@link NativeMemoryEntryContext}.
+     * The default implementation does nothing; subclasses that hold open resources override it.
+     */
+    @Override
+    public void close() {}
+
     /**
      * Loads entry into memory.
      *
      * @return NativeMemoryAllocation associated with NativeMemoryEntryContext
      */
     public abstract T load() throws IOException;
 
+    @Log4j2
     public static class IndexEntryContext extends NativeMemoryEntryContext<NativeMemoryAllocation.IndexAllocation> {
 
         @Getter
@@ -75,6 +93,17 @@ public static class IndexEntryContext extends NativeMemoryEntryContext<NativeMem
         @Getter
         private final String modelId;
 
+        @Getter
+        private boolean indexGraphFileOpened = false;
+        @Getter
+        private int indexSizeKb;
+
+        @Getter
+        private IndexInput readStream;
+
+        @Getter
+        IndexInputWithBuffer indexInputWithBuffer;
+
         /**
          * Constructor
          *
@@ -131,10 +160,61 @@ public Integer calculateSizeInKB() {
             }
         }
 
+        @Override
+        public void open() {
+            // if graph file is already opened for index, do nothing
+            if (isIndexGraphFileOpened()) {
+                return;
+            }
+            // Extract vector file name from the given cache key.
+            // Ex: _0_165_my_field.faiss@1vaqiupVUwvkXAG4Qc/RPg==
+            final String cacheKey = this.getKey();
+            final String vectorFileName = NativeMemoryCacheKeyHelper.extractVectorIndexFileName(cacheKey);
+            if (vectorFileName == null) {
+                throw new IllegalStateException(
+                    "Invalid cache key was given. The key [" + cacheKey + "] does not contain the corresponding vector file name."
+                );
+            }
+
+            // Prepare for opening index input from directory.
+            final Directory directory = this.getDirectory();
+
+            // Try to open an index input then pass it down to native engine for loading an index.
+            try {
+                indexSizeKb = Math.toIntExact(directory.fileLength(vectorFileName) / 1024);
+                readStream = directory.openInput(vectorFileName, IOContext.READONCE);
+                readStream.seek(0);
+                indexInputWithBuffer = new IndexInputWithBuffer(readStream);
+                indexGraphFileOpened = true;
+                log.debug("[KNN] NativeMemoryCacheManager open successful");
+            } catch (IOException e) {
+                throw new RuntimeException("Failed to open the index " + openSearchIndexName);
+            }
+        }
+
+        /**
+         * Loads the index by delegating to {@code indexLoadStrategy}.
+         *
+         * @return the allocation returned by the load strategy
+         * @throws IllegalStateException if the graph file has not been opened
+         * @throws IOException declared by the signature
+         */
         @Override
         public NativeMemoryAllocation.IndexAllocation load() throws IOException {
+            // Refuse to load unless the opened flag is set.
+            if (!isIndexGraphFileOpened()) {
+                throw new IllegalStateException("Index graph file is not open");
+            }
             return indexLoadStrategy.load(this);
         }
+
+        /**
+         * Closes the index input held in {@code readStream}, if any, and marks the graph file as
+         * not opened. Does nothing when no index input has been assigned.
+         *
+         * @throws RuntimeException if closing the index input fails
+         */
+        @Override
+        public void close() {
+            if (readStream == null) {
+                return;
+            }
+            try {
+                readStream.close();
+            } catch (IOException e) {
+                throw new RuntimeException(
+                    "Exception while closing the indexInput index [" + openSearchIndexName + "] for loading the graph file.",
+                    e
+                );
+            } finally {
+                // Reset the flag even when close() fails, so a stream in an unknown state is
+                // never reported as opened and a later open() starts from a fresh index input.
+                indexGraphFileOpened = false;
+            }
+        }
     }
 
     public static class TrainingDataEntryContext extends NativeMemoryEntryContext<NativeMemoryAllocation.TrainingDataAllocation> {
 
@@ -13,12 +13,9 @@
 
 import lombok.extern.log4j.Log4j2;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
 import org.opensearch.core.action.ActionListener;
 import org.opensearch.knn.index.codec.util.NativeMemoryCacheKeyHelper;
 import org.opensearch.knn.index.engine.qframe.QuantizationConfig;
-import org.opensearch.knn.index.store.IndexInputWithBuffer;
 import org.opensearch.knn.index.util.IndexUtil;
 import org.opensearch.knn.jni.JNIService;
 import org.opensearch.knn.index.engine.KNNEngine;
@@ -88,10 +85,16 @@ public NativeMemoryAllocation.IndexAllocation load(NativeMemoryEntryContext.Inde
             final int indexSizeKb = Math.toIntExact(directory.fileLength(vectorFileName) / 1024);
 
             // Try to open an index input then pass it down to native engine for loading an index.
-            try (IndexInput readStream = directory.openInput(vectorFileName, IOContext.READONCE)) {
-                final IndexInputWithBuffer indexInputWithBuffer = new IndexInputWithBuffer(readStream);
-                final long indexAddress = JNIService.loadIndex(indexInputWithBuffer, indexEntryContext.getParameters(), knnEngine);
-
+            // open in NativeMemoryEntryContext takes care of opening the indexInput file
+            if (!indexEntryContext.isIndexGraphFileOpened()) {
+                throw new IllegalStateException("Index [" + indexEntryContext.getOpenSearchIndexName() + "] is not preloaded");
+            }
+            try (indexEntryContext) {
+                final long indexAddress = JNIService.loadIndex(
+                    indexEntryContext.indexInputWithBuffer,
+                    indexEntryContext.getParameters(),
+                    knnEngine
+                );
                 return createIndexAllocation(indexEntryContext, knnEngine, indexAddress, indexSizeKb, vectorFileName);
             }
         }