Skip to content

Commit e29bee5

Browse files
authored
Pause indexing completely in serverless when throttling (#127173)
This PR seeks to address stability issues seen in some CSPs. See ES-11516 for details. Serverless PR #3801 is associated with this change. When throttling is enabled for indexing, we limit indexing to 1 thread per shard. However, this might not be sufficient throttling in serverless, where we might have a large number of shards. With this change, indexing can be paused completely while throttling is active; this is controlled by the new `indices.pause.on.throttle` setting, which defaults to false.
1 parent 4d052d7 commit e29bee5

File tree

5 files changed

+90
-4
lines changed

5 files changed

+90
-4
lines changed

server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ public void apply(Settings value, Settings current, Settings previous) {
526526
IndexingMemoryController.MAX_INDEX_BUFFER_SIZE_SETTING,
527527
IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING,
528528
IndexingMemoryController.SHARD_MEMORY_INTERVAL_TIME_SETTING,
529+
IndexingMemoryController.PAUSE_INDEXING_ON_THROTTLE,
529530
ResourceWatcherService.ENABLED,
530531
ResourceWatcherService.RELOAD_INTERVAL_HIGH,
531532
ResourceWatcherService.RELOAD_INTERVAL_MEDIUM,

server/src/main/java/org/elasticsearch/index/engine/Engine.java

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.index.engine;
1111

12+
import org.apache.logging.log4j.LogManager;
1213
import org.apache.logging.log4j.Logger;
1314
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
1415
import org.apache.lucene.index.ByteVectorValues;
@@ -87,6 +88,7 @@
8788
import org.elasticsearch.index.store.Store;
8889
import org.elasticsearch.index.translog.Translog;
8990
import org.elasticsearch.index.translog.TranslogStats;
91+
import org.elasticsearch.indices.IndexingMemoryController;
9092
import org.elasticsearch.indices.recovery.RecoverySettings;
9193
import org.elasticsearch.search.suggest.completion.CompletionStats;
9294
import org.elasticsearch.threadpool.ThreadPool;
@@ -108,6 +110,7 @@
108110
import java.util.Set;
109111
import java.util.concurrent.CountDownLatch;
110112
import java.util.concurrent.ExecutionException;
113+
import java.util.concurrent.Semaphore;
111114
import java.util.concurrent.TimeUnit;
112115
import java.util.concurrent.atomic.AtomicBoolean;
113116
import java.util.concurrent.locks.Condition;
@@ -145,6 +148,7 @@ public abstract class Engine implements Closeable {
145148
protected final ReentrantLock failEngineLock = new ReentrantLock();
146149
protected final SetOnce<Exception> failedEngine = new SetOnce<>();
147150
protected final boolean enableRecoverySource;
151+
protected final boolean pauseIndexingOnThrottle;
148152

149153
private final AtomicBoolean isClosing = new AtomicBoolean();
150154
private final SubscribableListener<Void> drainOnCloseListener = new SubscribableListener<>();
@@ -176,6 +180,9 @@ protected Engine(EngineConfig engineConfig) {
176180
this.enableRecoverySource = RecoverySettings.INDICES_RECOVERY_SOURCE_ENABLED_SETTING.get(
177181
engineConfig.getIndexSettings().getSettings()
178182
);
183+
this.pauseIndexingOnThrottle = IndexingMemoryController.PAUSE_INDEXING_ON_THROTTLE.get(
184+
engineConfig.getIndexSettings().getSettings()
185+
);
179186
}
180187

181188
/**
@@ -444,12 +451,19 @@ public interface IndexCommitListener {
444451
* is enabled
445452
*/
446453
protected static final class IndexThrottle {
454+
private static final Logger logger = LogManager.getLogger(IndexThrottle.class);
447455
private final CounterMetric throttleTimeMillisMetric = new CounterMetric();
448456
private volatile long startOfThrottleNS;
449457
private static final ReleasableLock NOOP_LOCK = new ReleasableLock(new NoOpLock());
450-
private final ReleasableLock lockReference = new ReleasableLock(new ReentrantLock());
458+
private final PauseLock throttlingLock;
459+
private final ReleasableLock lockReference;
451460
private volatile ReleasableLock lock = NOOP_LOCK;
452461

462+
public IndexThrottle(boolean pause) {
463+
throttlingLock = new PauseLock(pause ? 0 : 1);
464+
lockReference = new ReleasableLock(throttlingLock);
465+
}
466+
453467
public Releasable acquireThrottle() {
454468
return lock.acquire();
455469
}
@@ -458,12 +472,15 @@ public Releasable acquireThrottle() {
458472
public void activate() {
459473
assert lock == NOOP_LOCK : "throttling activated while already active";
460474
startOfThrottleNS = System.nanoTime();
475+
throttlingLock.throttle();
461476
lock = lockReference;
462477
}
463478

464479
/** Deactivate throttling, which switches the lock to be an always-acquirable NoOpLock */
465480
public void deactivate() {
466481
assert lock != NOOP_LOCK : "throttling deactivated but not active";
482+
483+
throttlingLock.unthrottle();
467484
lock = NOOP_LOCK;
468485

469486
assert startOfThrottleNS > 0 : "Bad state of startOfThrottleNS";
@@ -553,6 +570,58 @@ public Condition newCondition() {
553570
}
554571
}
555572

573+
/* A lock implementation that allows us to control how many threads can take the lock
574+
* In particular, this is used to set the number of allowed threads to 1 or 0
575+
* when index throttling is activated.
576+
*/
577+
protected static final class PauseLock implements Lock {
578+
private final Semaphore semaphore = new Semaphore(Integer.MAX_VALUE);
579+
private final int allowThreads;
580+
581+
public PauseLock(int allowThreads) {
582+
this.allowThreads = allowThreads;
583+
}
584+
585+
public void lock() {
586+
semaphore.acquireUninterruptibly();
587+
}
588+
589+
@Override
590+
public void lockInterruptibly() throws InterruptedException {
591+
semaphore.acquire();
592+
}
593+
594+
@Override
595+
public void unlock() {
596+
semaphore.release();
597+
}
598+
599+
@Override
600+
public boolean tryLock() {
601+
throw new UnsupportedOperationException();
602+
}
603+
604+
@Override
605+
public boolean tryLock(long time, TimeUnit unit) throws InterruptedException {
606+
throw new UnsupportedOperationException();
607+
}
608+
609+
@Override
610+
public Condition newCondition() {
611+
throw new UnsupportedOperationException();
612+
}
613+
614+
public void throttle() {
615+
assert semaphore.availablePermits() == Integer.MAX_VALUE;
616+
semaphore.acquireUninterruptibly(Integer.MAX_VALUE - allowThreads);
617+
}
618+
619+
public void unthrottle() {
620+
assert semaphore.availablePermits() <= allowThreads;
621+
semaphore.release(Integer.MAX_VALUE - allowThreads);
622+
}
623+
}
624+
556625
/**
557626
* Perform document index operation on the engine
558627
* @param index operation to perform

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ public class InternalEngine extends Engine {
171171
private final CombinedDeletionPolicy combinedDeletionPolicy;
172172

173173
// How many callers are currently requesting index throttling. Currently there are only two situations where we do this: when merges
174-
// are falling behind and when writing indexing buffer to disk is too slow. When this is 0, there is no throttling, else we throttling
174+
// are falling behind and when writing indexing buffer to disk is too slow. When this is 0, there is no throttling, else we throttle
175175
// incoming indexing ops to a single thread:
176176
private final AtomicInteger throttleRequestCount = new AtomicInteger();
177177
private final AtomicBoolean pendingTranslogRecovery = new AtomicBoolean(false);
@@ -260,7 +260,7 @@ public InternalEngine(EngineConfig engineConfig) {
260260
engineConfig.getThreadPoolMergeExecutorService()
261261
);
262262
scheduler = mergeScheduler.getMergeScheduler();
263-
throttle = new IndexThrottle();
263+
throttle = new IndexThrottle(pauseIndexingOnThrottle);
264264
try {
265265
store.trimUnsafeCommits(config().getTranslogConfig().getTranslogPath());
266266
translog = openTranslog(

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2733,6 +2733,9 @@ public IndexEventListener getIndexEventListener() {
27332733
return indexEventListener;
27342734
}
27352735

2736+
/** Activate throttling for this shard. If {@link IndexingMemoryController#PAUSE_INDEXING_ON_THROTTLE}
2737+
* setting is set to true, throttling will pause indexing completely. Otherwise, indexing will be throttled to one thread.
2738+
*/
27362739
public void activateThrottling() {
27372740
try {
27382741
getEngine().activateThrottling();

server/src/main/java/org/elasticsearch/indices/IndexingMemoryController.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,17 @@ public class IndexingMemoryController implements IndexingOperationListener, Clos
8787
Property.NodeScope
8888
);
8989

90+
/* Currently, indexing is throttled due to memory pressure in stateful/stateless or disk pressure in stateless.
91+
* This limits the number of indexing threads to 1 per shard. However, this might not be enough when the number of
92+
* shards that need indexing is larger than the number of threads. So we might opt to pause indexing completely.
93+
* The default value for this setting is false, but it will be set to true in stateless.
94+
*/
95+
public static final Setting<Boolean> PAUSE_INDEXING_ON_THROTTLE = Setting.boolSetting(
96+
"indices.pause.on.throttle",
97+
false,
98+
Property.NodeScope
99+
);
100+
90101
private final ThreadPool threadPool;
91102

92103
private final Iterable<IndexShard> indexShards;
@@ -236,7 +247,9 @@ void forceCheck() {
236247
statusChecker.run();
237248
}
238249

239-
/** Asks this shard to throttle indexing to one thread */
250+
/** Asks this shard to throttle indexing to one thread. If the PAUSE_INDEXING_ON_THROTTLE seeting is set to true,
251+
* throttling will pause indexing completely for the throttled shard.
252+
*/
240253
protected void activateThrottling(IndexShard shard) {
241254
shard.activateThrottling();
242255
}

0 commit comments

Comments
 (0)