Skip to content

Commit 109b28d

Browse files
committed
Process seeding of split fate operations in batches
Updates the Seeder in the Manager that handles seeding split fate ops to use a single thread and to submit multiple outstanding operations to be seeded together instead of individually in order to improve performance. The user fate store will now track outstanding fate operations and return a future for each pending operation that will be completed when the batch is submitted. This closes apache#5160
1 parent 0c79e44 commit 109b28d

File tree

11 files changed

+227
-129
lines changed

11 files changed

+227
-129
lines changed

core/src/main/java/org/apache/accumulo/core/conf/Property.java

+1
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ public enum Property {
461461
+ "indefinitely. Default is 0 to block indefinitely. Only valid when tserver available "
462462
+ "threshold is set greater than 0.",
463463
"1.10.0"),
464+
@Deprecated(since = "4.0.0")
464465
MANAGER_SPLIT_WORKER_THREADS("manager.split.seed.threadpool.size", "8", PropertyType.COUNT,
465466
"The number of threads used to seed fate split task, the actual split work is done by fate"
466467
+ " threads.",

core/src/main/java/org/apache/accumulo/core/fate/Fate.java

+5
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.apache.accumulo.core.conf.AccumuloConfiguration;
5656
import org.apache.accumulo.core.conf.Property;
5757
import org.apache.accumulo.core.fate.FateStore.FateTxStore;
58+
import org.apache.accumulo.core.fate.FateStore.Seeder;
5859
import org.apache.accumulo.core.fate.ReadOnlyFateStore.TStatus;
5960
import org.apache.accumulo.core.logging.FateLogger;
6061
import org.apache.accumulo.core.manager.thrift.TFateOperation;
@@ -538,6 +539,10 @@ public FateId startTransaction() {
538539
return store.create();
539540
}
540541

542+
public Seeder<T> beginSeeding() {
543+
return store.beginSeeding();
544+
}
545+
541546
public void seedTransaction(FateOperation fateOp, FateKey fateKey, Repo<T> repo,
542547
boolean autoCleanUp) {
543548
try (var seeder = store.beginSeeding()) {

core/src/main/java/org/apache/accumulo/core/fate/FateStore.java

+2-9
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,6 @@ interface Seeder<T> extends AutoCloseable {
5757
* Attempts to seed a transaction with the given repo if it does not exist. A fateId will be
5858
* derived from the fateKey. If seeded, sets the following data for the fateId in the store.
5959
*
60-
* TODO: Support completing futures later in close method The current version will always return
61-
* with a CompleteableFuture that is already completed. Future version will process will
62-
* complete in the close() method for the User store.
63-
*
6460
* <ul>
6561
* <li>Set the fate op</li>
6662
* <li>Set the status to SUBMITTED</li>
@@ -77,15 +73,12 @@ interface Seeder<T> extends AutoCloseable {
7773
CompletableFuture<Optional<FateId>> attemptToSeedTransaction(Fate.FateOperation fateOp,
7874
FateKey fateKey, Repo<T> repo, boolean autoCleanUp);
7975

80-
// TODO: Right now all implementations do nothing
81-
// Eventually this would check the status of all added conditional mutations,
82-
// retry unknown, and then close the conditional writer.
8376
@Override
8477
void close();
8578
}
8679

87-
// Creates a conditional writer for the user fate store. For Zookeeper all this code will probably
88-
// do the same thing its currently doing as zookeeper does not support multi-node operations.
80+
// Creates a conditional writer for the user fate store. For Zookeeper this will be a no-op
81+
// because currently zookeeper does not support multi-node operations.
8982
Seeder<T> beginSeeding();
9083

9184
/**

core/src/main/java/org/apache/accumulo/core/fate/user/FateMutator.java

+3
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.accumulo.core.fate.user;
2020

21+
import org.apache.accumulo.core.data.ConditionalMutation;
2122
import org.apache.accumulo.core.fate.Fate;
2223
import org.apache.accumulo.core.fate.FateKey;
2324
import org.apache.accumulo.core.fate.FateStore;
@@ -101,4 +102,6 @@ enum Status {
101102

102103
Status tryMutate();
103104

105+
ConditionalMutation getMutation();
106+
104107
}

core/src/main/java/org/apache/accumulo/core/fate/user/FateMutatorImpl.java

+5
Original file line numberDiff line numberDiff line change
@@ -260,4 +260,9 @@ public Status tryMutate() {
260260
throw new RuntimeException(e);
261261
}
262262
}
263+
264+
@Override
265+
public ConditionalMutation getMutation() {
266+
return mutation;
267+
}
263268
}

core/src/main/java/org/apache/accumulo/core/fate/user/UserFateStore.java

+133-32
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,26 @@
2121
import java.io.IOException;
2222
import java.io.Serializable;
2323
import java.util.EnumSet;
24+
import java.util.HashMap;
25+
import java.util.Iterator;
2426
import java.util.List;
27+
import java.util.Map;
2528
import java.util.Map.Entry;
2629
import java.util.Objects;
2730
import java.util.Optional;
2831
import java.util.SortedMap;
2932
import java.util.UUID;
3033
import java.util.concurrent.CompletableFuture;
34+
import java.util.concurrent.atomic.AtomicBoolean;
3135
import java.util.function.Function;
3236
import java.util.function.Predicate;
3337
import java.util.function.Supplier;
3438
import java.util.stream.Collectors;
3539
import java.util.stream.Stream;
3640

41+
import org.apache.accumulo.core.client.AccumuloException;
42+
import org.apache.accumulo.core.client.AccumuloSecurityException;
43+
import org.apache.accumulo.core.client.ConditionalWriter;
3744
import org.apache.accumulo.core.client.Scanner;
3845
import org.apache.accumulo.core.client.TableNotFoundException;
3946
import org.apache.accumulo.core.clientImpl.ClientContext;
@@ -47,16 +54,19 @@
4754
import org.apache.accumulo.core.fate.FateId;
4855
import org.apache.accumulo.core.fate.FateInstanceType;
4956
import org.apache.accumulo.core.fate.FateKey;
57+
import org.apache.accumulo.core.fate.FateKey.FateKeyType;
5058
import org.apache.accumulo.core.fate.ReadOnlyRepo;
5159
import org.apache.accumulo.core.fate.Repo;
5260
import org.apache.accumulo.core.fate.StackOverflowException;
61+
import org.apache.accumulo.core.fate.user.FateMutator.Status;
5362
import org.apache.accumulo.core.fate.user.schema.FateSchema.RepoColumnFamily;
5463
import org.apache.accumulo.core.fate.user.schema.FateSchema.TxColumnFamily;
5564
import org.apache.accumulo.core.fate.user.schema.FateSchema.TxInfoColumnFamily;
5665
import org.apache.accumulo.core.fate.zookeeper.ZooUtil;
5766
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
5867
import org.apache.accumulo.core.security.Authorizations;
5968
import org.apache.accumulo.core.util.ColumnFQ;
69+
import org.apache.accumulo.core.util.Pair;
6070
import org.apache.accumulo.core.util.UtilWaitThread;
6171
import org.apache.hadoop.io.Text;
6272
import org.slf4j.Logger;
@@ -136,36 +146,14 @@ public FateId getFateId() {
136146

137147
@Override
138148
public Seeder<T> beginSeeding() {
139-
// TODO: For now can handle seeding 1 transaction at a time so just process
140-
// everything in attemptToSeedTransaction
141-
// Part 2 of the changes in #5160 will allow multiple seeding attempts to be combined
142-
// into one conditional mutation and we will need to track the pending operations
143-
// and futures in a map
144-
return new Seeder<T>() {
145-
@Override
146-
public CompletableFuture<Optional<FateId>> attemptToSeedTransaction(FateOperation fateOp,
147-
FateKey fateKey, Repo<T> repo, boolean autoCleanUp) {
148-
return CompletableFuture
149-
.completedFuture(seedTransaction(fateOp, fateKey, repo, autoCleanUp));
150-
}
151-
152-
@Override
153-
public void close() {
154-
// TODO: This will be used in Part 2 of #5160
155-
}
156-
};
149+
return new BatchSeeder();
157150
}
158151

159-
private Optional<FateId> seedTransaction(Fate.FateOperation fateOp, FateKey fateKey, Repo<T> repo,
160-
boolean autoCleanUp) {
161-
final var fateId = fateIdGenerator.fromTypeAndKey(type(), fateKey);
152+
private FateMutator<T> seedTransaction(Fate.FateOperation fateOp, FateKey fateKey, FateId fateId,
153+
Repo<T> repo, boolean autoCleanUp) {
162154
Supplier<FateMutator<T>> mutatorFactory = () -> newMutator(fateId).requireAbsent()
163155
.putKey(fateKey).putCreateTime(System.currentTimeMillis());
164-
if (seedTransaction(mutatorFactory, fateKey + " " + fateId, fateOp, repo, autoCleanUp)) {
165-
return Optional.of(fateId);
166-
} else {
167-
return Optional.empty();
168-
}
156+
return buildMutator(mutatorFactory, fateOp, repo, autoCleanUp);
169157
}
170158

171159
@Override
@@ -176,16 +164,22 @@ public boolean seedTransaction(Fate.FateOperation fateOp, FateId fateId, Repo<T>
176164
return seedTransaction(mutatorFactory, fateId.canonical(), fateOp, repo, autoCleanUp);
177165
}
178166

167+
private FateMutator<T> buildMutator(Supplier<FateMutator<T>> mutatorFactory,
168+
Fate.FateOperation fateOp, Repo<T> repo, boolean autoCleanUp) {
169+
var mutator = mutatorFactory.get();
170+
mutator =
171+
mutator.putFateOp(serializeTxInfo(fateOp)).putRepo(1, repo).putStatus(TStatus.SUBMITTED);
172+
if (autoCleanUp) {
173+
mutator = mutator.putAutoClean(serializeTxInfo(autoCleanUp));
174+
}
175+
return mutator;
176+
}
177+
179178
private boolean seedTransaction(Supplier<FateMutator<T>> mutatorFactory, String logId,
180179
Fate.FateOperation fateOp, Repo<T> repo, boolean autoCleanUp) {
180+
var mutator = buildMutator(mutatorFactory, fateOp, repo, autoCleanUp);
181181
int maxAttempts = 5;
182182
for (int attempt = 0; attempt < maxAttempts; attempt++) {
183-
var mutator = mutatorFactory.get();
184-
mutator =
185-
mutator.putFateOp(serializeTxInfo(fateOp)).putRepo(1, repo).putStatus(TStatus.SUBMITTED);
186-
if (autoCleanUp) {
187-
mutator = mutator.putAutoClean(serializeTxInfo(autoCleanUp));
188-
}
189183
var status = mutator.tryMutate();
190184
if (status == FateMutator.Status.ACCEPTED) {
191185
// signal to the super class that a new fate transaction was seeded and is ready to run
@@ -393,6 +387,113 @@ public FateInstanceType type() {
393387
return fateInstanceType;
394388
}
395389

390+
private class BatchSeeder implements Seeder<T> {
391+
private final AtomicBoolean closed = new AtomicBoolean(false);
392+
393+
private final Map<FateId,Pair<FateMutator<T>,CompletableFuture<Optional<FateId>>>> pending =
394+
new HashMap<>();
395+
396+
@Override
397+
public CompletableFuture<Optional<FateId>> attemptToSeedTransaction(FateOperation fateOp,
398+
FateKey fateKey, Repo<T> repo, boolean autoCleanUp) {
399+
Preconditions.checkState(!closed.get(), "Can't attempt to seed with a closed seeder.");
400+
401+
final var fateId = fateIdGenerator.fromTypeAndKey(type(), fateKey);
402+
// If not already submitted, add to the pending list and return the future
403+
// or the existing future if duplicate. The pending map will store the mutator
404+
// to be processed on close in a one batch.
405+
return pending.computeIfAbsent(fateId, id -> {
406+
FateMutator<T> mutator = seedTransaction(fateOp, fateKey, fateId, repo, autoCleanUp);
407+
CompletableFuture<Optional<FateId>> future = new CompletableFuture<>();
408+
return new Pair<>(mutator, future);
409+
}).getSecond();
410+
}
411+
412+
@Override
413+
public void close() {
414+
closed.set(true);
415+
416+
int maxAttempts = 5;
417+
418+
// This loop will submit all the pending mutations as one batch
419+
// to a conditional writer and any known results will be removed
420+
// from the pending map. Unknown results will be re-attempted up
421+
// to the maxAttempts count
422+
for (int attempt = 0; attempt < maxAttempts; attempt++) {
423+
var currentResults = tryMutateBatch();
424+
for (Entry<FateId,ConditionalWriter.Status> result : currentResults.entrySet()) {
425+
var fateId = result.getKey();
426+
var status = result.getValue();
427+
var future = pending.get(fateId).getSecond();
428+
switch (result.getValue()) {
429+
case ACCEPTED:
430+
seededTx();
431+
log.trace("Attempt to seed {} returned {}", fateId.canonical(), status);
432+
// Complete the future with the fatId and remove from pending
433+
future.complete(Optional.of(fateId));
434+
pending.remove(fateId);
435+
break;
436+
case REJECTED:
437+
log.debug("Attempt to seed {} returned {}", fateId.canonical(), status);
438+
// Rejected so complete with an empty optional and remove from pending
439+
future.complete(Optional.empty());
440+
pending.remove(fateId);
441+
break;
442+
case UNKNOWN:
443+
log.debug("Attempt to seed {} returned {} status, retrying", fateId.canonical(),
444+
status);
445+
// unknown, so don't remove from map so that we try again if still under
446+
// max attempts
447+
break;
448+
default:
449+
// do not expect other statuses
450+
throw new IllegalStateException("Unhandled status for mutation " + status);
451+
}
452+
}
453+
454+
if (!pending.isEmpty()) {
455+
// At this point can not reliably determine if the unknown pending mutations were
456+
// successful or not because no reservation was acquired. For example since no
457+
// reservation was acquired it is possible that seeding was a success and something
458+
// immediately picked it up and started operating on it and changing it.
459+
// If scanning after that point can not conclude success or failure. Another situation
460+
// is that maybe the fateId already existed in a seeded form prior to getting this
461+
// unknown.
462+
UtilWaitThread.sleep(250);
463+
}
464+
}
465+
466+
// Any remaining will be UNKNOWN status, so complete the futures with an optional empty
467+
pending.forEach((fateId, pair) -> {
468+
pair.getSecond().complete(Optional.empty());
469+
log.warn("Repeatedly received unknown status when attempting to seed {}",
470+
fateId.canonical());
471+
});
472+
}
473+
474+
// Submit all the pending mutations to a single conditional writer
475+
// as one batch and return the results for each mutation
476+
private Map<FateId,ConditionalWriter.Status> tryMutateBatch() {
477+
if (pending.isEmpty()) {
478+
return Map.of();
479+
}
480+
481+
final Map<FateId,ConditionalWriter.Status> resultsMap = new HashMap<>();
482+
try (ConditionalWriter writer = context.createConditionalWriter(tableName)) {
483+
Iterator<ConditionalWriter.Result> results = writer
484+
.write(pending.values().stream().map(pair -> pair.getFirst().getMutation()).iterator());
485+
while (results.hasNext()) {
486+
var result = results.next();
487+
var row = new Text(result.getMutation().getRow());
488+
resultsMap.put(FateId.from(FateInstanceType.USER, row.toString()), result.getStatus());
489+
}
490+
} catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
491+
throw new IllegalStateException(e);
492+
}
493+
return resultsMap;
494+
}
495+
}
496+
396497
private class FateTxStoreImpl extends AbstractFateTxStoreImpl {
397498

398499
private FateTxStoreImpl(FateId fateId) {

server/manager/src/main/java/org/apache/accumulo/manager/Manager.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1347,7 +1347,7 @@ boolean canSuspendTablets() {
13471347
// Don't call start the CompactionCoordinator until we have tservers and upgrade is complete.
13481348
compactionCoordinator.start();
13491349

1350-
this.splitter = new Splitter(context);
1350+
this.splitter = new Splitter(this);
13511351
this.splitter.start();
13521352

13531353
try {

server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
import org.apache.accumulo.core.util.threads.Threads;
7676
import org.apache.accumulo.core.util.threads.Threads.AccumuloDaemonThread;
7777
import org.apache.accumulo.manager.metrics.ManagerMetrics;
78-
import org.apache.accumulo.manager.split.SeedSplitTask;
7978
import org.apache.accumulo.manager.state.TableCounts;
8079
import org.apache.accumulo.manager.state.TableStats;
8180
import org.apache.accumulo.manager.upgrade.UpgradeCoordinator;
@@ -607,7 +606,7 @@ private TableMgmtStats manageTablets(Iterator<TabletManagement> iter,
607606
final boolean needsSplit = actions.contains(ManagementAction.NEEDS_SPLITTING);
608607
if (needsSplit) {
609608
LOG.debug("{} may need splitting.", tm.getExtent());
610-
manager.getSplitter().initiateSplit(new SeedSplitTask(manager, tm.getExtent()));
609+
manager.getSplitter().initiateSplit(tm.getExtent());
611610
}
612611

613612
if (actions.contains(ManagementAction.NEEDS_COMPACTING) && compactionGenerator != null) {

server/manager/src/main/java/org/apache/accumulo/manager/split/SeedSplitTask.java

-55
This file was deleted.

0 commit comments

Comments
 (0)