Skip to content

Commit 5fb5d8b

Browse files
committed
Merge remote-tracking branch 'upstream/elasticity' into importWithHostingGoal
2 parents 9a257e1 + a089f86 commit 5fb5d8b

File tree

83 files changed

+784
-612
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+784
-612
lines changed

core/src/main/java/org/apache/accumulo/core/conf/Property.java

-1
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,6 @@ public enum Property {
755755
"The listening port for the garbage collector's monitor service.", "1.3.5"),
756756
GC_DELETE_THREADS("gc.threads.delete", "16", PropertyType.COUNT,
757757
"The number of threads used to delete RFiles and write-ahead logs.", "1.3.5"),
758-
@Experimental
759758
GC_REMOVE_IN_USE_CANDIDATES("gc.remove.in.use.candidates", "true", PropertyType.BOOLEAN,
760759
"GC will remove deletion candidates that are in-use from the metadata location. "
761760
+ "This is expected to increase the speed of subsequent GC runs.",

core/src/main/java/org/apache/accumulo/core/fate/Fate.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ public void run() {
157157
} else if (status == SUBMITTED || status == IN_PROGRESS) {
158158
Repo<T> prevOp = null;
159159
try {
160-
deferTime = op.isReady(txStore.getID().getTid(), environment);
160+
deferTime = op.isReady(txStore.getID(), environment);
161161

162162
// Here, deferTime is only used to determine success (zero) or failure (non-zero),
163163
// proceeding on success and returning to the while loop on failure.
@@ -167,7 +167,7 @@ public void run() {
167167
if (status == SUBMITTED) {
168168
txStore.setStatus(IN_PROGRESS);
169169
}
170-
op = op.call(txStore.getID().getTid(), environment);
170+
op = op.call(txStore.getID(), environment);
171171
} else {
172172
continue;
173173
}
@@ -278,7 +278,7 @@ private void doCleanUp(FateTxStore<T> txStore) {
278278

279279
private void undo(FateId fateId, Repo<T> op) {
280280
try {
281-
op.undo(fateId.getTid(), environment);
281+
op.undo(fateId, environment);
282282
} catch (Exception e) {
283283
log.warn("Failed to undo Repo, " + fateId, e);
284284
}

core/src/main/java/org/apache/accumulo/core/fate/FateId.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,9 @@ public static FateId fromThrift(TFateId tFateId) {
105105
}
106106

107107
/**
108-
* Formats transaction ids in a consistent way that is useful for logging and persisting.
108+
* Returns the hex string equivalent of the tid
109109
*/
110110
public static String formatTid(long tid) {
111-
// do not change how this formats without considering implications for persistence
112111
return FastFormat.toHexString(tid);
113112
}
114113
}

core/src/main/java/org/apache/accumulo/core/fate/ReadOnlyRepo.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@
2525
* also be safe to call without impacting the state of system components.
2626
*/
2727
public interface ReadOnlyRepo<T> {
28-
29-
long isReady(long tid, T environment) throws Exception;
28+
long isReady(FateId fateId, T environment) throws Exception;
3029

3130
String getName();
3231

core/src/main/java/org/apache/accumulo/core/fate/Repo.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@
2525
*/
2626
public interface Repo<T> extends ReadOnlyRepo<T>, Serializable {
2727

28-
Repo<T> call(long tid, T environment) throws Exception;
28+
Repo<T> call(FateId fateId, T environment) throws Exception;
2929

30-
void undo(long tid, T environment) throws Exception;
30+
void undo(FateId fateId, T environment) throws Exception;
3131

3232
// this allows the last fate op to return something to the user
3333
String getReturn();

core/src/main/java/org/apache/accumulo/core/manager/state/TabletManagement.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,12 @@ public class TabletManagement {
5353
private static final Text EMPTY = new Text("");
5454

5555
public static enum ManagementAction {
56-
BAD_STATE, NEEDS_COMPACTING, NEEDS_LOCATION_UPDATE, NEEDS_SPLITTING, NEEDS_VOLUME_REPLACEMENT;
56+
BAD_STATE,
57+
NEEDS_COMPACTING,
58+
NEEDS_LOCATION_UPDATE,
59+
NEEDS_RECOVERY,
60+
NEEDS_SPLITTING,
61+
NEEDS_VOLUME_REPLACEMENT;
5762
}
5863

5964
public static void addActions(final SortedMap<Key,Value> decodedRow,

server/base/src/main/java/org/apache/accumulo/server/compaction/RetryableThriftCall.java

+8-1
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,18 @@ public RetryableThriftCall(long start, long maxWaitTime, int maxNumRetries,
9595
*/
9696
public T run() throws RetriesExceededException {
9797
T result = null;
98+
var errorsSeen = 0;
9899
do {
99100
try {
100101
result = function.execute();
101102
} catch (TException e) {
102-
LOG.error("Error in Thrift function, retrying ...", e);
103+
errorsSeen++;
104+
// Log higher levels of errors on every 5th error
105+
if (errorsSeen >= 5 && errorsSeen % 5 == 0) {
106+
LOG.warn("Error in Thrift function, retrying ...", e);
107+
} else {
108+
LOG.debug("Error in Thrift function, retrying ...", e);
109+
}
103110
}
104111
if (result == null) {
105112
if (this.retry.canRetry()) {

server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java

+10
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.accumulo.server.manager.state;
2020

21+
import org.apache.accumulo.core.client.admin.TabletAvailability;
2122
import org.apache.accumulo.core.data.TabletId;
2223
import org.apache.accumulo.core.dataImpl.KeyExtent;
2324
import org.apache.accumulo.core.dataImpl.TabletIdImpl;
@@ -84,6 +85,15 @@ public static TabletGoalState compute(TabletMetadata tm, TabletState currentStat
8485
return TabletGoalState.UNASSIGNED;
8586
}
8687

88+
// When the tablet has wals and it will not be hosted normally, then cause it to
89+
// be hosted so that recovery can occur. When tablet availability is ONDEMAND or
90+
// UNHOSTED, then this tablet will eventually become unhosted after recovery occurs.
91+
// This could cause a little bit of churn on the cluster w/r/t balancing, but it's
92+
// necessary.
93+
if (!tm.getLogs().isEmpty() && tm.getTabletAvailability() != TabletAvailability.HOSTED) {
94+
return TabletGoalState.HOSTED;
95+
}
96+
8797
if (!params.isTableOnline(tm.getTableId())) {
8898
return UNASSIGNED;
8999
}

server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletManagementIterator.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SuspendLocationColumn;
5656
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily;
5757
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
58+
import org.apache.accumulo.core.metadata.schema.TabletOperationType;
5859
import org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer;
5960
import org.apache.accumulo.core.spi.balancer.TabletBalancer;
6061
import org.apache.accumulo.core.spi.compaction.CompactionKind;
@@ -248,13 +249,22 @@ private void computeTabletManagementActions(final TabletMetadata tm,
248249
if (tm.isFutureAndCurrentLocationSet()) {
249250
// no need to check everything, we are in a known state where we want to return everything.
250251
reasonsToReturnThisTablet.add(ManagementAction.BAD_STATE);
251-
return;
252+
}
253+
254+
if (!tm.getLogs().isEmpty() && (tm.getOperationId() == null
255+
|| tm.getOperationId().getType() != TabletOperationType.DELETING)) {
256+
reasonsToReturnThisTablet.add(ManagementAction.NEEDS_RECOVERY);
252257
}
253258

254259
if (VolumeUtil.needsVolumeReplacement(tabletMgmtParams.getVolumeReplacements(), tm)) {
255260
reasonsToReturnThisTablet.add(ManagementAction.NEEDS_VOLUME_REPLACEMENT);
256261
}
257262

263+
if (!reasonsToReturnThisTablet.isEmpty()) {
264+
// If volume replacement or recovery is needed, then return early.
265+
return;
266+
}
267+
258268
if (shouldReturnDueToLocation(tm)) {
259269
reasonsToReturnThisTablet.add(ManagementAction.NEEDS_LOCATION_UPDATE);
260270
}

server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectWriteAheadLogs.java

+52-45
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
import java.util.Map.Entry;
3535
import java.util.Set;
3636
import java.util.UUID;
37+
import java.util.concurrent.atomic.AtomicBoolean;
38+
import java.util.stream.Stream;
3739

3840
import org.apache.accumulo.core.gc.thrift.GCStatus;
3941
import org.apache.accumulo.core.gc.thrift.GcCycleStats;
@@ -42,6 +44,7 @@
4244
import org.apache.accumulo.core.metadata.TabletState;
4345
import org.apache.accumulo.core.metadata.schema.Ample.DataLevel;
4446
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
47+
import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
4548
import org.apache.accumulo.core.tabletserver.log.LogEntry;
4649
import org.apache.accumulo.core.trace.TraceUtil;
4750
import org.apache.accumulo.core.util.Pair;
@@ -57,7 +60,8 @@
5760
import org.slf4j.LoggerFactory;
5861

5962
import com.google.common.annotations.VisibleForTesting;
60-
import com.google.common.collect.Iterators;
63+
import com.google.common.base.Preconditions;
64+
import com.google.common.collect.Streams;
6165

6266
import io.opentelemetry.api.trace.Span;
6367
import io.opentelemetry.context.Scope;
@@ -69,7 +73,7 @@ public class GarbageCollectWriteAheadLogs {
6973
private final VolumeManager fs;
7074
private final LiveTServerSet liveServers;
7175
private final WalStateManager walMarker;
72-
private final Iterable<TabletMetadata> store;
76+
private final AtomicBoolean hasCollected;
7377

7478
/**
7579
* Creates a new GC WAL object.
@@ -82,34 +86,32 @@ public class GarbageCollectWriteAheadLogs {
8286
this.context = context;
8387
this.fs = fs;
8488
this.liveServers = liveServers;
85-
this.walMarker = new WalStateManager(context);
86-
this.store = () -> Iterators.concat(
87-
context.getAmple().readTablets().forLevel(DataLevel.ROOT).filter(new HasWalsFilter())
88-
.fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).build().iterator(),
89-
context.getAmple().readTablets().forLevel(DataLevel.METADATA).filter(new HasWalsFilter())
90-
.fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).build().iterator(),
91-
context.getAmple().readTablets().forLevel(DataLevel.USER).filter(new HasWalsFilter())
92-
.fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).build().iterator());
89+
this.walMarker = createWalStateManager(context);
90+
this.hasCollected = new AtomicBoolean(false);
9391
}
9492

95-
/**
96-
* Creates a new GC WAL object. Meant for testing -- allows mocked objects.
97-
*
98-
* @param context the collection server's context
99-
* @param fs volume manager to use
100-
* @param liveTServerSet a started LiveTServerSet instance
101-
*/
10293
@VisibleForTesting
103-
GarbageCollectWriteAheadLogs(ServerContext context, VolumeManager fs,
104-
LiveTServerSet liveTServerSet, WalStateManager walMarker, Iterable<TabletMetadata> store) {
105-
this.context = context;
106-
this.fs = fs;
107-
this.liveServers = liveTServerSet;
108-
this.walMarker = walMarker;
109-
this.store = store;
94+
WalStateManager createWalStateManager(ServerContext context) {
95+
return new WalStateManager(context);
96+
}
97+
98+
@VisibleForTesting
99+
Stream<TabletMetadata> createStore() {
100+
TabletsMetadata root = context.getAmple().readTablets().forLevel(DataLevel.ROOT)
101+
.filter(new HasWalsFilter()).fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).build();
102+
TabletsMetadata metadata = context.getAmple().readTablets().forLevel(DataLevel.METADATA)
103+
.filter(new HasWalsFilter()).fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).build();
104+
TabletsMetadata user = context.getAmple().readTablets().forLevel(DataLevel.USER)
105+
.filter(new HasWalsFilter()).fetch(LOCATION, LAST, LOGS, PREV_ROW, SUSPEND).build();
106+
return Streams.concat(root.stream(), metadata.stream(), user.stream()).onClose(() -> {
107+
root.close();
108+
metadata.close();
109+
user.close();
110+
});
110111
}
111112

112113
public void collect(GCStatus status) {
114+
Preconditions.checkState(!hasCollected.get(), "Can only call collect once per object");
113115
try {
114116
long count;
115117
long fileScanStop;
@@ -197,7 +199,7 @@ public void collect(GCStatus status) {
197199
} finally {
198200
span5.end();
199201
}
200-
202+
hasCollected.set(true);
201203
} catch (Exception e) {
202204
log.error("exception occurred while garbage collecting write ahead logs", e);
203205
} finally {
@@ -279,28 +281,32 @@ private Map<UUID,TServerInstance> removeEntriesInUse(Map<TServerInstance,Set<UUI
279281
}
280282

281283
// remove any entries if there's a log reference (recovery hasn't finished)
282-
for (TabletMetadata tabletMetadata : store) {
283-
// Tablet is still assigned to a dead server. Manager has moved markers and reassigned it
284-
// Easiest to just ignore all the WALs for the dead server.
285-
if (TabletState.compute(tabletMetadata, liveServers) == TabletState.ASSIGNED_TO_DEAD_SERVER) {
286-
Set<UUID> idsToIgnore = candidates.remove(tabletMetadata.getLocation().getServerInstance());
287-
if (idsToIgnore != null) {
288-
result.keySet().removeAll(idsToIgnore);
289-
recoveryLogs.keySet().removeAll(idsToIgnore);
284+
try (Stream<TabletMetadata> store = createStore()) {
285+
store.forEach(tabletMetadata -> {
286+
// Tablet is still assigned to a dead server. Manager has moved markers and reassigned it
287+
// Easiest to just ignore all the WALs for the dead server.
288+
if (TabletState.compute(tabletMetadata, liveServers)
289+
== TabletState.ASSIGNED_TO_DEAD_SERVER) {
290+
Set<UUID> idsToIgnore =
291+
candidates.remove(tabletMetadata.getLocation().getServerInstance());
292+
if (idsToIgnore != null) {
293+
result.keySet().removeAll(idsToIgnore);
294+
recoveryLogs.keySet().removeAll(idsToIgnore);
295+
}
290296
}
291-
}
292-
// Tablet is being recovered and has WAL references, remove all the WALs for the dead server
293-
// that made the WALs.
294-
for (LogEntry wal : tabletMetadata.getLogs()) {
295-
UUID walUUID = wal.getUniqueID();
296-
TServerInstance dead = result.get(walUUID);
297-
// There's a reference to a log file, so skip that server's logs
298-
Set<UUID> idsToIgnore = candidates.remove(dead);
299-
if (idsToIgnore != null) {
300-
result.keySet().removeAll(idsToIgnore);
301-
recoveryLogs.keySet().removeAll(idsToIgnore);
297+
// Tablet is being recovered and has WAL references, remove all the WALs for the dead server
298+
// that made the WALs.
299+
for (LogEntry wal : tabletMetadata.getLogs()) {
300+
UUID walUUID = wal.getUniqueID();
301+
TServerInstance dead = result.get(walUUID);
302+
// There's a reference to a log file, so skip that server's logs
303+
Set<UUID> idsToIgnore = candidates.remove(dead);
304+
if (idsToIgnore != null) {
305+
result.keySet().removeAll(idsToIgnore);
306+
recoveryLogs.keySet().removeAll(idsToIgnore);
307+
}
302308
}
303-
}
309+
});
304310
}
305311

306312
// Remove OPEN and CLOSED logs for live servers: they are still in use
@@ -369,4 +375,5 @@ protected Map<UUID,Path> getSortedWALogs() throws IOException {
369375
}
370376
return result;
371377
}
378+
372379
}

0 commit comments

Comments
 (0)