Skip to content

Commit 7679f0c

Browse files
authored
Halt the TabletServer when a minor compaction (minc) fails and the TabletServer lock is no longer held (apache#5169)
Related to apache#5137
1 parent 712e50e commit 7679f0c

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/MinorCompactor.java

+15-5
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@
2929

3030
import org.apache.accumulo.core.conf.AccumuloConfiguration;
3131
import org.apache.accumulo.core.data.ByteSequence;
32+
import org.apache.accumulo.core.fate.zookeeper.ServiceLock;
3233
import org.apache.accumulo.core.manager.state.tables.TableState;
3334
import org.apache.accumulo.core.metadata.TabletFile;
35+
import org.apache.accumulo.core.util.Halt;
3436
import org.apache.accumulo.core.util.LocalityGroupUtil;
3537
import org.apache.accumulo.server.compaction.CompactionStats;
3638
import org.apache.accumulo.server.compaction.FileCompactor;
@@ -93,11 +95,19 @@ public CompactionStats call() {
9395
try {
9496
do {
9597
try {
96-
CompactionStats ret = super.call();
97-
98-
// log.debug(String.format("MinC %,d recs in | %,d recs out | %,d recs/sec | %6.3f secs |
99-
// %,d bytes ",map.size(), entriesCompacted,
100-
// (int)(map.size()/((t2 - t1)/1000.0)), (t2 - t1)/1000.0, estimatedSizeInBytes()));
98+
CompactionStats ret = null;
99+
try {
100+
ret = super.call();
101+
} catch (Exception e) {
102+
final ServiceLock tserverLock = tabletServer.getLock();
103+
if (tserverLock == null || !tserverLock.verifyLockAtSource()) {
104+
log.error("Minor compaction of {} has failed and TabletServer lock does not exist."
105+
+ " Halting...", getExtent(), e);
106+
Halt.halt("TabletServer lock does not exist", -1);
107+
} else {
108+
throw e;
109+
}
110+
}
101111

102112
if (reportedProblem) {
103113
ProblemReports.getInstance(tabletServer.getContext())

server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java

+11-2
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
import org.apache.accumulo.core.data.Value;
6666
import org.apache.accumulo.core.dataImpl.KeyExtent;
6767
import org.apache.accumulo.core.dataImpl.thrift.MapFileInfo;
68+
import org.apache.accumulo.core.fate.zookeeper.ServiceLock;
6869
import org.apache.accumulo.core.file.FileOperations;
6970
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
7071
import org.apache.accumulo.core.iteratorsImpl.system.SourceSwitchingIterator;
@@ -91,6 +92,7 @@
9192
import org.apache.accumulo.core.tabletserver.log.LogEntry;
9293
import org.apache.accumulo.core.tabletserver.thrift.TabletStats;
9394
import org.apache.accumulo.core.trace.TraceUtil;
95+
import org.apache.accumulo.core.util.Halt;
9496
import org.apache.accumulo.core.util.Pair;
9597
import org.apache.accumulo.core.volume.Volume;
9698
import org.apache.accumulo.server.ServerContext;
@@ -508,8 +510,15 @@ DataFileValue minorCompact(InMemoryMap memTable, TabletFile tmpDatafile, TabletF
508510
flushId);
509511
storedFile.ifPresent(stf -> compactable.filesAdded(true, List.of(stf)));
510512
} catch (Exception e) {
511-
TraceUtil.setException(span2, e, true);
512-
throw e;
513+
final ServiceLock tserverLock = tabletServer.getLock();
514+
if (tserverLock == null || !tserverLock.verifyLockAtSource()) {
515+
log.error("Minor compaction of {} has failed and TabletServer lock does not exist."
516+
+ " Halting...", getExtent(), e);
517+
Halt.halt("TabletServer lock does not exist", -1);
518+
} else {
519+
TraceUtil.setException(span2, e, true);
520+
throw e;
521+
}
513522
} finally {
514523
span2.end();
515524
}

0 commit comments

Comments
 (0)