Skip to content

Commit 2950699

Browse files
authored
Adds batch information to GC logging (apache#5391)
Adds batch counter to log messages. Adds in-progress counters to long-running methods. Uses getCompletedTaskCount which should list the amount of completed tasks in the threadpool.
1 parent 4e0c998 commit 2950699

File tree

3 files changed

+40
-12
lines changed

3 files changed

+40
-12
lines changed

server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java

+32-8
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
import java.io.FileNotFoundException;
2727
import java.io.IOException;
28+
import java.time.Duration;
2829
import java.util.ArrayList;
2930
import java.util.Collection;
3031
import java.util.Collections;
@@ -36,8 +37,9 @@
3637
import java.util.Objects;
3738
import java.util.Set;
3839
import java.util.SortedMap;
39-
import java.util.concurrent.ExecutorService;
40+
import java.util.concurrent.ThreadPoolExecutor;
4041
import java.util.concurrent.TimeUnit;
42+
import java.util.concurrent.atomic.AtomicInteger;
4143
import java.util.stream.Stream;
4244

4345
import org.apache.accumulo.core.Constants;
@@ -65,6 +67,7 @@
6567
import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
6668
import org.apache.accumulo.core.security.Authorizations;
6769
import org.apache.accumulo.core.util.Pair;
70+
import org.apache.accumulo.core.util.Timer;
6871
import org.apache.accumulo.core.util.UtilWaitThread;
6972
import org.apache.accumulo.core.util.threads.ThreadPools;
7073
import org.apache.accumulo.core.volume.Volume;
@@ -95,16 +98,19 @@ public class GCRun implements GarbageCollectionEnvironment {
9598
private final Ample.DataLevel level;
9699
private final ServerContext context;
97100
private final AccumuloConfiguration config;
101+
private final Duration loggingInterval = Duration.ofMinutes(1);
98102
private long candidates = 0;
99103
private long inUse = 0;
100104
private long deleted = 0;
101105
private long errors = 0;
106+
private AtomicInteger batchCount;
102107

103108
public GCRun(Ample.DataLevel level, ServerContext context) {
104109
this.log = LoggerFactory.getLogger(GCRun.class.getName() + "." + level.name());
105110
this.level = level;
106111
this.context = context;
107112
this.config = context.getConfiguration();
113+
this.batchCount = new AtomicInteger(0);
108114
}
109115

110116
@Override
@@ -132,7 +138,8 @@ public void deleteGcCandidates(Collection<GcCandidate> gcCandidates, GcCandidate
132138
return;
133139
}
134140

135-
log.info("Attempting to delete gcCandidates of type {} from metadata", type);
141+
log.info("Batch {} attempting to delete {} gcCandidates of type {} from metadata",
142+
batchCount.get(), gcCandidates.size(), type);
136143
context.getAmple().deleteGcCandidates(level, gcCandidates, type);
137144
}
138145

@@ -143,6 +150,7 @@ public List<GcCandidate> readCandidatesThatFitInMemory(Iterator<GcCandidate> can
143150
long candidateBatchSize = getCandidateBatchSize() / 2;
144151

145152
List<GcCandidate> candidatesBatch = new ArrayList<>();
153+
batchCount.incrementAndGet();
146154

147155
while (candidates.hasNext()) {
148156
GcCandidate candidate = candidates.next();
@@ -288,15 +296,17 @@ public void deleteConfirmedCandidates(SortedMap<String,GcCandidate> confirmedDel
288296

289297
List<GcCandidate> processedDeletes = Collections.synchronizedList(new ArrayList<>());
290298

291-
minimizeDeletes(confirmedDeletes, processedDeletes, fs, log);
299+
minimizeDeletes(confirmedDeletes, processedDeletes, fs, log, loggingInterval);
292300

293-
ExecutorService deleteThreadPool = ThreadPools.getServerThreadPools()
301+
ThreadPoolExecutor deleteThreadPool = ThreadPools.getServerThreadPools()
294302
.createExecutorService(config, Property.GC_DELETE_THREADS);
295303

296304
final List<Pair<Path,Path>> replacements = context.getVolumeReplacements();
297305

306+
log.info("Batch {} attempting to delete {} gcCandidate files", batchCount.get(),
307+
confirmedDeletes.size());
308+
Timer timer = Timer.startNew();
298309
for (final GcCandidate delete : confirmedDeletes.values()) {
299-
300310
Runnable deleteTask = () -> {
301311
boolean removeFlag = false;
302312

@@ -320,7 +330,7 @@ public void deleteConfirmedCandidates(SortedMap<String,GcCandidate> confirmedDel
320330
}
321331

322332
for (Path pathToDel : GcVolumeUtil.expandAllVolumesUri(fs, fullPath)) {
323-
log.debug("{} Deleting {}", fileActionPrefix, pathToDel);
333+
log.debug("Batch {} {} Deleting {}", batchCount.get(), fileActionPrefix, pathToDel);
324334

325335
if (moveToTrash(pathToDel) || fs.deleteRecursively(pathToDel)) {
326336
// delete succeeded, still want to delete
@@ -373,7 +383,12 @@ public void deleteConfirmedCandidates(SortedMap<String,GcCandidate> confirmedDel
373383
deleteThreadPool.shutdown();
374384

375385
try {
376-
while (!deleteThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) { // empty
386+
while (!deleteThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) {
387+
if (timer.hasElapsed(loggingInterval)) {
388+
log.info("Batch {} deleting file {} of {}", batchCount.get(),
389+
deleteThreadPool.getCompletedTaskCount(), confirmedDeletes.size());
390+
timer.restart();
391+
}
377392
}
378393
} catch (InterruptedException e1) {
379394
log.error("{}", e1.getMessage(), e1);
@@ -441,7 +456,8 @@ public Iterator<Map.Entry<String,Replication.Status>> getReplicationNeededIterat
441456

442457
@VisibleForTesting
443458
static void minimizeDeletes(SortedMap<String,GcCandidate> confirmedDeletes,
444-
List<GcCandidate> processedDeletes, VolumeManager fs, Logger logger) {
459+
List<GcCandidate> processedDeletes, VolumeManager fs, Logger logger,
460+
Duration loggingInterval) {
445461
Set<Path> seenVolumes = new HashSet<>();
446462

447463
// when deleting a dir and all files in that dir, only need to delete the dir.
@@ -451,7 +467,11 @@ static void minimizeDeletes(SortedMap<String,GcCandidate> confirmedDeletes,
451467

452468
String lastDirRel = null;
453469
Path lastDirAbs = null;
470+
Timer progressTimer = Timer.startNew();
471+
int progressCount = 0;
472+
int totalDeletes = confirmedDeletes.size();
454473
while (cdIter.hasNext()) {
474+
progressCount++;
455475
Map.Entry<String,GcCandidate> entry = cdIter.next();
456476
String relPath = entry.getKey();
457477
Path absPath = new Path(entry.getValue().getPath());
@@ -491,6 +511,10 @@ static void minimizeDeletes(SortedMap<String,GcCandidate> confirmedDeletes,
491511
lastDirAbs = null;
492512
}
493513
}
514+
if (progressTimer.hasElapsed(loggingInterval)) {
515+
logger.debug("Minimizing delete {} of {}", progressCount, totalDeletes);
516+
progressTimer.restart();
517+
}
494518
}
495519
}
496520

server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java

+6-3
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ public long collect(GarbageCollectionEnvironment gce)
387387

388388
Iterator<GcCandidate> candidatesIter = gce.getCandidates();
389389
long totalBlips = 0;
390+
int batchCount = 0;
390391

391392
while (candidatesIter.hasNext()) {
392393
List<GcCandidate> batchOfCandidates;
@@ -399,19 +400,21 @@ public long collect(GarbageCollectionEnvironment gce)
399400
} finally {
400401
candidatesSpan.end();
401402
}
402-
totalBlips = deleteBatch(gce, batchOfCandidates);
403+
batchCount++;
404+
totalBlips = deleteBatch(gce, batchOfCandidates, batchCount);
403405
}
404406
return totalBlips;
405407
}
406408

407409
/**
408410
* Given a sub-list of possible deletion candidates, process and remove valid deletion candidates.
409411
*/
410-
private long deleteBatch(GarbageCollectionEnvironment gce, List<GcCandidate> currentBatch)
411-
throws InterruptedException, TableNotFoundException, IOException {
412+
private long deleteBatch(GarbageCollectionEnvironment gce, List<GcCandidate> currentBatch,
413+
int batchCount) throws InterruptedException, TableNotFoundException, IOException {
412414

413415
long origSize = currentBatch.size();
414416
gce.incrementCandidatesStat(origSize);
417+
log.info("Batch {} total deletion candidates: {}", batchCount, origSize);
415418

416419
SortedMap<String,GcCandidate> candidateMap = makeRelative(currentBatch);
417420

server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import static org.junit.jupiter.api.Assertions.assertTrue;
3232

3333
import java.io.FileNotFoundException;
34+
import java.time.Duration;
3435
import java.util.ArrayList;
3536
import java.util.Arrays;
3637
import java.util.Collection;
@@ -192,7 +193,7 @@ public void testMinimizeDeletes() {
192193

193194
List<GcCandidate> processedDeletes = new ArrayList<>();
194195

195-
GCRun.minimizeDeletes(confirmed, processedDeletes, volMgr2, log);
196+
GCRun.minimizeDeletes(confirmed, processedDeletes, volMgr2, log, Duration.ofMinutes(1));
196197

197198
TreeMap<String,GcCandidate> expected = new TreeMap<>();
198199
expected.put("5a/t-0001", new GcCandidate("hdfs://nn1/accumulo/tables/5a/t-0001", 0L));

0 commit comments

Comments
 (0)