Skip to content

Commit 4879e6c

Browse files
committed
Merge branch '3.1'
2 parents e941517 + eb7f783 commit 4879e6c

File tree

3 files changed

+40 -12 lines changed

3 files changed

+40 -12 lines changed

server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java

+32 -8
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626

2727
import java.io.FileNotFoundException;
2828
import java.io.IOException;
29+
import java.time.Duration;
2930
import java.util.ArrayList;
3031
import java.util.Collection;
3132
import java.util.Collections;
@@ -37,8 +38,9 @@
3738
import java.util.Objects;
3839
import java.util.Set;
3940
import java.util.SortedMap;
40-
import java.util.concurrent.ExecutorService;
41+
import java.util.concurrent.ThreadPoolExecutor;
4142
import java.util.concurrent.TimeUnit;
43+
import java.util.concurrent.atomic.AtomicInteger;
4244
import java.util.stream.Stream;
4345

4446
import org.apache.accumulo.core.Constants;
@@ -63,6 +65,7 @@
6365
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
6466
import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
6567
import org.apache.accumulo.core.security.Authorizations;
68+
import org.apache.accumulo.core.util.Timer;
6669
import org.apache.accumulo.core.util.threads.ThreadPools;
6770
import org.apache.accumulo.core.volume.Volume;
6871
import org.apache.accumulo.server.ServerContext;
@@ -88,16 +91,19 @@ public class GCRun implements GarbageCollectionEnvironment {
8891
private final Ample.DataLevel level;
8992
private final ServerContext context;
9093
private final AccumuloConfiguration config;
94+
private final Duration loggingInterval = Duration.ofMinutes(1);
9195
private long candidates = 0;
9296
private long inUse = 0;
9397
private long deleted = 0;
9498
private long errors = 0;
99+
private AtomicInteger batchCount;
95100

96101
public GCRun(Ample.DataLevel level, ServerContext context) {
97102
this.log = LoggerFactory.getLogger(GCRun.class.getName() + "." + level.name());
98103
this.level = level;
99104
this.context = context;
100105
this.config = context.getConfiguration();
106+
this.batchCount = new AtomicInteger(0);
101107
}
102108

103109
@Override
@@ -125,7 +131,8 @@ public void deleteGcCandidates(Collection<GcCandidate> gcCandidates, GcCandidate
125131
return;
126132
}
127133

128-
log.info("Attempting to delete gcCandidates of type {} from metadata", type);
134+
log.info("Batch {} attempting to delete {} gcCandidates of type {} from metadata",
135+
batchCount.get(), gcCandidates.size(), type);
129136
context.getAmple().deleteGcCandidates(level, gcCandidates, type);
130137
}
131138

@@ -136,6 +143,7 @@ public List<GcCandidate> readCandidatesThatFitInMemory(Iterator<GcCandidate> can
136143
long candidateBatchSize = getCandidateBatchSize() / 2;
137144

138145
List<GcCandidate> candidatesBatch = new ArrayList<>();
146+
batchCount.incrementAndGet();
139147

140148
while (candidates.hasNext()) {
141149
GcCandidate candidate = candidates.next();
@@ -279,15 +287,17 @@ public void deleteConfirmedCandidates(SortedMap<String,GcCandidate> confirmedDel
279287

280288
List<GcCandidate> processedDeletes = Collections.synchronizedList(new ArrayList<>());
281289

282-
minimizeDeletes(confirmedDeletes, processedDeletes, fs, log);
290+
minimizeDeletes(confirmedDeletes, processedDeletes, fs, log, loggingInterval);
283291

284-
ExecutorService deleteThreadPool = ThreadPools.getServerThreadPools()
292+
ThreadPoolExecutor deleteThreadPool = ThreadPools.getServerThreadPools()
285293
.createExecutorService(config, Property.GC_DELETE_THREADS);
286294

287295
final Map<Path,Path> replacements = context.getVolumeReplacements();
288296

297+
log.info("Batch {} attempting to delete {} gcCandidate files", batchCount.get(),
298+
confirmedDeletes.size());
299+
Timer timer = Timer.startNew();
289300
for (final GcCandidate delete : confirmedDeletes.values()) {
290-
291301
Runnable deleteTask = () -> {
292302
boolean removeFlag = false;
293303

@@ -311,7 +321,7 @@ public void deleteConfirmedCandidates(SortedMap<String,GcCandidate> confirmedDel
311321
}
312322

313323
for (Path pathToDel : GcVolumeUtil.expandAllVolumesUri(fs, fullPath)) {
314-
log.debug("{} Deleting {}", fileActionPrefix, pathToDel);
324+
log.debug("Batch {} {} Deleting {}", batchCount.get(), fileActionPrefix, pathToDel);
315325

316326
if (moveToTrash(pathToDel) || fs.deleteRecursively(pathToDel)) {
317327
// delete succeeded, still want to delete
@@ -364,7 +374,12 @@ public void deleteConfirmedCandidates(SortedMap<String,GcCandidate> confirmedDel
364374
deleteThreadPool.shutdown();
365375

366376
try {
367-
while (!deleteThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) { // empty
377+
while (!deleteThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) {
378+
if (timer.hasElapsed(loggingInterval)) {
379+
log.info("Batch {} deleting file {} of {}", batchCount.get(),
380+
deleteThreadPool.getCompletedTaskCount(), confirmedDeletes.size());
381+
timer.restart();
382+
}
368383
}
369384
} catch (InterruptedException e1) {
370385
log.error("{}", e1.getMessage(), e1);
@@ -408,7 +423,8 @@ public void incrementInUseStat(long i) {
408423

409424
@VisibleForTesting
410425
static void minimizeDeletes(SortedMap<String,GcCandidate> confirmedDeletes,
411-
List<GcCandidate> processedDeletes, VolumeManager fs, Logger logger) {
426+
List<GcCandidate> processedDeletes, VolumeManager fs, Logger logger,
427+
Duration loggingInterval) {
412428
Set<Path> seenVolumes = new HashSet<>();
413429

414430
// when deleting a dir and all files in that dir, only need to delete the dir.
@@ -418,7 +434,11 @@ static void minimizeDeletes(SortedMap<String,GcCandidate> confirmedDeletes,
418434

419435
String lastDirRel = null;
420436
Path lastDirAbs = null;
437+
Timer progressTimer = Timer.startNew();
438+
int progressCount = 0;
439+
int totalDeletes = confirmedDeletes.size();
421440
while (cdIter.hasNext()) {
441+
progressCount++;
422442
Map.Entry<String,GcCandidate> entry = cdIter.next();
423443
String relPath = entry.getKey();
424444
Path absPath = new Path(entry.getValue().getPath());
@@ -458,6 +478,10 @@ static void minimizeDeletes(SortedMap<String,GcCandidate> confirmedDeletes,
458478
lastDirAbs = null;
459479
}
460480
}
481+
if (progressTimer.hasElapsed(loggingInterval)) {
482+
logger.debug("Minimizing delete {} of {}", progressCount, totalDeletes);
483+
progressTimer.restart();
484+
}
461485
}
462486
}
463487

server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java

+6 -3
Original file line number | Diff line number | Diff line change
@@ -344,6 +344,7 @@ public long collect(GarbageCollectionEnvironment gce)
344344

345345
Iterator<GcCandidate> candidatesIter = gce.getCandidates();
346346
long totalBlips = 0;
347+
int batchCount = 0;
347348

348349
while (candidatesIter.hasNext()) {
349350
List<GcCandidate> batchOfCandidates;
@@ -356,19 +357,21 @@ public long collect(GarbageCollectionEnvironment gce)
356357
} finally {
357358
candidatesSpan.end();
358359
}
359-
totalBlips = deleteBatch(gce, batchOfCandidates);
360+
batchCount++;
361+
totalBlips = deleteBatch(gce, batchOfCandidates, batchCount);
360362
}
361363
return totalBlips;
362364
}
363365

364366
/**
365367
* Given a sub-list of possible deletion candidates, process and remove valid deletion candidates.
366368
*/
367-
private long deleteBatch(GarbageCollectionEnvironment gce, List<GcCandidate> currentBatch)
368-
throws InterruptedException, TableNotFoundException, IOException {
369+
private long deleteBatch(GarbageCollectionEnvironment gce, List<GcCandidate> currentBatch,
370+
int batchCount) throws InterruptedException, TableNotFoundException, IOException {
369371

370372
long origSize = currentBatch.size();
371373
gce.incrementCandidatesStat(origSize);
374+
log.info("Batch {} total deletion candidates: {}", batchCount, origSize);
372375

373376
SortedMap<String,GcCandidate> candidateMap = makeRelative(currentBatch);
374377

server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java

+2 -1
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
import static org.junit.jupiter.api.Assertions.assertTrue;
3232

3333
import java.io.FileNotFoundException;
34+
import java.time.Duration;
3435
import java.util.ArrayList;
3536
import java.util.Arrays;
3637
import java.util.Collection;
@@ -181,7 +182,7 @@ public void testMinimizeDeletes() {
181182

182183
List<GcCandidate> processedDeletes = new ArrayList<>();
183184

184-
GCRun.minimizeDeletes(confirmed, processedDeletes, volMgr2, log);
185+
GCRun.minimizeDeletes(confirmed, processedDeletes, volMgr2, log, Duration.ofMinutes(1));
185186

186187
TreeMap<String,GcCandidate> expected = new TreeMap<>();
187188
expected.put("5a/t-0001", new GcCandidate("hdfs://nn1/accumulo/tables/5a/t-0001", 0L));

0 commit comments

Comments
 (0)