Skip to content

Commit 3826ebd

Browse files
authored
Log iteration to result count in BulkImport code (#5312)
* Log iteration to result count in BulkImport code
1 parent aa7944b commit 3826ebd

File tree

1 file changed

+41
-4
lines changed
  • server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2

1 file changed

+41
-4
lines changed

server/manager/src/main/java/org/apache/accumulo/manager/tableOps/bulkVer2/LoadFiles.java

+41-4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOCATION;
2424
import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW;
2525

26+
import java.time.Duration;
2627
import java.util.ArrayList;
2728
import java.util.Comparator;
2829
import java.util.HashMap;
@@ -63,6 +64,7 @@
6364
import org.apache.accumulo.core.util.MapCounter;
6465
import org.apache.accumulo.core.util.PeekingIterator;
6566
import org.apache.accumulo.core.util.TextUtil;
67+
import org.apache.accumulo.core.util.Timer;
6668
import org.apache.accumulo.manager.Manager;
6769
import org.apache.accumulo.manager.tableOps.ManagerRepo;
6870
import org.apache.accumulo.server.fs.VolumeManager;
@@ -92,6 +94,7 @@ public LoadFiles(BulkInfo bulkInfo) {
9294

9395
@Override
9496
public long isReady(long tid, Manager manager) throws Exception {
97+
log.info("Starting bulk import for {} (tid = {})", bulkInfo.sourceDir, FateTxId.formatTid(tid));
9598
if (manager.onlineTabletServers().isEmpty()) {
9699
log.warn("There are no tablet server to process bulkDir import, waiting (tid = "
97100
+ FateTxId.formatTid(tid) + ")");
@@ -312,6 +315,16 @@ long finish() throws Exception {
312315
}
313316
}
314317

318+
/**
319+
* Stats for the loadFiles method. Helps track wasted time and iterations.
320+
*/
321+
private static class ImportTimingStats {
322+
Duration totalWastedTime = Duration.ZERO;
323+
long wastedIterations = 0;
324+
long tabletCount = 0;
325+
long callCount = 0;
326+
}
327+
315328
/**
316329
* Make asynchronous load calls to each overlapping Tablet in the bulk mapping. Return a sleep
317330
* time to isReady based on a factor of the TabletServer with the most Tablets. This method will
@@ -341,20 +354,33 @@ private long loadFiles(TableId tableId, Path bulkDir, LoadMappingIterator loadMa
341354

342355
loader.start(bulkDir, manager, tid, bulkInfo.setTime);
343356

344-
long t1 = System.currentTimeMillis();
357+
ImportTimingStats importTimingStats = new ImportTimingStats();
358+
359+
Timer timer = Timer.startNew();
345360
while (lmi.hasNext()) {
346361
loadMapEntry = lmi.next();
347362
List<TabletMetadata> tablets =
348-
findOverlappingTablets(fmtTid, loadMapEntry.getKey(), tabletIter);
363+
findOverlappingTablets(fmtTid, loadMapEntry.getKey(), tabletIter, importTimingStats);
349364
loader.load(tablets, loadMapEntry.getValue());
350365
}
366+
Duration totalProcessingTime = timer.elapsed();
351367

352368
log.trace("{}: Completed Finding Overlapping Tablets", fmtTid);
353369

370+
if (importTimingStats.callCount > 0) {
371+
log.debug(
372+
"Bulk import stats for {} (tid = {}): processed {} tablets in {} calls which took {}ms ({} nanos). Skipped {} iterations which took {}ms ({} nanos) or {}% of the processing time.",
373+
bulkInfo.sourceDir, FateTxId.formatTid(tid), importTimingStats.tabletCount,
374+
importTimingStats.callCount, totalProcessingTime.toMillis(),
375+
totalProcessingTime.toNanos(), importTimingStats.wastedIterations,
376+
importTimingStats.totalWastedTime.toMillis(), importTimingStats.totalWastedTime.toNanos(),
377+
(importTimingStats.totalWastedTime.toNanos() * 100) / totalProcessingTime.toNanos());
378+
}
379+
354380
long sleepTime = loader.finish();
355381
if (sleepTime > 0) {
356382
log.trace("{}: Tablet Max Sleep is {}", fmtTid, sleepTime);
357-
long scanTime = Math.min(System.currentTimeMillis() - t1, 30_000);
383+
long scanTime = Math.min(totalProcessingTime.toMillis(), 30_000);
358384
log.trace("{}: Scan time is {}", fmtTid, scanTime);
359385
sleepTime = Math.max(sleepTime, scanTime * 2);
360386
}
@@ -369,7 +395,7 @@ private long loadFiles(TableId tableId, Path bulkDir, LoadMappingIterator loadMa
369395
* Find all the tablets within the provided bulk load mapping range.
370396
*/
371397
private List<TabletMetadata> findOverlappingTablets(String fmtTid, KeyExtent loadRange,
372-
Iterator<TabletMetadata> tabletIter) {
398+
Iterator<TabletMetadata> tabletIter, ImportTimingStats importTimingStats) {
373399

374400
TabletMetadata currTablet = null;
375401

@@ -381,12 +407,18 @@ private List<TabletMetadata> findOverlappingTablets(String fmtTid, KeyExtent loa
381407

382408
int cmp;
383409

410+
long wastedIterations = 0;
411+
Timer timer = Timer.startNew();
412+
384413
// skip tablets until we find the prevEndRow of loadRange
385414
while ((cmp = PREV_COMP.compare(currTablet.getPrevEndRow(), loadRange.prevEndRow())) < 0) {
415+
wastedIterations++;
386416
log.trace("{}: Skipping tablet: {}", fmtTid, currTablet.getExtent());
387417
currTablet = tabletIter.next();
388418
}
389419

420+
Duration wastedTime = timer.elapsed();
421+
390422
if (cmp != 0) {
391423
throw new IllegalStateException(
392424
"Unexpected prev end row " + currTablet.getExtent() + " " + loadRange);
@@ -407,6 +439,11 @@ private List<TabletMetadata> findOverlappingTablets(String fmtTid, KeyExtent loa
407439
throw new IllegalStateException("Unexpected end row " + currTablet + " " + loadRange);
408440
}
409441

442+
importTimingStats.wastedIterations += wastedIterations;
443+
importTimingStats.totalWastedTime = importTimingStats.totalWastedTime.plus(wastedTime);
444+
importTimingStats.tabletCount += tablets.size();
445+
importTimingStats.callCount++;
446+
410447
return tablets;
411448
} catch (NoSuchElementException e) {
412449
NoSuchElementException ne2 = new NoSuchElementException(

0 commit comments

Comments
 (0)