Skip to content

Commit 80a48cd

Browse files
authored
Filter based on columns declared in TabletMetadataCheck (apache#5335)
Adds filtering to TabletMetadataCheckIterator that will use matching families for the provided columns in order to reduce the amount of data that is fetched. A ResolvedColumns instance is returned by each TabletMetadataCheck instance that contains the set of columns and set of matching families to read. CompactionReservationCheck caches the ResolvedColumns instance statically to avoid having to recreate the set of columns and familes for each scan.
1 parent 55f05c6 commit 80a48cd

File tree

7 files changed

+311
-109
lines changed

7 files changed

+311
-109
lines changed

core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletMetadata.java

+163-1
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,28 @@
1818
*/
1919
package org.apache.accumulo.core.metadata.schema;
2020

21+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.MergedColumnFamily.MERGED_COLUMN;
22+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN;
2123
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_QUAL;
24+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_COLUMN;
25+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_NONCE_COLUMN;
2226
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_NONCE_QUAL;
2327
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_QUAL;
28+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.OPID_COLUMN;
2429
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.OPID_QUAL;
30+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.SELECTED_COLUMN;
2531
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.SELECTED_QUAL;
32+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.TIME_COLUMN;
2633
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.TIME_QUAL;
34+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SplitColumnFamily.UNSPLITTABLE_COLUMN;
35+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SuspendLocationColumn.SUSPEND_COLUMN;
36+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.AVAILABILITY_COLUMN;
2737
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.AVAILABILITY_QUAL;
38+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.MERGEABILITY_COLUMN;
2839
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.MERGEABILITY_QUAL;
40+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN;
2941
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_QUAL;
42+
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.REQUESTED_COLUMN;
3043
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.REQUESTED_QUAL;
3144

3245
import java.util.Collection;
@@ -40,10 +53,12 @@
4053
import java.util.Optional;
4154
import java.util.OptionalLong;
4255
import java.util.Set;
56+
import java.util.stream.Collectors;
4357

4458
import org.apache.accumulo.core.client.admin.TabletAvailability;
4559
import org.apache.accumulo.core.clientImpl.ClientContext;
4660
import org.apache.accumulo.core.clientImpl.TabletAvailabilityUtil;
61+
import org.apache.accumulo.core.data.ArrayByteSequence;
4762
import org.apache.accumulo.core.data.ByteSequence;
4863
import org.apache.accumulo.core.data.Key;
4964
import org.apache.accumulo.core.data.TableId;
@@ -58,6 +73,7 @@
5873
import org.apache.accumulo.core.metadata.StoredTabletFile;
5974
import org.apache.accumulo.core.metadata.SuspendingTServer;
6075
import org.apache.accumulo.core.metadata.TServerInstance;
76+
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
6177
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
6278
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ClonedColumnFamily;
6379
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CompactedColumnFamily;
@@ -75,6 +91,7 @@
7591
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily;
7692
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.UserCompactionRequestedColumnFamily;
7793
import org.apache.accumulo.core.tabletserver.log.LogEntry;
94+
import org.apache.accumulo.core.util.ColumnFQ;
7895
import org.apache.accumulo.core.zookeeper.ZcStat;
7996
import org.apache.commons.lang3.builder.ToStringBuilder;
8097
import org.apache.commons.lang3.builder.ToStringStyle;
@@ -202,7 +219,152 @@ public enum ColumnType {
202219
COMPACTED,
203220
USER_COMPACTION_REQUESTED,
204221
UNSPLITTABLE,
205-
MERGEABILITY
222+
MERGEABILITY;
223+
224+
public static final Map<ColumnType,Set<Text>> COLUMNS_TO_FAMILIES;
225+
public static final Map<ColumnType,ColumnFQ> COLUMNS_TO_QUALIFIERS;
226+
227+
static {
228+
ImmutableMap.Builder<ColumnType,Set<Text>> colsToFamilies = ImmutableMap.builder();
229+
ImmutableMap.Builder<ColumnType,ColumnFQ> colsToQualifiers = ImmutableMap.builder();
230+
231+
for (ColumnType column : EnumSet.allOf(ColumnType.class)) {
232+
switch (column) {
233+
case CLONED:
234+
colsToFamilies.put(column, Set.of(ClonedColumnFamily.NAME));
235+
break;
236+
case DIR:
237+
case FLUSH_ID:
238+
case TIME:
239+
case OPID:
240+
case SELECTED:
241+
case FLUSH_NONCE:
242+
colsToFamilies.put(column, Set.of(ServerColumnFamily.NAME));
243+
break;
244+
case FILES:
245+
colsToFamilies.put(column, Set.of(DataFileColumnFamily.NAME));
246+
break;
247+
case AVAILABILITY:
248+
case HOSTING_REQUESTED:
249+
case PREV_ROW:
250+
case MERGEABILITY:
251+
colsToFamilies.put(column, Set.of(TabletColumnFamily.NAME));
252+
break;
253+
case LAST:
254+
colsToFamilies.put(column, Set.of(LastLocationColumnFamily.NAME));
255+
break;
256+
case LOADED:
257+
colsToFamilies.put(column, Set.of(BulkFileColumnFamily.NAME));
258+
break;
259+
case LOCATION:
260+
colsToFamilies.put(column,
261+
Set.of(CurrentLocationColumnFamily.NAME, FutureLocationColumnFamily.NAME));
262+
break;
263+
case LOGS:
264+
colsToFamilies.put(column, Set.of(LogColumnFamily.NAME));
265+
break;
266+
case SCANS:
267+
colsToFamilies.put(column, Set.of(ScanFileColumnFamily.NAME));
268+
break;
269+
case SUSPEND:
270+
colsToFamilies.put(column, Set.of(SuspendLocationColumn.NAME));
271+
break;
272+
case ECOMP:
273+
colsToFamilies.put(column, Set.of(ExternalCompactionColumnFamily.NAME));
274+
break;
275+
case MERGED:
276+
colsToFamilies.put(column, Set.of(MergedColumnFamily.NAME));
277+
break;
278+
case COMPACTED:
279+
colsToFamilies.put(column, Set.of(CompactedColumnFamily.NAME));
280+
break;
281+
case USER_COMPACTION_REQUESTED:
282+
colsToFamilies.put(column, Set.of(UserCompactionRequestedColumnFamily.NAME));
283+
break;
284+
case UNSPLITTABLE:
285+
colsToFamilies.put(column, Set.of(SplitColumnFamily.NAME));
286+
break;
287+
default:
288+
throw new IllegalArgumentException("Unknown col type " + column);
289+
}
290+
}
291+
292+
for (ColumnType column : EnumSet.allOf(ColumnType.class)) {
293+
switch (column) {
294+
case CLONED:
295+
case COMPACTED:
296+
case ECOMP:
297+
case FILES:
298+
case LAST:
299+
case LOADED:
300+
case LOCATION:
301+
case LOGS:
302+
case SCANS:
303+
case USER_COMPACTION_REQUESTED:
304+
break;
305+
case DIR:
306+
colsToQualifiers.put(column, DIRECTORY_COLUMN);
307+
break;
308+
case FLUSH_ID:
309+
colsToQualifiers.put(column, FLUSH_COLUMN);
310+
break;
311+
case TIME:
312+
colsToQualifiers.put(column, TIME_COLUMN);
313+
break;
314+
case OPID:
315+
colsToQualifiers.put(column, OPID_COLUMN);
316+
break;
317+
case SELECTED:
318+
colsToQualifiers.put(column, SELECTED_COLUMN);
319+
break;
320+
case FLUSH_NONCE:
321+
colsToQualifiers.put(column, FLUSH_NONCE_COLUMN);
322+
break;
323+
case AVAILABILITY:
324+
colsToQualifiers.put(column, AVAILABILITY_COLUMN);
325+
break;
326+
case HOSTING_REQUESTED:
327+
colsToQualifiers.put(column, REQUESTED_COLUMN);
328+
break;
329+
case PREV_ROW:
330+
colsToQualifiers.put(column, PREV_ROW_COLUMN);
331+
break;
332+
case MERGEABILITY:
333+
colsToQualifiers.put(column, MERGEABILITY_COLUMN);
334+
break;
335+
case SUSPEND:
336+
colsToQualifiers.put(column, SUSPEND_COLUMN);
337+
break;
338+
case MERGED:
339+
colsToQualifiers.put(column, MERGED_COLUMN);
340+
break;
341+
case UNSPLITTABLE:
342+
colsToQualifiers.put(column, UNSPLITTABLE_COLUMN);
343+
break;
344+
default:
345+
throw new IllegalArgumentException("Unknown col type " + column);
346+
}
347+
}
348+
349+
COLUMNS_TO_FAMILIES = colsToFamilies.build();
350+
COLUMNS_TO_QUALIFIERS = colsToQualifiers.build();
351+
}
352+
353+
public static Set<ByteSequence> resolveFamilies(Set<ColumnType> columns) {
354+
return columns.stream()
355+
.flatMap(cf -> COLUMNS_TO_FAMILIES.get(cf).stream()
356+
.map(family -> new ArrayByteSequence(family.copyBytes())))
357+
.collect(Collectors.toUnmodifiableSet());
358+
}
359+
360+
public static Set<Text> resolveFamiliesAsText(ColumnType column) {
361+
return COLUMNS_TO_FAMILIES.get(column);
362+
}
363+
364+
public static ColumnFQ resolveQualifier(ColumnType columnType) {
365+
return COLUMNS_TO_QUALIFIERS.get(columnType);
366+
}
367+
206368
}
207369

208370
public static class Location {

core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletMetadataCheck.java

+30-4
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@
2020

2121
import java.util.Collections;
2222
import java.util.EnumSet;
23+
import java.util.Objects;
2324
import java.util.Set;
2425

26+
import org.apache.accumulo.core.data.ByteSequence;
27+
import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType;
28+
2529
/**
2630
* This interface facilitates atomic checks of tablet metadata prior to updating tablet metadata.
2731
* The way it is intended to be used is the following.
@@ -45,13 +49,35 @@ public interface TabletMetadataCheck {
4549
Set<TabletMetadata.ColumnType> ALL_COLUMNS =
4650
Collections.unmodifiableSet(EnumSet.allOf(TabletMetadata.ColumnType.class));
4751

52+
ResolvedColumns ALL_RESOLVED_COLUMNS = new ResolvedColumns(ALL_COLUMNS);
53+
4854
boolean canUpdate(TabletMetadata tabletMetadata);
4955

5056
/**
51-
* Determines what tablet metadata columns are read on the server side. Return
52-
* {@link #ALL_COLUMNS} to read all of a tablets metadata.
57+
* Determines what tablet metadata columns/families are read on the server side. Return
58+
* {@link #ALL_RESOLVED_COLUMNS} to read all of a tablets metadata. If all columns are included,
59+
* the families set will be empty which means read all families.
5360
*/
54-
default Set<TabletMetadata.ColumnType> columnsToRead() {
55-
return ALL_COLUMNS;
61+
default ResolvedColumns columnsToRead() {
62+
return ALL_RESOLVED_COLUMNS;
63+
}
64+
65+
class ResolvedColumns {
66+
private final Set<TabletMetadata.ColumnType> columns;
67+
private final Set<ByteSequence> families;
68+
69+
public ResolvedColumns(Set<ColumnType> columns) {
70+
this.columns = Objects.requireNonNull(columns);
71+
this.families = columns.equals(ALL_COLUMNS) ? Set.of() : ColumnType.resolveFamilies(columns);
72+
}
73+
74+
public EnumSet<ColumnType> getColumns() {
75+
return EnumSet.copyOf(columns);
76+
}
77+
78+
public Set<ByteSequence> getFamilies() {
79+
return families;
80+
}
5681
}
82+
5783
}

core/src/main/java/org/apache/accumulo/core/metadata/schema/TabletsMetadata.java

+8-97
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,7 @@
2121
import static com.google.common.base.Preconditions.checkState;
2222
import static java.util.stream.Collectors.groupingBy;
2323
import static java.util.stream.Collectors.toList;
24-
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN;
25-
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.FLUSH_COLUMN;
26-
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.OPID_COLUMN;
27-
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.SELECTED_COLUMN;
28-
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily.TIME_COLUMN;
29-
import static org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN;
24+
import static org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.PREV_ROW;
3025

3126
import java.io.IOException;
3227
import java.io.UncheckedIOException;
@@ -64,21 +59,6 @@
6459
import org.apache.accumulo.core.metadata.AccumuloTable;
6560
import org.apache.accumulo.core.metadata.schema.Ample.DataLevel;
6661
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
67-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
68-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ClonedColumnFamily;
69-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CompactedColumnFamily;
70-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily;
71-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
72-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ExternalCompactionColumnFamily;
73-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.FutureLocationColumnFamily;
74-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LastLocationColumnFamily;
75-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
76-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.MergedColumnFamily;
77-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
78-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SplitColumnFamily;
79-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.SuspendLocationColumn;
80-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily;
81-
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.UserCompactionRequestedColumnFamily;
8262
import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType;
8363
import org.apache.accumulo.core.metadata.schema.filters.TabletMetadataFilter;
8464
import org.apache.accumulo.core.security.Authorizations;
@@ -325,84 +305,15 @@ public Options checkConsistency() {
325305
@Override
326306
public Options fetch(ColumnType... colsToFetch) {
327307
Preconditions.checkArgument(colsToFetch.length > 0);
328-
329-
for (ColumnType colToFetch : colsToFetch) {
330-
331-
fetchedCols.add(colToFetch);
332-
333-
switch (colToFetch) {
334-
case CLONED:
335-
families.add(ClonedColumnFamily.NAME);
336-
break;
337-
case DIR:
338-
qualifiers.add(DIRECTORY_COLUMN);
339-
break;
340-
case FILES:
341-
families.add(DataFileColumnFamily.NAME);
342-
break;
343-
case FLUSH_ID:
344-
qualifiers.add(FLUSH_COLUMN);
345-
break;
346-
case AVAILABILITY:
347-
qualifiers.add(TabletsSection.TabletColumnFamily.AVAILABILITY_COLUMN);
348-
break;
349-
case HOSTING_REQUESTED:
350-
qualifiers.add(TabletsSection.TabletColumnFamily.REQUESTED_COLUMN);
351-
break;
352-
case LAST:
353-
families.add(LastLocationColumnFamily.NAME);
354-
break;
355-
case LOADED:
356-
families.add(BulkFileColumnFamily.NAME);
357-
break;
358-
case LOCATION:
359-
families.add(CurrentLocationColumnFamily.NAME);
360-
families.add(FutureLocationColumnFamily.NAME);
361-
break;
362-
case LOGS:
363-
families.add(LogColumnFamily.NAME);
364-
break;
365-
case PREV_ROW:
366-
qualifiers.add(PREV_ROW_COLUMN);
367-
break;
368-
case SCANS:
369-
families.add(ScanFileColumnFamily.NAME);
370-
break;
371-
case SUSPEND:
372-
families.add(SuspendLocationColumn.SUSPEND_COLUMN.getColumnFamily());
373-
break;
374-
case TIME:
375-
qualifiers.add(TIME_COLUMN);
376-
break;
377-
case ECOMP:
378-
families.add(ExternalCompactionColumnFamily.NAME);
379-
break;
380-
case MERGED:
381-
families.add(MergedColumnFamily.NAME);
382-
break;
383-
case OPID:
384-
qualifiers.add(OPID_COLUMN);
385-
break;
386-
case SELECTED:
387-
qualifiers.add(SELECTED_COLUMN);
388-
break;
389-
case COMPACTED:
390-
families.add(CompactedColumnFamily.NAME);
391-
break;
392-
case USER_COMPACTION_REQUESTED:
393-
families.add(UserCompactionRequestedColumnFamily.NAME);
394-
break;
395-
case UNSPLITTABLE:
396-
qualifiers.add(SplitColumnFamily.UNSPLITTABLE_COLUMN);
397-
break;
398-
case MERGEABILITY:
399-
qualifiers.add(TabletColumnFamily.MERGEABILITY_COLUMN);
400-
break;
401-
default:
402-
throw new IllegalArgumentException("Unknown col type " + colToFetch);
308+
for (var col : fetchedCols) {
309+
fetchedCols.add(col);
310+
var qualifier = ColumnType.resolveQualifier(col);
311+
if (qualifier != null) {
312+
qualifiers.add(qualifier);
313+
} else {
314+
families.addAll(ColumnType.resolveFamiliesAsText(col));
403315
}
404316
}
405-
406317
return this;
407318
}
408319

0 commit comments

Comments
 (0)