Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve perf of blob retrieval within bucket #1329

Merged
merged 3 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 56 additions & 54 deletions disperser/common/v2/blobstore/dynamo_metadata_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,30 +291,33 @@ func (s *BlobMetadataStore) GetBlobMetadataByStatus(ctx context.Context, status
return metadata, nil
}

// queryBucketBlobMetadata returns blobs (as metadata) within range [startKey, endKey] from a single bucket.
// queryBucketBlobMetadata appends blobs (as metadata) within range (startKey, endKey) from a single bucket to the provided result slice.
// Results are ordered by <RequestedAt, Bobkey> in ascending order.
//
// The function handles DynamoDB's 1MB response size limitation by performing multiple queries if necessary.
// It filters out blobs at the exact startKey and endKey as they are exclusive bounds.
func (s *BlobMetadataStore) queryBucketBlobMetadata(
ctx context.Context,
bucket uint64,
ascending bool,
after BlobFeedCursor,
before BlobFeedCursor,
startKey string,
endKey string,
ascending bool,
limit int,
result []*v2.BlobMetadata,
lastProcessedCursor **BlobFeedCursor,
) ([]*v2.BlobMetadata, error) {
metadata := make([]*v2.BlobMetadata, 0)
var lastEvaledKey map[string]types.AttributeValue

for {
start := startKey
if lastEvaledKey != nil {
requestedAtBlobkey, err := UnmarshalRequestedAtBlobKey(lastEvaledKey)
if err != nil {
return nil, fmt.Errorf("failed to parse the RequestedAtBlobkey from the LastEvaluatedKey: %w", err)
return result, fmt.Errorf("failed to parse the RequestedAtBlobkey from the LastEvaluatedKey: %w", err)
}
start = requestedAtBlobkey
}

res, err := s.dynamoDBClient.QueryIndexWithPagination(
ctx,
s.tableName,
Expand All @@ -330,16 +333,40 @@ func (s *BlobMetadataStore) queryBucketBlobMetadata(
ascending,
)
if err != nil {
return nil, fmt.Errorf("query failed for bucket %d: %w", bucket, err)
return result, fmt.Errorf("query failed for bucket %d: %w", bucket, err)
}

// Collect results
for _, item := range res.Items {
bm, err := UnmarshalBlobMetadata(item)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal blob metadata: %w", err)
return result, fmt.Errorf("failed to unmarshal blob metadata: %w", err)
}

// Get blob key for filtering
blobKey, err := bm.BlobHeader.BlobKey()
if err != nil {
return result, fmt.Errorf("failed to get blob key: %w", err)
}

// Skip blobs at the endpoints (exclusive bounds)
if after.Equal(bm.RequestedAt, &blobKey) || before.Equal(bm.RequestedAt, &blobKey) {
continue
}

// Add to result
result = append(result, bm)

// Update last processed cursor
*lastProcessedCursor = &BlobFeedCursor{
RequestedAt: bm.RequestedAt,
BlobKey: &blobKey,
}

// Check limit
if limit > 0 && len(result) >= limit {
return result, nil
}
metadata = append(metadata, bm)
}

// Exhausted all items already
Expand All @@ -350,7 +377,7 @@ func (s *BlobMetadataStore) queryBucketBlobMetadata(
lastEvaledKey = res.LastEvaluatedKey
}

return metadata, nil
return result, nil
}

// GetBlobMetadataByRequestedAtForward returns blobs (as BlobMetadata) in cursor range
Expand All @@ -368,40 +395,28 @@ func (s *BlobMetadataStore) GetBlobMetadataByRequestedAtForward(
if !after.LessThan(&before) {
return nil, nil, errors.New("after cursor must be less than before cursor")
}

startBucket, endBucket := GetRequestedAtBucketIDRange(after.RequestedAt, before.RequestedAt)
startKey := after.ToCursorKey()
endKey := before.ToCursorKey()
result := make([]*v2.BlobMetadata, 0)
var lastProcessedCursor *BlobFeedCursor

for bucket := startBucket; bucket <= endBucket; bucket++ {
if limit > 0 && len(result) >= limit {
break
}
bucketMetadata, err := s.queryBucketBlobMetadata(ctx, bucket, startKey, endKey, true)
// Pass the result slice to be modified in-place along with cursors for filtering
var err error
result, err = s.queryBucketBlobMetadata(
ctx, bucket, true, after, before, startKey, endKey, limit, result, &lastProcessedCursor,
)
if err != nil {
return nil, nil, err
}
// Process bucket results
for _, bm := range bucketMetadata {
blobKey, err := bm.BlobHeader.BlobKey()
if err != nil {
return nil, nil, fmt.Errorf("failed to get blob key: %w", err)
}
// Skip blobs at the endpoints
if after.Equal(bm.RequestedAt, &blobKey) || before.Equal(bm.RequestedAt, &blobKey) {
continue
}
result = append(result, bm)
lastProcessedCursor = &BlobFeedCursor{
RequestedAt: bm.RequestedAt,
BlobKey: &blobKey,
}
if limit > 0 && len(result) >= limit {
break
}

if limit > 0 && len(result) >= limit {
break
}
}

return result, lastProcessedCursor, nil
}

Expand All @@ -420,6 +435,7 @@ func (s *BlobMetadataStore) GetBlobMetadataByRequestedAtBackward(
if !after.LessThan(&before) {
return nil, nil, errors.New("after cursor must be less than before cursor")
}

startBucket, endBucket := GetRequestedAtBucketIDRange(after.RequestedAt, before.RequestedAt)
startKey := after.ToCursorKey()
endKey := before.ToCursorKey()
Expand All @@ -428,31 +444,17 @@ func (s *BlobMetadataStore) GetBlobMetadataByRequestedAtBackward(

// Traverse buckets in reverse order
for bucket := endBucket; bucket >= startBucket; bucket-- {
if limit > 0 && len(result) >= limit {
break
}
bucketMetadata, err := s.queryBucketBlobMetadata(ctx, bucket, startKey, endKey, false)
// Pass the result slice to be modified in-place along with cursors for filtering
var err error
result, err = s.queryBucketBlobMetadata(
ctx, bucket, false, after, before, startKey, endKey, limit, result, &lastProcessedCursor,
)
if err != nil {
return nil, nil, err
}
// Process bucket results
for _, bm := range bucketMetadata {
blobKey, err := bm.BlobHeader.BlobKey()
if err != nil {
return nil, nil, fmt.Errorf("failed to get blob key: %w", err)
}
// Skip blobs at the endpoints
if before.Equal(bm.RequestedAt, &blobKey) || after.Equal(bm.RequestedAt, &blobKey) {
continue
}
result = append(result, bm)
lastProcessedCursor = &BlobFeedCursor{
RequestedAt: bm.RequestedAt,
BlobKey: &blobKey,
}
if limit > 0 && len(result) >= limit {
break
}

if limit > 0 && len(result) >= limit {
break
}
}
return result, lastProcessedCursor, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ func TestBlobMetadataStoreGetBlobMetadataByRequestedAtForward(t *testing.T) {
for i := 0; i < numBlobs; i++ {
metadata, lastProcessedCursor, err := blobMetadataStore.GetBlobMetadataByRequestedAtForward(ctx, startCursor, endCursor, 1)
require.NoError(t, err)
assert.Equal(t, 1, len(metadata))
require.Equal(t, 1, len(metadata))
checkBlobKeyEqual(t, keys[i], metadata[0].BlobHeader)
require.NotNil(t, lastProcessedCursor)
assert.Equal(t, keys[i], *lastProcessedCursor.BlobKey)
Expand Down
Loading