Skip to content

Commit

Permalink
fix colbert rank calculation (#399)
Browse files (browse the repository at this point in the history)
  • Loading branch information
epinzur authored Apr 30, 2024
1 parent 093d24a commit b9b8995
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
6 changes: 3 additions & 3 deletions libs/colbert/ragstack_colbert/colbert_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,16 +246,16 @@ async def _fetch_chunk_data(
chunk_data_map[chunk] = chunk_data

answers: List[RetrievedChunk] = []
- rank = 1
- for chunk in chunks_by_score:
+
+ for idx, chunk in enumerate(chunks_by_score):
score = chunk_scores[chunk]
chunk_data = chunk_data_map[chunk]
answers.append(
RetrievedChunk(
doc_id=chunk.doc_id,
chunk_id=chunk.chunk_id,
score=score.item(), # Ensure score is a scalar if it's a tensor
- rank=rank,
+ rank=idx + 1,
data=chunk_data,
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,6 @@ def generate_texts(text, chunk_size, overlap_size):
for chunk in chunks:
logging.info(f"got {chunk}")
assert len(chunks) == 5
- assert len(chunks[0].data.text) > 0
+ assert len(chunks[0].data.text) > 0
+ assert chunks[0].rank == 1
+ assert chunks[1].rank == 2

0 comments on commit b9b8995

Please sign in to comment.