Skip to content

yeshiquan/mmse

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

34 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

A Memory-based Search Engine (MMSE)

编译 & 运行

cmake CMakeLists.txt
make
./mmse_test

索引构建

// Example: add ten small documents to an IndexWriter and build the index.
IndexWriter index_writer;

// Sample texts; each entry's position in the vector is used as its doc id below.
std::vector<std::string> doc_text = {
    "a b c d e f g a a a b c", // doc_id: 0
    "a b c d e f g a b c", // doc_id: 1
    "a b c d e f g a a a", // doc_id: 2
    "c d e", // doc_id: 3
    "c d e z", // doc_id: 4
    "f z c", // doc_id: 5
    "f g z", // doc_id: 6
    "f g", // doc_id: 7
    "f g", // doc_id: 8
    "g" // doc_id: 9
};

// Convert the size once so the loop condition compares int with int instead of
// mixing a signed index with the unsigned result of size().
const int doc_count = static_cast<int>(doc_text.size());
for (int i = 0; i < doc_count; ++i) {
    auto doc = std::make_shared<Document>(i);
    // NOTE(review): the fields are allocated with raw `new`; presumably
    // Document::add_field takes ownership of them -- confirm against Document.
    doc->add_field(new TextField(0, doc_text[i], FieldStoreType::YES));  // field 0: the sample text
    doc->add_field(new NumberField(1, i*2, FieldStoreType::YES));        // field 1: twice the doc id
    doc->add_field(new StringField(2, "hello", FieldStoreType::YES));    // field 2: constant string
    index_writer.add_document(doc);
}
// Call build() once, after every document has been added.
index_writer.build();

检索

Query* query1 = new TermQuery(Term("content", "f"));
Query* query2 = new TermQuery(Term("content", "c"));
BooleanQuery* query = new BooleanQuery();
query->add(query1, Occur::MUST);
query->add(query2, Occur::MUST_NOT);
Weight* weight = query->make_weight();
Scorer* scorer = weight->make_scorer();

std::vector<DocId> result;
DocId doc = scorer->next_doc();
while (doc != NO_MORE_DOCS) {
    std::cout << "Query a doc -> " << doc << std::endl;
    result.push_back(doc);
    doc = scorer->next_doc();
}