forked from Kolkir/code2seq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcpp_extractor.py
52 lines (47 loc) · 1.82 KB
/
cpp_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
from cppminer.cpp_parser import AstParser
from cppminer.cpp_parser.sample import make_str_key
from common import PathContextInformation
import tempfile
class CppExtractor:
def __init__(
self,
config,
):
self.config = config
self.parser = AstParser(
max_contexts_num=self.config.MAX_CONTEXTS,
max_path_len=self.config.MAX_PATH_LENGTH,
max_subtokens_num=self.config.MAX_NAME_PARTS,
max_ast_depth=100,
out_path=None,
)
def extract_paths(self, code_string):
tmp = tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".cc")
try:
tmp.write(code_string)
tmp.close()
return self.extract_paths(compiler_args=[], file_path=tmp.name)
finally:
os.unlink(tmp.name)
def extract_paths(self, compiler_args, file_path):
self.parser.parse(compiler_args=compiler_args, file_path=file_path)
pc_info_dict = {}
result = []
for sample in self.parser.samples:
for context in sample.contexts:
info_context = {
"name1": make_str_key(context.start_token),
"name2": make_str_key(context.end_token),
"path": make_str_key(context.path.tokens),
"shortPath": make_str_key(context.path.tokens),
}
pc_info = PathContextInformation(info_context)
pc_info_dict[
(pc_info.token1, pc_info.shortPath, pc_info.token2)
] = pc_info
result_line = str(sample)
space_padding = " " * (self.config.DATA_NUM_CONTEXTS - len(sample.contexts))
result_line += space_padding
result.append(result_line)
return result, pc_info_dict