forked from wubero/code2vec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract.py
101 lines (83 loc) · 3.51 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/python
import itertools
import multiprocessing
import os
import sys
import shutil
import subprocess
from threading import Timer
import sys
from argparse import ArgumentParser
from subprocess import Popen, PIPE, STDOUT, call
# Path prefix prepended to every extractor output file name. It starts out as
# the current directory; setDir() repoints it at a per-process directory
# under ./tmp/ before any extraction runs.
TMP_DIR = "./"
def setDir():
    """Point TMP_DIR at a fresh, empty scratch directory for this process.

    The directory is ``./tmp/feature_extractor<pid>/``. Anything already at
    that path is removed (best effort) before the directory is created.

    Raises:
        OSError: if the directory cannot be created, e.g. because the old
            one could not be fully removed.
    """
    global TMP_DIR
    scratch_dir = "./tmp/feature_extractor%d/" % (os.getpid())
    TMP_DIR = scratch_dir
    # With ignore_errors=True a missing path is simply skipped, so no
    # separate existence check is needed before clearing stale content.
    shutil.rmtree(scratch_dir, ignore_errors=True)
    os.makedirs(scratch_dir)
def get_immediate_subdirectories(a_dir):
    """Return the paths of the directories located directly inside ``a_dir``.

    Each returned path is ``a_dir`` joined with the entry name. Regular files
    are skipped and nested levels are not visited. The order is whatever
    ``os.listdir`` yields.
    """
    subdirs = []
    for entry in os.listdir(a_dir):
        candidate = os.path.join(a_dir, entry)
        if os.path.isdir(candidate):
            subdirs.append(candidate)
    return subdirs
def ParallelExtractDir(args, dir):
    """Extract features for ``dir`` without any output-file-name prefix.

    Two-argument entry point that forwards to ExtractFeaturesForDir with an
    empty prefix.
    """
    no_prefix = ""
    ExtractFeaturesForDir(args, dir, no_prefix)
def ExtractFeaturesForDir(args, dir, prefix):
    """Run the Java extractor on ``dir`` and capture its stdout in TMP_DIR.

    The extractor's standard output is appended to the file
    ``TMP_DIR + prefix + <last component of dir>``. If the process exits with
    a non-zero status (including being killed by the watchdog timer), that
    file is deleted and the extraction is retried separately on each
    immediate subdirectory, with this directory's name added to the prefix.

    Args:
        args: parsed options; ``jar``, ``max_path_length``, ``max_path_width``
            and ``num_threads`` are read.
        dir: directory passed to the extractor via ``--dir``.
        prefix: string prepended to the output file name.
    """
    command = ['java', '-cp', args.jar, 'JavaExtractor.App',
               '--max_path_length', str(args.max_path_length),
               '--max_path_width', str(args.max_path_width),
               '--dir', dir, '--num_threads', str(args.num_threads)]

    # Last path component, accepting both '/' and '\\' as separators. The
    # same value is used for the file name and for the recursion prefix so
    # the prefix can never smuggle a separator into the output path.
    dir_name = dir.split('/')[-1].split('\\')[-1]
    outputFileName = TMP_DIR + prefix + dir_name
    failed = False
    with open(outputFileName, 'a') as outputFile:
        sleeper = subprocess.Popen(command, stdout=outputFile, stderr=subprocess.PIPE)
        # NOTE(review): Timer intervals are in seconds, so this watchdog only
        # fires after roughly a week - confirm milliseconds were not intended.
        timer = Timer(600000, sleeper.kill)

        try:
            timer.start()
            # stdout goes straight to outputFile; only stderr is captured.
            _, stderr = sleeper.communicate()
        finally:
            timer.cancel()

        # Diagnostics must go to stderr: stdout carries the extracted
        # features, and anything else written there corrupts that data.
        if sleeper.poll() == 0:
            if stderr:
                print(stderr.decode(errors='replace'), file=sys.stderr)
        else:
            print('dir: ' + str(dir) + ' was not completed in time', file=sys.stderr)
            failed = True
            # Retry at a finer granularity so one bad subtree does not lose
            # the whole directory.
            for subdir in get_immediate_subdirectories(dir):
                ExtractFeaturesForDir(args, subdir, prefix + dir_name + '_')
    # Drop the partial output of a failed run so it is not emitted later.
    if failed and os.path.exists(outputFileName):
        os.remove(outputFileName)
def ExtractFeaturesForDirsList(args, dirs):
    """Extract features for every directory in ``dirs`` and emit them on stdout.

    A per-process scratch directory is set up with setDir(), each directory
    is processed in turn, and the contents of every file left in the scratch
    directory are then copied verbatim to standard output. The scratch
    directory is removed afterwards, even if extraction raised.

    Args:
        args: parsed options, forwarded to ExtractFeaturesForDir.
        dirs: iterable of directory paths to process.
    """
    setDir()
    try:
        for d in dirs:
            ExtractFeaturesForDir(args, d, '')

        # Flush buffered text first so it cannot end up interleaved with the
        # raw bytes written to the underlying binary stream below.
        sys.stdout.flush()
        for f in os.listdir(TMP_DIR):
            # Copy the bytes directly instead of shelling out to `cat`: file
            # names come from directory names and may contain spaces or
            # shell metacharacters.
            with open(os.path.join(TMP_DIR, f), 'rb') as extracted:
                shutil.copyfileobj(extracted, sys.stdout.buffer)
        sys.stdout.buffer.flush()
    finally:
        # Remove only this process's scratch directory; scratch directories
        # are named per PID, so other runs may be using ./tmp/ concurrently.
        shutil.rmtree(TMP_DIR, ignore_errors=True)
        try:
            # Succeeds only when ./tmp/ is empty, i.e. nobody else uses it.
            os.rmdir('./tmp/')
        except OSError:
            pass
if __name__ == '__main__':
    # Command-line entry point: extract from a single file (--file) or from a
    # directory tree (--dir). --file wins when both are given.
    parser = ArgumentParser()
    parser.add_argument("-maxlen", "--max_path_length", dest="max_path_length", required=False,
                        default=8, type=int)
    parser.add_argument("-maxwidth", "--max_path_width", dest="max_path_width", required=False,
                        default=2, type=int)
    # num_threads is only passed to the extractor in --dir mode.
    parser.add_argument("-threads", "--num_threads", dest="num_threads", required=False,
                        default=64, type=int)
    parser.add_argument("-j", "--jar", dest="jar", required=True)
    parser.add_argument("-dir", "--dir", dest="dir", required=False)
    parser.add_argument("-file", "--file", dest="file", required=False)
    args = parser.parse_args()

    if args.file is not None:
        # Argument list, no shell: paths containing spaces or shell
        # metacharacters reach the JVM unchanged.
        subprocess.call(['java', '-cp', args.jar, 'JavaExtractor.App',
                         '--max_path_length', str(args.max_path_length),
                         '--max_path_width', str(args.max_path_width),
                         '--file', args.file])
    elif args.dir is not None:
        # Process each immediate subdirectory separately; a directory
        # without subdirectories is processed as a single unit.
        to_extract = get_immediate_subdirectories(args.dir)
        if not to_extract:
            to_extract = [args.dir.rstrip('/')]
        ExtractFeaturesForDirsList(args, to_extract)
    else:
        # Without an input there is nothing to do; say so instead of
        # exiting silently.
        parser.error("one of --file or --dir is required")