-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpagerank-map.py
executable file
·133 lines (104 loc) · 3.23 KB
/
pagerank-map.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python
import sys
import re
from subprocess import *
keyword_list = ["IGNORE", "ITERATION"]
"""
IGNORE will output a -99 as the node receiving the input
ITERATION ########## -10 ##########
#################### -1 #### comes from G
#################### -11 ### comes from self, when no outnodes
"""
logfile_name = "data_log.txt"
watch_nodes = [89,793]
def processLine(line):
"""This processes a input line of the form
NodeId:k rank, rank',a,b,c,d,e, ....
:line: a string of the format above.
:returns: returns a list
[node id, current rank, previous rank, \
[list of outnodes]]
"""
line = line.strip()
line = line.split('\t')
if line[0] in keyword_list:
return [line[0], int(line[1])]
regex = re.compile(".*:([0-9]+).*")
r = regex.search(line[0])
regex.match(line[0])
node_id = int(r.groups()[0])
numberlist = line[1].split(',')
current_rank = float(numberlist[0])
previous_rank = float(numberlist[1])
outnode_list = []
for u in range(2, len(numberlist)):
outnode_list.append(int(numberlist[u]))
return [node_id, current_rank, previous_rank, outnode_list]
page_rank_map_output = []
alpha = 0.85
alpha_bar = 1-alpha
list_of_line_structs = []
total_sum = 0.0
total_nodes = 0
ignore_set = set([])
for line in sys.stdin:
line = processLine(line)
list_of_line_structs.append(line)
n_nodes = len(list_of_line_structs)
for u in range(n_nodes):
r = list_of_line_structs[u]
if (r[0] == "ITERATION"):
page_rank_map_output.append(str(-10) + '\t' + str(r[1]) + '\n')
continue
if (r[0] == "IGNORE"):
ignore_set.add(r[1])
continue
if len(r[3]) > 0:
fraction = 1.0/len(r[3])
for outnode in r[3]:
contribution = r[1] * fraction * alpha
inc = r[1] * fraction * alpha
"""
If something is in the ignore_set, then there is no need to
recompute its page rank. To do this, we do not any outnodes that
are in the ignore list.
Finally, at the VERY END, we simulate a contribution from every
member of the ignore list, to itself. To make sure that its
rating is preserved.
"""
if outnode not in ignore_set:
page_rank_map_output.append(str(outnode) + '\t' + str(contribution) + \
'\t' + str(r[0]) + '\t' + str(691.0) + '\n')
"""
string is the the form:
<node j to add the contribution to>,
<the contribution amount>
<node k the contribution came from>
<reserved for passing along the previous rank>
"""
else:
if outnode not in ignore_set:
page_rank_map_output.append(str(r[0]) + '\t' + str(r[1]*alpha) +
'\t' + str(-11) + '\t' + \
str(691.0) + '\n')
"""
We need to account for the matrix G.
In here, EVERY NODE contributes to EVERY NODE (even itself).
"""
#for v in range(n_nodes):
#G_contribution = list_of_line_structs[v][1] * 1.0/n_nodes * (1-alpha)
#page_rank_map_output.append(str(r[0]) + '\t' + \
#str(G_contribution) + '\t' + \
#'-1' + '\t' + \
#str(r[2]) + '\n')
#G_contribution = (1-alpha)*total_sum * n_nodes
page_rank_map_output.append(str(r[0]) + '\t' + \
str(1-alpha) + '\t' + \
str(-1) + '\t' + \
str(r[1]) + '\n')
#page_rank_map_output.append(str(r[0]) + '\t' + \
#str((1-alpha)*total_sum/total_nodes) + '\t' + \
#'-1' + '\t' + \
#str(r[1]) + '\n')
for s in page_rank_map_output:
sys.stdout.write(s)