1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
from __future__ import division
import py
import random
exclude_files = ["__init__.py", "conftest.py"]
def include_file(path):
if ("test" in str(path) or "tool" in str(path) or
"documentation" in str(path) or
"_cache" in str(path)):
return False
if path.basename in exclude_files:
return False
return True
def get_mod_from_path(path):
dirs = path.get("dirname")[0].split("/")
pypyindex = dirs.index("pypy")
return ".".join(dirs[pypyindex:] + path.get("purebasename"))
def find_references(path):
refs = []
for line in path.open("r"):
if line.startswith(" "): # ignore local imports to reduce graph size
continue
if "\\" in line: #ignore line continuations
continue
line = line.strip()
line = line.split("#")[0].strip()
if line.startswith("import pypy."): # import pypy.bla.whatever
if " as " not in line:
refs.append((line[7:].strip(), None))
else: # import pypy.bla.whatever as somethingelse
assert line.count(" as ") == 1
line = line.split(" as ")
refs.append((line[0][7:].strip(), line[1].strip()))
elif line.startswith("from ") and "pypy" in line: #from pypy.b import a
line = line[5:]
if " as " not in line:
line = line.split(" import ")
what = line[1].split(",")
for w in what:
refs.append((line[0].strip() + "." + w.strip(), None))
else: # prom pypy.b import a as c
if line.count(" as ") != 1 or "," in line:
print"can't handle this: " + line
continue
line = line.split(" as ")
what = line[0].replace(" import ", ".").replace(" ", "")
refs.append((what, line[1].strip()))
return refs
def get_module(ref, imports):
ref = ref.split(".")
i = len(ref)
while i:
possible_mod = ".".join(ref[:i])
if possible_mod in imports:
return possible_mod
i -= 1
return None
def casteljeau(points, t):
points = points[:]
while len(points) > 1:
for i in range(len(points) - 1):
points[i] = points[i] * (1 - t) + points[i + 1] * t
del points[-1]
return points[0]
def color(t):
casteljeau([0, 0, 1, 0, 0], t) / 0.375
class ModuleGraph(object):
def __init__(self, path):
self.imports = {}
self.clusters = {}
self.mod_to_cluster = {}
for f in path.visit("*.py"):
if include_file(f):
self.imports[get_mod_from_path(f)] = find_references(f)
self.remove_object_refs()
self.remove_double_refs()
self.incoming = {}
for mod in self.imports:
self.incoming[mod] = set()
for mod, refs in self.imports.iteritems():
for ref in refs:
if ref[0] in self.incoming:
self.incoming[ref[0]].add(mod)
self.remove_single_nodes()
self.topgraph_properties = ["rankdir=LR"]
def remove_object_refs(self):
# reduces cases like import rpython.translator.genc.basetype.CType to
# import rpython.translator.genc.basetype
for mod, refs in self.imports.iteritems():
i = 0
while i < len(refs):
if refs[i][0] in self.imports:
i += 1
else:
nref = get_module(refs[i][0], self.imports)
if nref is None:
print "removing", repr(refs[i])
del refs[i]
else:
refs[i] = (nref, None)
i += 1
def remove_double_refs(self):
# remove several references to the same module
for mod, refs in self.imports.iteritems():
i = 0
seen_refs = set()
while i < len(refs):
if refs[i] not in seen_refs:
seen_refs.add(refs[i])
i += 1
else:
del refs[i]
def remove_single_nodes(self):
# remove nodes that have no attached edges
rem = []
for mod, refs in self.imports.iteritems():
if len(refs) == 0 and len(self.incoming[mod]) == 0:
rem.append(mod)
for m in rem:
del self.incoming[m]
del self.imports[m]
def create_clusters(self):
self.topgraph_properties.append("compound=true;")
self.clustered = True
hierarchy = [set() for i in range(6)]
for mod in self.imports:
for i, d in enumerate(mod.split(".")):
hierarchy[i].add(d)
for i in range(6):
if len(hierarchy[i]) != 1:
break
for mod in self.imports:
cluster = mod.split(".")[i]
if i == len(mod.split(".")) - 1:
continue
if cluster not in self.clusters:
self.clusters[cluster] = set()
self.clusters[cluster].add(mod)
self.mod_to_cluster[mod] = cluster
def remove_tangling_randomly(self):
# remove edges to nodes that have a lot incoming edges randomly
tangled = []
for mod, incoming in self.incoming.iteritems():
if len(incoming) > 10:
tangled.append(mod)
for mod in tangled:
remove = set()
incoming = self.incoming[mod]
while len(remove) < len(incoming) * 0.80:
remove.add(random.choice(list(incoming)))
for rem in remove:
for i in range(len(self.imports[rem])):
if self.imports[rem][i][1] == mod:
break
del self.imports[rem][i]
incoming.remove(rem)
print "removing", mod, "<-", rem
self.remove_single_nodes()
def dotfile(self, dot):
f = dot.open("w")
f.write("digraph G {\n")
for prop in self.topgraph_properties:
f.write("\t%s\n" % prop)
#write clusters and inter-cluster edges
for cluster, nodes in self.clusters.iteritems():
f.write("\tsubgraph cluster_%s {\n" % cluster)
f.write("\t\tstyle=filled;\n\t\tcolor=lightgrey\n")
for node in nodes:
f.write('\t\t"%s";\n' % node[5:])
for mod, refs in self.imports.iteritems():
for ref in refs:
if mod in nodes and ref[0] in nodes:
f.write('\t\t"%s" -> "%s";\n' % (mod[5:], ref[0][5:]))
f.write("\t}\n")
#write edges between clusters
for mod, refs in self.imports.iteritems():
try:
nodes = self.clusters[self.mod_to_cluster[mod]]
except KeyError:
nodes = set()
for ref in refs:
if ref[0] not in nodes:
f.write('\t"%s" -> "%s";\n' % (mod[5:], ref[0][5:]))
f.write("}")
f.close()
if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
path = py.path.local(sys.argv[1])
else:
path = py.path.local(".")
gr = ModuleGraph(path)
gr.create_clusters()
dot = path.join("import_graph.dot")
gr.dotfile(dot)
|