-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
memwalk.rake
195 lines (179 loc) · 5.8 KB
/
memwalk.rake
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Walks the memory dumped into heap.json, and produces a graph of the memory dumped in diff.json
# If a single argument (a hex address to one object) is given, the graph is limited to this object and what references it
# The heap dumps should be in the format produced by Ruby ObjectSpace in Ruby version 2.1.0 or later.
#
# The command produces a .dot file that can be rendered with Graphviz dot into SVG. If a memwalk is performed for all
# objects in the diff.json, the output file name is memwalk.dot. If it is produced for a single address, the name of the
# output file is memwalk-<address>.dot
#
# The dot file can be rendered with something like: dot -Tsvg -omemwalk.svg memwalk.dot
#
desc "Process a diff.json of object ids, and a heap.json of a Ruby 2.1.0 ObjectSpace dump and produce a graph"
task :memwalk, [:id] do |t, args|
puts "Memwalk"
puts "Computing for #{args[:id] || 'all'}"
# The optional task argument is a hex address. It is converted to an Integer so it can be
# compared with the Integer keys of the heap index; nil means "graph every diff entry".
@single_id = args[:id] ? args[:id].to_i(16) : nil
require 'json'
# Set is used for the printed/edges/walked bookkeeping; it is only autoloaded on Ruby >= 3.2.
require 'set'

# Keys and type names looked up in the parsed JSON entries.
TYPE = "type".freeze
ROOT = "root".freeze
ROOT_UC = "ROOT".freeze
ADDR = "address".freeze
NODE = "NODE".freeze
STRING = "STRING".freeze
DATA = "DATA".freeze
HASH = "HASH".freeze
ARRAY = "ARRAY".freeze
OBJECT = "OBJECT".freeze
CLASS = "CLASS".freeze

# Index of retained heap entries: Integer address (or root name String for ROOT entries) => entry
allocations = {}
# Index of the diff entries to trace bindings for: Integer address => entry
diff_index = {}

puts "Reading data"
lines = 0
begin
  puts "Reading diff"
  # Stream the file line by line (one JSON document per line) instead of loading it whole.
  File.foreach("diff.json") do |line|
    lines += 1
    diff = JSON.parse(line)
    case diff[TYPE]
    when STRING, DATA, HASH, ARRAY
      # skip strings and the plain data/container entries
    else
      diff_index[diff[ADDR].to_i(16)] = diff
    end
  end
  puts "Read #{lines} number of diffs"
rescue => e
  raise "ERROR READING DIFF at line #{lines} #{e.message[0, 200]}"
end

begin
  puts "Reading heap"
  lines = 0
  # Heap dumps can be very large, so stream this file as well.
  File.foreach("heap.json") do |line|
    lines += 1
    allocation = JSON.parse(line)
    case allocation[TYPE]
    when ROOT_UC
      # Graph for single id must include roots, as it may be a root that holds on to the reference
      # (a global variable, thread, etc.). Roots are keyed by their root name, not by an address.
      allocations[allocation[ROOT]] = allocation if @single_id
    when NODE, STRING
      # skip NODE objects (they represent the loaded ruby code) and strings (they are everywhere)
    else
      allocations[allocation[ADDR].to_i(16)] = allocation
    end
  end
  puts "Read #{lines} number of entries"
rescue => e
  # Report and re-raise the original error. No debugger is loaded here: in a batch task it would
  # either block on an interactive prompt or, when unavailable, raise LoadError and hide the cause.
  puts "ERROR READING HEAP #{e.message[0, 200]}"
  raise
end
@heap = allocations

puts "Building reference index"
# References is an index from a referenced object to an array with the keys of the entries that reference it
@references = Hash.new { |h, k| h[k] = [] }
REFERENCES = "references".freeze
allocations.each do |k, v|
  refs = v[REFERENCES]
  next unless refs.is_a?(Array)

  refs.each { |addr| @references[addr.to_i(16)] << k }
end
# Keys of the nodes already written to the output, so each node is emitted once.
@printed = Set.new
# Writes the DOT node declaration for one heap entry, at most once per address.
#
# addr  - Integer address of the entry, or a String key (used for ROOT entries).
# entry - the parsed heap entry; passed on to node_label to build the label.
#
# The node id is "x" followed by node_name(addr); the label is the node label and, on a second
# line (a literal \n in the DOT source), the same name. Returns nil when the node was already
# printed. Errors raised while building the label propagate unchanged to the caller.
def print_object(addr, entry)
  # only print each node once
  return unless @printed.add?(addr)

  name = node_name(addr)
  @output.write("x#{name} [label=\"#{node_label(addr, entry)}\\n#{name}\"];\n")
end
# Builds the label text for a heap entry.
#
# addr  - the key of the entry (not used by this method).
# entry - the parsed heap entry.
#
# For an OBJECT entry the label is the "name" of the entry found in @heap under the object's
# "class" address; for a CLASS entry it is "CLASS <name>"; otherwise it is the entry's type.
def node_label(addr, entry)
  kind = entry[TYPE]
  case kind
  when OBJECT
    class_addr = entry["class"].to_i(16)
    @heap[class_addr]["name"]
  when CLASS
    "CLASS #{entry[ "name"]}"
  else
    kind
  end
end
# Returns the textual name of a node key: a String key is returned as is, any other key
# is rendered in base 16 via to_s(16).
def node_name(addr)
  addr.is_a?(String) ? addr : addr.to_s(16)
end
# Writes one DOT edge "x<from>->x<to>;" to @output, naming both ends with node_name.
def print_edge(from_addr, to_addr)
  source = node_name(from_addr)
  target = node_name(to_addr)
  @output.write("x#{source}->x#{target};\n")
end
# Walks every key of the given diff index and then prints all edges collected on the way.
#
# diff - enumerable of (key, entry) pairs; only the keys are used.
#
# Before walking, reports how many of the keys have at least one entry in @references.
def closure_and_edges(diff)
  found_edges = Set.new
  visited = Set.new
  referenced = diff.count do |address, _|
    referrers = @references[address]
    referrers.is_a?(Array) && !referrers.empty?
  end
  puts "Number of diffs referenced = #{referenced}"
  diff.each { |address, _| walk(address, found_edges, visited) }
  found_edges.each { |edge| print_edge(*edge) }
end
# Starts a walk at addr: prints the entry stored in @heap under addr and follows every
# key listed for it in @references. Does nothing (returns nil) when @heap has no entry for addr.
#
# edges  - collection that walk_to_object records edges in.
# walked - collection that walk_to_object uses to track visited keys.
def walk(addr, edges, walked)
  entry = @heap[addr]
  return if entry.nil?

  print_object(addr, entry)
  @references[addr].each { |referrer| walk_to_object(addr, referrer, edges, walked) }
end
# Recursively follows the reference index backwards from `cursor` (an entry that references
# something on the path to `to_addr`) and records an edge [cursor, to_addr] whenever `cursor`
# is an entry that should appear in the graph.
#
# to_addr - key of the most recent graph node on this path; edges point at it.
# cursor  - key of the entry currently being inspected.
# edges   - collection of [from, to] pairs; `add?` is used to detect already recorded edges.
# walked  - the set of visited keys, or nil. Callers pass `walked.add?(r)`, and Set#add? returns
#           nil when r was already present, so a nil here means "already visited: stop".
#
# NOTE(review): assumes @heap has an entry for `cursor`; a missing entry would raise
# NoMethodError on the `[ TYPE ]` lookup - confirm the reference index only holds heap keys.
def walk_to_object(to_addr, cursor, edges, walked)
return unless walked
# if walked to an object, or everything if a single_id is the target
# Because && binds tighter than ||, this condition reads as:
#   OBJECT || (@single_id && ROOT) || CLASS
# i.e. a CLASS entry matches whether or not @single_id is set.
if @heap[ cursor ][ TYPE ] == OBJECT || (@single_id && @heap[ cursor ][ TYPE ] == ROOT_UC || @heap[ cursor ][ TYPE ] == CLASS )
# and the edge is unique (add? returns nil when the edge was already recorded)
if edges.add?( [ cursor, to_addr ] )
# then we may not have visited the objects this object is being referred from
print_object(cursor, @heap[ cursor ])
# Do not follow what binds a class
if @heap[ cursor ][ TYPE ] != CLASS
@references[ cursor ].each do |r|
# From here on `cursor` is the node that new edges point at.
walk_to_object(cursor, r, edges, walked.add?(r))
# r is removed again after its subtree was walked. This also runs when add? returned nil,
# i.e. when r had been added earlier by another call - NOTE(review): confirm this is intended.
walked.delete(r)
end
end
end
else
# continue search until Object: the edge target stays `to_addr`, and visited keys stay in `walked`
@references[cursor].each do |r|
walk_to_object(to_addr, r, edges, walked.add?(r))
end
end
end
# Walks from a single target key and then prints every edge collected during that walk.
#
# the_target - key of the entry the graph is limited to.
def single_closure_and_edges(the_target)
  found_edges = Set.new
  visited = Set.new
  walk(the_target, found_edges, visited)
  found_edges.each { |edge| print_edge(*edge) }
end
puts "creating graph"
# The output is memwalk-<hex address>.dot for a single target and memwalk.dot otherwise.
dot_path = @single_id ? "memwalk-#{@single_id.to_s(16)}.dot" : "memwalk.dot"
# The block form closes the file even when the walk raises, so the handle never leaks
# and whatever was written so far is flushed.
File.open(dot_path, "w") do |out|
  # print_object and print_edge write through @output
  @output = out
  out.write("digraph root {\n")
  if @single_id
    single_closure_and_edges(@single_id)
  else
    closure_and_edges(diff_index)
  end
  out.write("}\n")
end
puts "done"
end