Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Views: 184583
1
#!/usr/bin/env python3
2
#
3
# Copyright 2008-2017 Jose Fonseca
4
#
5
# This program is free software: you can redistribute it and/or modify it
6
# under the terms of the GNU Lesser General Public License as published
7
# by the Free Software Foundation, either version 3 of the License, or
8
# (at your option) any later version.
9
#
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU Lesser General Public License for more details.
14
#
15
# You should have received a copy of the GNU Lesser General Public License
16
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17
#
18
19
"""Generate a dot graph from the output of several profilers."""
20
21
__author__ = "Jose Fonseca et al"
22
23
24
import sys
25
import math
26
import os.path
27
import re
28
import textwrap
29
import optparse
30
import xml.parsers.expat
31
import collections
32
import locale
33
import json
34
35
36
# Python 2.x/3.x compatibility
37
if sys.version_info[0] >= 3:
38
PYTHON_3 = True
39
def compat_iteritems(x): return x.items() # No iteritems() in Python 3
40
def compat_itervalues(x): return x.values() # No itervalues() in Python 3
41
def compat_keys(x): return list(x.keys()) # keys() is a generator in Python 3
42
basestring = str # No class basestring in Python 3
43
unichr = chr # No unichr in Python 3
44
xrange = range # No xrange in Python 3
45
else:
46
PYTHON_3 = False
47
def compat_iteritems(x): return x.iteritems()
48
def compat_itervalues(x): return x.itervalues()
49
def compat_keys(x): return x.keys()
50
51
52
try:
53
# Debugging helper module
54
import debug
55
except ImportError:
56
pass
57
58
59
60
########################################################################
61
# Model
62
63
64
MULTIPLICATION_SIGN = unichr(0xd7)
65
66
67
def times(x):
68
return "%u%s" % (x, MULTIPLICATION_SIGN)
69
70
def percentage(p):
71
return "%.02f%%" % (p*100.0,)
72
73
def add(a, b):
74
return a + b
75
76
def fail(a, b):
77
assert False
78
79
80
tol = 2 ** -23
81
82
def ratio(numerator, denominator):
83
try:
84
ratio = float(numerator)/float(denominator)
85
except ZeroDivisionError:
86
# 0/0 is undefined, but 1.0 yields more useful results
87
return 1.0
88
if ratio < 0.0:
89
if ratio < -tol:
90
sys.stderr.write('warning: negative ratio (%s/%s)\n' % (numerator, denominator))
91
return 0.0
92
if ratio > 1.0:
93
if ratio > 1.0 + tol:
94
sys.stderr.write('warning: ratio greater than one (%s/%s)\n' % (numerator, denominator))
95
return 1.0
96
return ratio
97
98
99
class UndefinedEvent(Exception):
100
"""Raised when attempting to get an event which is undefined."""
101
102
def __init__(self, event):
103
Exception.__init__(self)
104
self.event = event
105
106
def __str__(self):
107
return 'unspecified event %s' % self.event.name
108
109
110
class Event(object):
111
"""Describe a kind of event, and its basic operations."""
112
113
def __init__(self, name, null, aggregator, formatter = str):
114
self.name = name
115
self._null = null
116
self._aggregator = aggregator
117
self._formatter = formatter
118
119
def __eq__(self, other):
120
return self is other
121
122
def __hash__(self):
123
return id(self)
124
125
def null(self):
126
return self._null
127
128
def aggregate(self, val1, val2):
129
"""Aggregate two event values."""
130
assert val1 is not None
131
assert val2 is not None
132
return self._aggregator(val1, val2)
133
134
def format(self, val):
135
"""Format an event value."""
136
assert val is not None
137
return self._formatter(val)
138
139
140
CALLS = Event("Calls", 0, add, times)
141
SAMPLES = Event("Samples", 0, add, times)
142
SAMPLES2 = Event("Samples", 0, add, times)
143
144
# Count of samples where a given function was either executing or on the stack.
145
# This is used to calculate the total time ratio according to the
146
# straightforward method described in Mike Dunlavey's answer to
147
# stackoverflow.com/questions/1777556/alternatives-to-gprof, item 4 (the myth
148
# "that recursion is a tricky confusing issue"), last edited 2012-08-30: it's
149
# just the ratio of TOTAL_SAMPLES over the number of samples in the profile.
150
#
151
# Used only when totalMethod == callstacks
152
TOTAL_SAMPLES = Event("Samples", 0, add, times)
153
154
TIME = Event("Time", 0.0, add, lambda x: '(' + str(x) + ')')
155
TIME_RATIO = Event("Time ratio", 0.0, add, lambda x: '(' + percentage(x) + ')')
156
TOTAL_TIME = Event("Total time", 0.0, fail)
157
TOTAL_TIME_RATIO = Event("Total time ratio", 0.0, fail, percentage)
158
159
totalMethod = 'callratios'
160
161
162
class Object(object):
163
"""Base class for all objects in profile which can store events."""
164
165
def __init__(self, events=None):
166
if events is None:
167
self.events = {}
168
else:
169
self.events = events
170
171
def __hash__(self):
172
return id(self)
173
174
def __eq__(self, other):
175
return self is other
176
177
def __lt__(self, other):
178
return id(self) < id(other)
179
180
def __contains__(self, event):
181
return event in self.events
182
183
def __getitem__(self, event):
184
try:
185
return self.events[event]
186
except KeyError:
187
raise UndefinedEvent(event)
188
189
def __setitem__(self, event, value):
190
if value is None:
191
if event in self.events:
192
del self.events[event]
193
else:
194
self.events[event] = value
195
196
197
class Call(Object):
198
"""A call between functions.
199
200
There should be at most one call object for every pair of functions.
201
"""
202
203
def __init__(self, callee_id):
204
Object.__init__(self)
205
self.callee_id = callee_id
206
self.ratio = None
207
self.weight = None
208
209
210
class Function(Object):
211
"""A function."""
212
213
def __init__(self, id, name):
214
Object.__init__(self)
215
self.id = id
216
self.name = name
217
self.module = None
218
self.process = None
219
self.calls = {}
220
self.called = None
221
self.weight = None
222
self.cycle = None
223
self.filename = None
224
225
def add_call(self, call):
226
if call.callee_id in self.calls:
227
sys.stderr.write('warning: overwriting call from function %s to %s\n' % (str(self.id), str(call.callee_id)))
228
self.calls[call.callee_id] = call
229
230
def get_call(self, callee_id):
231
if not callee_id in self.calls:
232
call = Call(callee_id)
233
call[SAMPLES] = 0
234
call[SAMPLES2] = 0
235
call[CALLS] = 0
236
self.calls[callee_id] = call
237
return self.calls[callee_id]
238
239
_parenthesis_re = re.compile(r'\([^()]*\)')
240
_angles_re = re.compile(r'<[^<>]*>')
241
_const_re = re.compile(r'\s+const$')
242
243
def stripped_name(self):
244
"""Remove extraneous information from C++ demangled function names."""
245
246
name = self.name
247
248
# Strip function parameters from name by recursively removing paired parenthesis
249
while True:
250
name, n = self._parenthesis_re.subn('', name)
251
if not n:
252
break
253
254
# Strip const qualifier
255
name = self._const_re.sub('', name)
256
257
# Strip template parameters from name by recursively removing paired angles
258
while True:
259
name, n = self._angles_re.subn('', name)
260
if not n:
261
break
262
263
return name
264
265
# TODO: write utility functions
266
267
def __repr__(self):
268
return self.name
269
270
271
class Cycle(Object):
272
"""A cycle made from recursive function calls."""
273
274
def __init__(self):
275
Object.__init__(self)
276
self.functions = set()
277
278
def add_function(self, function):
279
assert function not in self.functions
280
self.functions.add(function)
281
if function.cycle is not None:
282
for other in function.cycle.functions:
283
if function not in self.functions:
284
self.add_function(other)
285
function.cycle = self
286
287
288
class Profile(Object):
289
"""The whole profile."""
290
291
def __init__(self):
292
Object.__init__(self)
293
self.functions = {}
294
self.cycles = []
295
296
def add_function(self, function):
297
if function.id in self.functions:
298
sys.stderr.write('warning: overwriting function %s (id %s)\n' % (function.name, str(function.id)))
299
self.functions[function.id] = function
300
301
def add_cycle(self, cycle):
302
self.cycles.append(cycle)
303
304
def validate(self):
305
"""Validate the edges."""
306
307
for function in compat_itervalues(self.functions):
308
for callee_id in compat_keys(function.calls):
309
assert function.calls[callee_id].callee_id == callee_id
310
if callee_id not in self.functions:
311
sys.stderr.write('warning: call to undefined function %s from function %s\n' % (str(callee_id), function.name))
312
del function.calls[callee_id]
313
314
def find_cycles(self):
315
"""Find cycles using Tarjan's strongly connected components algorithm."""
316
317
# Apply the Tarjan's algorithm successively until all functions are visited
318
stack = []
319
data = {}
320
order = 0
321
for function in compat_itervalues(self.functions):
322
order = self._tarjan(function, order, stack, data)
323
cycles = []
324
for function in compat_itervalues(self.functions):
325
if function.cycle is not None and function.cycle not in cycles:
326
cycles.append(function.cycle)
327
self.cycles = cycles
328
if 0:
329
for cycle in cycles:
330
sys.stderr.write("Cycle:\n")
331
for member in cycle.functions:
332
sys.stderr.write("\tFunction %s\n" % member.name)
333
334
def prune_root(self, root):
335
visited = set()
336
frontier = set([root])
337
while len(frontier) > 0:
338
node = frontier.pop()
339
visited.add(node)
340
f = self.functions[node]
341
newNodes = f.calls.keys()
342
frontier = frontier.union(set(newNodes) - visited)
343
subtreeFunctions = {}
344
for n in visited:
345
subtreeFunctions[n] = self.functions[n]
346
self.functions = subtreeFunctions
347
348
def prune_leaf(self, leaf):
349
edgesUp = collections.defaultdict(set)
350
for f in self.functions.keys():
351
for n in self.functions[f].calls.keys():
352
edgesUp[n].add(f)
353
# build the tree up
354
visited = set()
355
frontier = set([leaf])
356
while len(frontier) > 0:
357
node = frontier.pop()
358
visited.add(node)
359
frontier = frontier.union(edgesUp[node] - visited)
360
downTree = set(self.functions.keys())
361
upTree = visited
362
path = downTree.intersection(upTree)
363
pathFunctions = {}
364
for n in path:
365
f = self.functions[n]
366
newCalls = {}
367
for c in f.calls.keys():
368
if c in path:
369
newCalls[c] = f.calls[c]
370
f.calls = newCalls
371
pathFunctions[n] = f
372
self.functions = pathFunctions
373
374
375
def getFunctionId(self, funcName):
376
for f in self.functions:
377
if self.functions[f].name == funcName:
378
return f
379
return False
380
381
class _TarjanData:
382
def __init__(self, order):
383
self.order = order
384
self.lowlink = order
385
self.onstack = False
386
387
def _tarjan(self, function, order, stack, data):
388
"""Tarjan's strongly connected components algorithm.
389
390
See also:
391
- http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm
392
"""
393
394
try:
395
func_data = data[function.id]
396
return order
397
except KeyError:
398
func_data = self._TarjanData(order)
399
data[function.id] = func_data
400
order += 1
401
pos = len(stack)
402
stack.append(function)
403
func_data.onstack = True
404
for call in compat_itervalues(function.calls):
405
try:
406
callee_data = data[call.callee_id]
407
if callee_data.onstack:
408
func_data.lowlink = min(func_data.lowlink, callee_data.order)
409
except KeyError:
410
callee = self.functions[call.callee_id]
411
order = self._tarjan(callee, order, stack, data)
412
callee_data = data[call.callee_id]
413
func_data.lowlink = min(func_data.lowlink, callee_data.lowlink)
414
if func_data.lowlink == func_data.order:
415
# Strongly connected component found
416
members = stack[pos:]
417
del stack[pos:]
418
if len(members) > 1:
419
cycle = Cycle()
420
for member in members:
421
cycle.add_function(member)
422
data[member.id].onstack = False
423
else:
424
for member in members:
425
data[member.id].onstack = False
426
return order
427
428
def call_ratios(self, event):
429
# Aggregate for incoming calls
430
cycle_totals = {}
431
for cycle in self.cycles:
432
cycle_totals[cycle] = 0.0
433
function_totals = {}
434
for function in compat_itervalues(self.functions):
435
function_totals[function] = 0.0
436
437
# Pass 1: function_total gets the sum of call[event] for all
438
# incoming arrows. Same for cycle_total for all arrows
439
# that are coming into the *cycle* but are not part of it.
440
for function in compat_itervalues(self.functions):
441
for call in compat_itervalues(function.calls):
442
if call.callee_id != function.id:
443
callee = self.functions[call.callee_id]
444
if event in call.events:
445
function_totals[callee] += call[event]
446
if callee.cycle is not None and callee.cycle is not function.cycle:
447
cycle_totals[callee.cycle] += call[event]
448
else:
449
sys.stderr.write("call_ratios: No data for " + function.name + " call to " + callee.name + "\n")
450
451
# Pass 2: Compute the ratios. Each call[event] is scaled by the
452
# function_total of the callee. Calls into cycles use the
453
# cycle_total, but not calls within cycles.
454
for function in compat_itervalues(self.functions):
455
for call in compat_itervalues(function.calls):
456
assert call.ratio is None
457
if call.callee_id != function.id:
458
callee = self.functions[call.callee_id]
459
if event in call.events:
460
if callee.cycle is not None and callee.cycle is not function.cycle:
461
total = cycle_totals[callee.cycle]
462
else:
463
total = function_totals[callee]
464
call.ratio = ratio(call[event], total)
465
else:
466
# Warnings here would only repeat those issued above.
467
call.ratio = 0.0
468
469
def integrate(self, outevent, inevent):
470
"""Propagate function time ratio along the function calls.
471
472
Must be called after finding the cycles.
473
474
See also:
475
- http://citeseer.ist.psu.edu/graham82gprof.html
476
"""
477
478
# Sanity checking
479
assert outevent not in self
480
for function in compat_itervalues(self.functions):
481
assert outevent not in function
482
assert inevent in function
483
for call in compat_itervalues(function.calls):
484
assert outevent not in call
485
if call.callee_id != function.id:
486
assert call.ratio is not None
487
488
# Aggregate the input for each cycle
489
for cycle in self.cycles:
490
total = inevent.null()
491
for function in compat_itervalues(self.functions):
492
total = inevent.aggregate(total, function[inevent])
493
self[inevent] = total
494
495
# Integrate along the edges
496
total = inevent.null()
497
for function in compat_itervalues(self.functions):
498
total = inevent.aggregate(total, function[inevent])
499
self._integrate_function(function, outevent, inevent)
500
self[outevent] = total
501
502
def _integrate_function(self, function, outevent, inevent):
503
if function.cycle is not None:
504
return self._integrate_cycle(function.cycle, outevent, inevent)
505
else:
506
if outevent not in function:
507
total = function[inevent]
508
for call in compat_itervalues(function.calls):
509
if call.callee_id != function.id:
510
total += self._integrate_call(call, outevent, inevent)
511
function[outevent] = total
512
return function[outevent]
513
514
def _integrate_call(self, call, outevent, inevent):
515
assert outevent not in call
516
assert call.ratio is not None
517
callee = self.functions[call.callee_id]
518
subtotal = call.ratio *self._integrate_function(callee, outevent, inevent)
519
call[outevent] = subtotal
520
return subtotal
521
522
def _integrate_cycle(self, cycle, outevent, inevent):
523
if outevent not in cycle:
524
525
# Compute the outevent for the whole cycle
526
total = inevent.null()
527
for member in cycle.functions:
528
subtotal = member[inevent]
529
for call in compat_itervalues(member.calls):
530
callee = self.functions[call.callee_id]
531
if callee.cycle is not cycle:
532
subtotal += self._integrate_call(call, outevent, inevent)
533
total += subtotal
534
cycle[outevent] = total
535
536
# Compute the time propagated to callers of this cycle
537
callees = {}
538
for function in compat_itervalues(self.functions):
539
if function.cycle is not cycle:
540
for call in compat_itervalues(function.calls):
541
callee = self.functions[call.callee_id]
542
if callee.cycle is cycle:
543
try:
544
callees[callee] += call.ratio
545
except KeyError:
546
callees[callee] = call.ratio
547
548
for member in cycle.functions:
549
member[outevent] = outevent.null()
550
551
for callee, call_ratio in compat_iteritems(callees):
552
ranks = {}
553
call_ratios = {}
554
partials = {}
555
self._rank_cycle_function(cycle, callee, ranks)
556
self._call_ratios_cycle(cycle, callee, ranks, call_ratios, set())
557
partial = self._integrate_cycle_function(cycle, callee, call_ratio, partials, ranks, call_ratios, outevent, inevent)
558
559
# Ensure `partial == max(partials.values())`, but with round-off tolerance
560
max_partial = max(partials.values())
561
assert abs(partial - max_partial) <= 1e-7*max_partial
562
563
assert abs(call_ratio*total - partial) <= 0.001*call_ratio*total
564
565
return cycle[outevent]
566
567
def _rank_cycle_function(self, cycle, function, ranks):
568
"""Dijkstra's shortest paths algorithm.
569
570
See also:
571
- http://en.wikipedia.org/wiki/Dijkstra's_algorithm
572
"""
573
574
import heapq
575
Q = []
576
Qd = {}
577
p = {}
578
visited = set([function])
579
580
ranks[function] = 0
581
for call in compat_itervalues(function.calls):
582
if call.callee_id != function.id:
583
callee = self.functions[call.callee_id]
584
if callee.cycle is cycle:
585
ranks[callee] = 1
586
item = [ranks[callee], function, callee]
587
heapq.heappush(Q, item)
588
Qd[callee] = item
589
590
while Q:
591
cost, parent, member = heapq.heappop(Q)
592
if member not in visited:
593
p[member]= parent
594
visited.add(member)
595
for call in compat_itervalues(member.calls):
596
if call.callee_id != member.id:
597
callee = self.functions[call.callee_id]
598
if callee.cycle is cycle:
599
member_rank = ranks[member]
600
rank = ranks.get(callee)
601
if rank is not None:
602
if rank > 1 + member_rank:
603
rank = 1 + member_rank
604
ranks[callee] = rank
605
Qd_callee = Qd[callee]
606
Qd_callee[0] = rank
607
Qd_callee[1] = member
608
heapq._siftdown(Q, 0, Q.index(Qd_callee))
609
else:
610
rank = 1 + member_rank
611
ranks[callee] = rank
612
item = [rank, member, callee]
613
heapq.heappush(Q, item)
614
Qd[callee] = item
615
616
def _call_ratios_cycle(self, cycle, function, ranks, call_ratios, visited):
617
if function not in visited:
618
visited.add(function)
619
for call in compat_itervalues(function.calls):
620
if call.callee_id != function.id:
621
callee = self.functions[call.callee_id]
622
if callee.cycle is cycle:
623
if ranks[callee] > ranks[function]:
624
call_ratios[callee] = call_ratios.get(callee, 0.0) + call.ratio
625
self._call_ratios_cycle(cycle, callee, ranks, call_ratios, visited)
626
627
def _integrate_cycle_function(self, cycle, function, partial_ratio, partials, ranks, call_ratios, outevent, inevent):
628
if function not in partials:
629
partial = partial_ratio*function[inevent]
630
for call in compat_itervalues(function.calls):
631
if call.callee_id != function.id:
632
callee = self.functions[call.callee_id]
633
if callee.cycle is not cycle:
634
assert outevent in call
635
partial += partial_ratio*call[outevent]
636
else:
637
if ranks[callee] > ranks[function]:
638
callee_partial = self._integrate_cycle_function(cycle, callee, partial_ratio, partials, ranks, call_ratios, outevent, inevent)
639
call_ratio = ratio(call.ratio, call_ratios[callee])
640
call_partial = call_ratio*callee_partial
641
try:
642
call[outevent] += call_partial
643
except UndefinedEvent:
644
call[outevent] = call_partial
645
partial += call_partial
646
partials[function] = partial
647
try:
648
function[outevent] += partial
649
except UndefinedEvent:
650
function[outevent] = partial
651
return partials[function]
652
653
def aggregate(self, event):
654
"""Aggregate an event for the whole profile."""
655
656
total = event.null()
657
for function in compat_itervalues(self.functions):
658
try:
659
total = event.aggregate(total, function[event])
660
except UndefinedEvent:
661
return
662
self[event] = total
663
664
def ratio(self, outevent, inevent):
665
assert outevent not in self
666
assert inevent in self
667
for function in compat_itervalues(self.functions):
668
assert outevent not in function
669
assert inevent in function
670
function[outevent] = ratio(function[inevent], self[inevent])
671
for call in compat_itervalues(function.calls):
672
assert outevent not in call
673
if inevent in call:
674
call[outevent] = ratio(call[inevent], self[inevent])
675
self[outevent] = 1.0
676
677
def prune(self, node_thres, edge_thres, colour_nodes_by_selftime):
678
"""Prune the profile"""
679
680
# compute the prune ratios
681
for function in compat_itervalues(self.functions):
682
try:
683
function.weight = function[TOTAL_TIME_RATIO]
684
except UndefinedEvent:
685
pass
686
687
for call in compat_itervalues(function.calls):
688
callee = self.functions[call.callee_id]
689
690
if TOTAL_TIME_RATIO in call:
691
# handle exact cases first
692
call.weight = call[TOTAL_TIME_RATIO]
693
else:
694
try:
695
# make a safe estimate
696
call.weight = min(function[TOTAL_TIME_RATIO], callee[TOTAL_TIME_RATIO])
697
except UndefinedEvent:
698
pass
699
700
# prune the nodes
701
for function_id in compat_keys(self.functions):
702
function = self.functions[function_id]
703
if function.weight is not None:
704
if function.weight < node_thres:
705
del self.functions[function_id]
706
707
# prune the egdes
708
for function in compat_itervalues(self.functions):
709
for callee_id in compat_keys(function.calls):
710
call = function.calls[callee_id]
711
if callee_id not in self.functions or call.weight is not None and call.weight < edge_thres:
712
del function.calls[callee_id]
713
714
if colour_nodes_by_selftime:
715
weights = []
716
for function in compat_itervalues(self.functions):
717
try:
718
weights.append(function[TIME_RATIO])
719
except UndefinedEvent:
720
pass
721
max_ratio = max(weights or [1])
722
723
# apply rescaled weights for coloriung
724
for function in compat_itervalues(self.functions):
725
try:
726
function.weight = function[TIME_RATIO] / max_ratio
727
except (ZeroDivisionError, UndefinedEvent):
728
pass
729
730
def dump(self):
731
for function in compat_itervalues(self.functions):
732
sys.stderr.write('Function %s:\n' % (function.name,))
733
self._dump_events(function.events)
734
for call in compat_itervalues(function.calls):
735
callee = self.functions[call.callee_id]
736
sys.stderr.write(' Call %s:\n' % (callee.name,))
737
self._dump_events(call.events)
738
for cycle in self.cycles:
739
sys.stderr.write('Cycle:\n')
740
self._dump_events(cycle.events)
741
for function in cycle.functions:
742
sys.stderr.write(' Function %s\n' % (function.name,))
743
744
def _dump_events(self, events):
745
for event, value in compat_iteritems(events):
746
sys.stderr.write(' %s: %s\n' % (event.name, event.format(value)))
747
748
749
750
########################################################################
751
# Parsers
752
753
754
class Struct:
755
"""Masquerade a dictionary with a structure-like behavior."""
756
757
def __init__(self, attrs = None):
758
if attrs is None:
759
attrs = {}
760
self.__dict__['_attrs'] = attrs
761
762
def __getattr__(self, name):
763
try:
764
return self._attrs[name]
765
except KeyError:
766
raise AttributeError(name)
767
768
def __setattr__(self, name, value):
769
self._attrs[name] = value
770
771
def __str__(self):
772
return str(self._attrs)
773
774
def __repr__(self):
775
return repr(self._attrs)
776
777
778
class ParseError(Exception):
779
"""Raised when parsing to signal mismatches."""
780
781
def __init__(self, msg, line):
782
Exception.__init__(self)
783
self.msg = msg
784
# TODO: store more source line information
785
self.line = line
786
787
def __str__(self):
788
return '%s: %r' % (self.msg, self.line)
789
790
791
class Parser:
792
"""Parser interface."""
793
794
stdinInput = True
795
multipleInput = False
796
797
def __init__(self):
798
pass
799
800
def parse(self):
801
raise NotImplementedError
802
803
804
class JsonParser(Parser):
805
"""Parser for a custom JSON representation of profile data.
806
807
See schema.json for details.
808
"""
809
810
811
def __init__(self, stream):
812
Parser.__init__(self)
813
self.stream = stream
814
815
def parse(self):
816
817
obj = json.load(self.stream)
818
819
assert obj['version'] == 0
820
821
profile = Profile()
822
profile[SAMPLES] = 0
823
824
fns = obj['functions']
825
826
for functionIndex in range(len(fns)):
827
fn = fns[functionIndex]
828
function = Function(functionIndex, fn['name'])
829
try:
830
function.module = fn['module']
831
except KeyError:
832
pass
833
try:
834
function.process = fn['process']
835
except KeyError:
836
pass
837
function[SAMPLES] = 0
838
profile.add_function(function)
839
840
for event in obj['events']:
841
callchain = []
842
843
for functionIndex in event['callchain']:
844
function = profile.functions[functionIndex]
845
callchain.append(function)
846
847
cost = event['cost'][0]
848
849
callee = callchain[0]
850
callee[SAMPLES] += cost
851
profile[SAMPLES] += cost
852
853
for caller in callchain[1:]:
854
try:
855
call = caller.calls[callee.id]
856
except KeyError:
857
call = Call(callee.id)
858
call[SAMPLES2] = cost
859
caller.add_call(call)
860
else:
861
call[SAMPLES2] += cost
862
863
callee = caller
864
865
if False:
866
profile.dump()
867
868
# compute derived data
869
profile.validate()
870
profile.find_cycles()
871
profile.ratio(TIME_RATIO, SAMPLES)
872
profile.call_ratios(SAMPLES2)
873
profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
874
875
return profile
876
877
878
class LineParser(Parser):
879
"""Base class for parsers that read line-based formats."""
880
881
def __init__(self, stream):
882
Parser.__init__(self)
883
self._stream = stream
884
self.__line = None
885
self.__eof = False
886
self.line_no = 0
887
888
def readline(self):
889
line = self._stream.readline()
890
if not line:
891
self.__line = ''
892
self.__eof = True
893
else:
894
self.line_no += 1
895
line = line.rstrip('\r\n')
896
if not PYTHON_3:
897
encoding = self._stream.encoding
898
if encoding is None:
899
encoding = locale.getpreferredencoding()
900
line = line.decode(encoding)
901
self.__line = line
902
903
def lookahead(self):
904
assert self.__line is not None
905
return self.__line
906
907
def consume(self):
908
assert self.__line is not None
909
line = self.__line
910
self.readline()
911
return line
912
913
def eof(self):
914
assert self.__line is not None
915
return self.__eof
916
917
918
XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF = range(4)
919
920
921
class XmlToken:
922
923
def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
924
assert type in (XML_ELEMENT_START, XML_ELEMENT_END, XML_CHARACTER_DATA, XML_EOF)
925
self.type = type
926
self.name_or_data = name_or_data
927
self.attrs = attrs
928
self.line = line
929
self.column = column
930
931
def __str__(self):
932
if self.type == XML_ELEMENT_START:
933
return '<' + self.name_or_data + ' ...>'
934
if self.type == XML_ELEMENT_END:
935
return '</' + self.name_or_data + '>'
936
if self.type == XML_CHARACTER_DATA:
937
return self.name_or_data
938
if self.type == XML_EOF:
939
return 'end of file'
940
assert 0
941
942
943
class XmlTokenizer:
944
"""Expat based XML tokenizer."""
945
946
def __init__(self, fp, skip_ws = True):
947
self.fp = fp
948
self.tokens = []
949
self.index = 0
950
self.final = False
951
self.skip_ws = skip_ws
952
953
self.character_pos = 0, 0
954
self.character_data = ''
955
956
self.parser = xml.parsers.expat.ParserCreate()
957
self.parser.StartElementHandler = self.handle_element_start
958
self.parser.EndElementHandler = self.handle_element_end
959
self.parser.CharacterDataHandler = self.handle_character_data
960
961
def handle_element_start(self, name, attributes):
962
self.finish_character_data()
963
line, column = self.pos()
964
token = XmlToken(XML_ELEMENT_START, name, attributes, line, column)
965
self.tokens.append(token)
966
967
def handle_element_end(self, name):
968
self.finish_character_data()
969
line, column = self.pos()
970
token = XmlToken(XML_ELEMENT_END, name, None, line, column)
971
self.tokens.append(token)
972
973
def handle_character_data(self, data):
974
if not self.character_data:
975
self.character_pos = self.pos()
976
self.character_data += data
977
978
def finish_character_data(self):
979
if self.character_data:
980
if not self.skip_ws or not self.character_data.isspace():
981
line, column = self.character_pos
982
token = XmlToken(XML_CHARACTER_DATA, self.character_data, None, line, column)
983
self.tokens.append(token)
984
self.character_data = ''
985
986
def next(self):
987
size = 16*1024
988
while self.index >= len(self.tokens) and not self.final:
989
self.tokens = []
990
self.index = 0
991
data = self.fp.read(size)
992
self.final = len(data) < size
993
self.parser.Parse(data, self.final)
994
if self.index >= len(self.tokens):
995
line, column = self.pos()
996
token = XmlToken(XML_EOF, None, None, line, column)
997
else:
998
token = self.tokens[self.index]
999
self.index += 1
1000
return token
1001
1002
def pos(self):
1003
return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
1004
1005
1006
class XmlTokenMismatch(Exception):
1007
1008
def __init__(self, expected, found):
1009
Exception.__init__(self)
1010
self.expected = expected
1011
self.found = found
1012
1013
def __str__(self):
1014
return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
1015
1016
1017
class XmlParser(Parser):
1018
"""Base XML document parser."""
1019
1020
def __init__(self, fp):
1021
Parser.__init__(self)
1022
self.tokenizer = XmlTokenizer(fp)
1023
self.consume()
1024
1025
def consume(self):
1026
self.token = self.tokenizer.next()
1027
1028
def match_element_start(self, name):
1029
return self.token.type == XML_ELEMENT_START and self.token.name_or_data == name
1030
1031
def match_element_end(self, name):
1032
return self.token.type == XML_ELEMENT_END and self.token.name_or_data == name
1033
1034
def element_start(self, name):
1035
while self.token.type == XML_CHARACTER_DATA:
1036
self.consume()
1037
if self.token.type != XML_ELEMENT_START:
1038
raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
1039
if self.token.name_or_data != name:
1040
raise XmlTokenMismatch(XmlToken(XML_ELEMENT_START, name), self.token)
1041
attrs = self.token.attrs
1042
self.consume()
1043
return attrs
1044
1045
def element_end(self, name):
1046
while self.token.type == XML_CHARACTER_DATA:
1047
self.consume()
1048
if self.token.type != XML_ELEMENT_END:
1049
raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
1050
if self.token.name_or_data != name:
1051
raise XmlTokenMismatch(XmlToken(XML_ELEMENT_END, name), self.token)
1052
self.consume()
1053
1054
def character_data(self, strip = True):
1055
data = ''
1056
while self.token.type == XML_CHARACTER_DATA:
1057
data += self.token.name_or_data
1058
self.consume()
1059
if strip:
1060
data = data.strip()
1061
return data
1062
1063
1064
class GprofParser(Parser):
1065
"""Parser for GNU gprof output.
1066
1067
See also:
1068
- Chapter "Interpreting gprof's Output" from the GNU gprof manual
1069
http://sourceware.org/binutils/docs-2.18/gprof/Call-Graph.html#Call-Graph
1070
- File "cg_print.c" from the GNU gprof source code
1071
http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/src/gprof/cg_print.c?rev=1.12&cvsroot=src
1072
"""
1073
1074
def __init__(self, fp):
1075
Parser.__init__(self)
1076
self.fp = fp
1077
self.functions = {}
1078
self.cycles = {}
1079
1080
def readline(self):
1081
line = self.fp.readline()
1082
if not line:
1083
sys.stderr.write('error: unexpected end of file\n')
1084
sys.exit(1)
1085
line = line.rstrip('\r\n')
1086
return line
1087
1088
_int_re = re.compile(r'^\d+$')
1089
_float_re = re.compile(r'^\d+\.\d+$')
1090
1091
def translate(self, mo):
1092
"""Extract a structure from a match object, while translating the types in the process."""
1093
attrs = {}
1094
groupdict = mo.groupdict()
1095
for name, value in compat_iteritems(groupdict):
1096
if value is None:
1097
value = None
1098
elif self._int_re.match(value):
1099
value = int(value)
1100
elif self._float_re.match(value):
1101
value = float(value)
1102
attrs[name] = (value)
1103
return Struct(attrs)
1104
1105
_cg_header_re = re.compile(
1106
# original gprof header
1107
r'^\s+called/total\s+parents\s*$|' +
1108
r'^index\s+%time\s+self\s+descendents\s+called\+self\s+name\s+index\s*$|' +
1109
r'^\s+called/total\s+children\s*$|' +
1110
# GNU gprof header
1111
r'^index\s+%\s+time\s+self\s+children\s+called\s+name\s*$'
1112
)
1113
1114
_cg_ignore_re = re.compile(
1115
# spontaneous
1116
r'^\s+<spontaneous>\s*$|'
1117
# internal calls (such as "mcount")
1118
r'^.*\((\d+)\)$'
1119
)
1120
1121
_cg_primary_re = re.compile(
1122
r'^\[(?P<index>\d+)\]?' +
1123
r'\s+(?P<percentage_time>\d+\.\d+)' +
1124
r'\s+(?P<self>\d+\.\d+)' +
1125
r'\s+(?P<descendants>\d+\.\d+)' +
1126
r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
1127
r'\s+(?P<name>\S.*?)' +
1128
r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
1129
r'\s\[(\d+)\]$'
1130
)
1131
1132
_cg_parent_re = re.compile(
1133
r'^\s+(?P<self>\d+\.\d+)?' +
1134
r'\s+(?P<descendants>\d+\.\d+)?' +
1135
r'\s+(?P<called>\d+)(?:/(?P<called_total>\d+))?' +
1136
r'\s+(?P<name>\S.*?)' +
1137
r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
1138
r'\s\[(?P<index>\d+)\]$'
1139
)
1140
1141
_cg_child_re = _cg_parent_re
1142
1143
_cg_cycle_header_re = re.compile(
1144
r'^\[(?P<index>\d+)\]?' +
1145
r'\s+(?P<percentage_time>\d+\.\d+)' +
1146
r'\s+(?P<self>\d+\.\d+)' +
1147
r'\s+(?P<descendants>\d+\.\d+)' +
1148
r'\s+(?:(?P<called>\d+)(?:\+(?P<called_self>\d+))?)?' +
1149
r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' +
1150
r'\s\[(\d+)\]$'
1151
)
1152
1153
_cg_cycle_member_re = re.compile(
1154
r'^\s+(?P<self>\d+\.\d+)?' +
1155
r'\s+(?P<descendants>\d+\.\d+)?' +
1156
r'\s+(?P<called>\d+)(?:\+(?P<called_self>\d+))?' +
1157
r'\s+(?P<name>\S.*?)' +
1158
r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
1159
r'\s\[(?P<index>\d+)\]$'
1160
)
1161
1162
_cg_sep_re = re.compile(r'^--+$')
1163
1164
def parse_function_entry(self, lines):
1165
parents = []
1166
children = []
1167
1168
while True:
1169
if not lines:
1170
sys.stderr.write('warning: unexpected end of entry\n')
1171
line = lines.pop(0)
1172
if line.startswith('['):
1173
break
1174
1175
# read function parent line
1176
mo = self._cg_parent_re.match(line)
1177
if not mo:
1178
if self._cg_ignore_re.match(line):
1179
continue
1180
sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
1181
else:
1182
parent = self.translate(mo)
1183
parents.append(parent)
1184
1185
# read primary line
1186
mo = self._cg_primary_re.match(line)
1187
if not mo:
1188
sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
1189
return
1190
else:
1191
function = self.translate(mo)
1192
1193
while lines:
1194
line = lines.pop(0)
1195
1196
# read function subroutine line
1197
mo = self._cg_child_re.match(line)
1198
if not mo:
1199
if self._cg_ignore_re.match(line):
1200
continue
1201
sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
1202
else:
1203
child = self.translate(mo)
1204
children.append(child)
1205
1206
function.parents = parents
1207
function.children = children
1208
1209
self.functions[function.index] = function
1210
1211
def parse_cycle_entry(self, lines):
1212
1213
# read cycle header line
1214
line = lines[0]
1215
mo = self._cg_cycle_header_re.match(line)
1216
if not mo:
1217
sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
1218
return
1219
cycle = self.translate(mo)
1220
1221
# read cycle member lines
1222
cycle.functions = []
1223
for line in lines[1:]:
1224
mo = self._cg_cycle_member_re.match(line)
1225
if not mo:
1226
sys.stderr.write('warning: unrecognized call graph entry: %r\n' % line)
1227
continue
1228
call = self.translate(mo)
1229
cycle.functions.append(call)
1230
1231
self.cycles[cycle.cycle] = cycle
1232
1233
def parse_cg_entry(self, lines):
1234
if lines[0].startswith("["):
1235
self.parse_cycle_entry(lines)
1236
else:
1237
self.parse_function_entry(lines)
1238
1239
def parse_cg(self):
1240
"""Parse the call graph."""
1241
1242
# skip call graph header
1243
while not self._cg_header_re.match(self.readline()):
1244
pass
1245
line = self.readline()
1246
while self._cg_header_re.match(line):
1247
line = self.readline()
1248
1249
# process call graph entries
1250
entry_lines = []
1251
while line != '\014': # form feed
1252
if line and not line.isspace():
1253
if self._cg_sep_re.match(line):
1254
self.parse_cg_entry(entry_lines)
1255
entry_lines = []
1256
else:
1257
entry_lines.append(line)
1258
line = self.readline()
1259
1260
def parse(self):
1261
self.parse_cg()
1262
self.fp.close()
1263
1264
profile = Profile()
1265
profile[TIME] = 0.0
1266
1267
cycles = {}
1268
for index in self.cycles:
1269
cycles[index] = Cycle()
1270
1271
for entry in compat_itervalues(self.functions):
1272
# populate the function
1273
function = Function(entry.index, entry.name)
1274
function[TIME] = entry.self
1275
if entry.called is not None:
1276
function.called = entry.called
1277
if entry.called_self is not None:
1278
call = Call(entry.index)
1279
call[CALLS] = entry.called_self
1280
function.called += entry.called_self
1281
1282
# populate the function calls
1283
for child in entry.children:
1284
call = Call(child.index)
1285
1286
assert child.called is not None
1287
call[CALLS] = child.called
1288
1289
if child.index not in self.functions:
1290
# NOTE: functions that were never called but were discovered by gprof's
1291
# static call graph analysis dont have a call graph entry so we need
1292
# to add them here
1293
missing = Function(child.index, child.name)
1294
function[TIME] = 0.0
1295
function.called = 0
1296
profile.add_function(missing)
1297
1298
function.add_call(call)
1299
1300
profile.add_function(function)
1301
1302
if entry.cycle is not None:
1303
try:
1304
cycle = cycles[entry.cycle]
1305
except KeyError:
1306
sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle)
1307
cycle = Cycle()
1308
cycles[entry.cycle] = cycle
1309
cycle.add_function(function)
1310
1311
profile[TIME] = profile[TIME] + function[TIME]
1312
1313
for cycle in compat_itervalues(cycles):
1314
profile.add_cycle(cycle)
1315
1316
# Compute derived events
1317
profile.validate()
1318
profile.ratio(TIME_RATIO, TIME)
1319
profile.call_ratios(CALLS)
1320
profile.integrate(TOTAL_TIME, TIME)
1321
profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
1322
1323
return profile
1324
1325
1326
# Clone&hack of GprofParser for VTune Amplifier XE 2013 gprof-cc output.
1327
# Tested only with AXE 2013 for Windows.
1328
# - Use total times as reported by AXE.
1329
# - In the absence of call counts, call ratios are faked from the relative
1330
# proportions of total time. This affects only the weighting of the calls.
1331
# - Different header, separator, and end marker.
1332
# - Extra whitespace after function names.
1333
# - You get a full entry for <spontaneous>, which does not have parents.
1334
# - Cycles do have parents. These are saved but unused (as they are
1335
# for functions).
1336
# - Disambiguated "unrecognized call graph entry" error messages.
1337
# Notes:
1338
# - Total time of functions as reported by AXE passes the val3 test.
1339
# - CPU Time:Children in the input is sometimes a negative number. This
1340
# value goes to the variable descendants, which is unused.
1341
# - The format of gprof-cc reports is unaffected by the use of
1342
# -knob enable-call-counts=true (no call counts, ever), or
1343
# -show-as=samples (results are quoted in seconds regardless).
1344
class AXEParser(Parser):
1345
"Parser for VTune Amplifier XE 2013 gprof-cc report output."
1346
1347
def __init__(self, fp):
1348
Parser.__init__(self)
1349
self.fp = fp
1350
self.functions = {}
1351
self.cycles = {}
1352
1353
def readline(self):
1354
line = self.fp.readline()
1355
if not line:
1356
sys.stderr.write('error: unexpected end of file\n')
1357
sys.exit(1)
1358
line = line.rstrip('\r\n')
1359
return line
1360
1361
_int_re = re.compile(r'^\d+$')
1362
_float_re = re.compile(r'^\d+\.\d+$')
1363
1364
def translate(self, mo):
1365
"""Extract a structure from a match object, while translating the types in the process."""
1366
attrs = {}
1367
groupdict = mo.groupdict()
1368
for name, value in compat_iteritems(groupdict):
1369
if value is None:
1370
value = None
1371
elif self._int_re.match(value):
1372
value = int(value)
1373
elif self._float_re.match(value):
1374
value = float(value)
1375
attrs[name] = (value)
1376
return Struct(attrs)
1377
1378
_cg_header_re = re.compile(
1379
'^Index |'
1380
'^-----+ '
1381
)
1382
1383
_cg_footer_re = re.compile(r'^Index\s+Function\s*$')
1384
1385
_cg_primary_re = re.compile(
1386
r'^\[(?P<index>\d+)\]?' +
1387
r'\s+(?P<percentage_time>\d+\.\d+)' +
1388
r'\s+(?P<self>\d+\.\d+)' +
1389
r'\s+(?P<descendants>\d+\.\d+)' +
1390
r'\s+(?P<name>\S.*?)' +
1391
r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
1392
r'\s+\[(\d+)\]' +
1393
r'\s*$'
1394
)
1395
1396
_cg_parent_re = re.compile(
1397
r'^\s+(?P<self>\d+\.\d+)?' +
1398
r'\s+(?P<descendants>\d+\.\d+)?' +
1399
r'\s+(?P<name>\S.*?)' +
1400
r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
1401
r'(?:\s+\[(?P<index>\d+)\]\s*)?' +
1402
r'\s*$'
1403
)
1404
1405
_cg_child_re = _cg_parent_re
1406
1407
_cg_cycle_header_re = re.compile(
1408
r'^\[(?P<index>\d+)\]?' +
1409
r'\s+(?P<percentage_time>\d+\.\d+)' +
1410
r'\s+(?P<self>\d+\.\d+)' +
1411
r'\s+(?P<descendants>\d+\.\d+)' +
1412
r'\s+<cycle\s(?P<cycle>\d+)\sas\sa\swhole>' +
1413
r'\s+\[(\d+)\]' +
1414
r'\s*$'
1415
)
1416
1417
_cg_cycle_member_re = re.compile(
1418
r'^\s+(?P<self>\d+\.\d+)?' +
1419
r'\s+(?P<descendants>\d+\.\d+)?' +
1420
r'\s+(?P<name>\S.*?)' +
1421
r'(?:\s+<cycle\s(?P<cycle>\d+)>)?' +
1422
r'\s+\[(?P<index>\d+)\]' +
1423
r'\s*$'
1424
)
1425
1426
def parse_function_entry(self, lines):
1427
parents = []
1428
children = []
1429
1430
while True:
1431
if not lines:
1432
sys.stderr.write('warning: unexpected end of entry\n')
1433
return
1434
line = lines.pop(0)
1435
if line.startswith('['):
1436
break
1437
1438
# read function parent line
1439
mo = self._cg_parent_re.match(line)
1440
if not mo:
1441
sys.stderr.write('warning: unrecognized call graph entry (1): %r\n' % line)
1442
else:
1443
parent = self.translate(mo)
1444
if parent.name != '<spontaneous>':
1445
parents.append(parent)
1446
1447
# read primary line
1448
mo = self._cg_primary_re.match(line)
1449
if not mo:
1450
sys.stderr.write('warning: unrecognized call graph entry (2): %r\n' % line)
1451
return
1452
else:
1453
function = self.translate(mo)
1454
1455
while lines:
1456
line = lines.pop(0)
1457
1458
# read function subroutine line
1459
mo = self._cg_child_re.match(line)
1460
if not mo:
1461
sys.stderr.write('warning: unrecognized call graph entry (3): %r\n' % line)
1462
else:
1463
child = self.translate(mo)
1464
if child.name != '<spontaneous>':
1465
children.append(child)
1466
1467
if function.name != '<spontaneous>':
1468
function.parents = parents
1469
function.children = children
1470
1471
self.functions[function.index] = function
1472
1473
def parse_cycle_entry(self, lines):
1474
1475
# Process the parents that were not there in gprof format.
1476
parents = []
1477
while True:
1478
if not lines:
1479
sys.stderr.write('warning: unexpected end of cycle entry\n')
1480
return
1481
line = lines.pop(0)
1482
if line.startswith('['):
1483
break
1484
mo = self._cg_parent_re.match(line)
1485
if not mo:
1486
sys.stderr.write('warning: unrecognized call graph entry (6): %r\n' % line)
1487
else:
1488
parent = self.translate(mo)
1489
if parent.name != '<spontaneous>':
1490
parents.append(parent)
1491
1492
# read cycle header line
1493
mo = self._cg_cycle_header_re.match(line)
1494
if not mo:
1495
sys.stderr.write('warning: unrecognized call graph entry (4): %r\n' % line)
1496
return
1497
cycle = self.translate(mo)
1498
1499
# read cycle member lines
1500
cycle.functions = []
1501
for line in lines[1:]:
1502
mo = self._cg_cycle_member_re.match(line)
1503
if not mo:
1504
sys.stderr.write('warning: unrecognized call graph entry (5): %r\n' % line)
1505
continue
1506
call = self.translate(mo)
1507
cycle.functions.append(call)
1508
1509
cycle.parents = parents
1510
self.cycles[cycle.cycle] = cycle
1511
1512
def parse_cg_entry(self, lines):
1513
if any("as a whole" in linelooper for linelooper in lines):
1514
self.parse_cycle_entry(lines)
1515
else:
1516
self.parse_function_entry(lines)
1517
1518
def parse_cg(self):
1519
"""Parse the call graph."""
1520
1521
# skip call graph header
1522
line = self.readline()
1523
while self._cg_header_re.match(line):
1524
line = self.readline()
1525
1526
# process call graph entries
1527
entry_lines = []
1528
# An EOF in readline terminates the program without returning.
1529
while not self._cg_footer_re.match(line):
1530
if line.isspace():
1531
self.parse_cg_entry(entry_lines)
1532
entry_lines = []
1533
else:
1534
entry_lines.append(line)
1535
line = self.readline()
1536
1537
def parse(self):
1538
sys.stderr.write('warning: for axe format, edge weights are unreliable estimates derived from function total times.\n')
1539
self.parse_cg()
1540
self.fp.close()
1541
1542
profile = Profile()
1543
profile[TIME] = 0.0
1544
1545
cycles = {}
1546
for index in self.cycles:
1547
cycles[index] = Cycle()
1548
1549
for entry in compat_itervalues(self.functions):
1550
# populate the function
1551
function = Function(entry.index, entry.name)
1552
function[TIME] = entry.self
1553
function[TOTAL_TIME_RATIO] = entry.percentage_time / 100.0
1554
1555
# populate the function calls
1556
for child in entry.children:
1557
call = Call(child.index)
1558
# The following bogus value affects only the weighting of
1559
# the calls.
1560
call[TOTAL_TIME_RATIO] = function[TOTAL_TIME_RATIO]
1561
1562
if child.index not in self.functions:
1563
# NOTE: functions that were never called but were discovered by gprof's
1564
# static call graph analysis dont have a call graph entry so we need
1565
# to add them here
1566
# FIXME: Is this applicable?
1567
missing = Function(child.index, child.name)
1568
function[TIME] = 0.0
1569
profile.add_function(missing)
1570
1571
function.add_call(call)
1572
1573
profile.add_function(function)
1574
1575
if entry.cycle is not None:
1576
try:
1577
cycle = cycles[entry.cycle]
1578
except KeyError:
1579
sys.stderr.write('warning: <cycle %u as a whole> entry missing\n' % entry.cycle)
1580
cycle = Cycle()
1581
cycles[entry.cycle] = cycle
1582
cycle.add_function(function)
1583
1584
profile[TIME] = profile[TIME] + function[TIME]
1585
1586
for cycle in compat_itervalues(cycles):
1587
profile.add_cycle(cycle)
1588
1589
# Compute derived events.
1590
profile.validate()
1591
profile.ratio(TIME_RATIO, TIME)
1592
# Lacking call counts, fake call ratios based on total times.
1593
profile.call_ratios(TOTAL_TIME_RATIO)
1594
# The TOTAL_TIME_RATIO of functions is already set. Propagate that
1595
# total time to the calls. (TOTAL_TIME is neither set nor used.)
1596
for function in compat_itervalues(profile.functions):
1597
for call in compat_itervalues(function.calls):
1598
if call.ratio is not None:
1599
callee = profile.functions[call.callee_id]
1600
call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO]
1601
1602
return profile
1603
1604
1605
class CallgrindParser(LineParser):
1606
"""Parser for valgrind's callgrind tool.
1607
1608
See also:
1609
- http://valgrind.org/docs/manual/cl-format.html
1610
"""
1611
1612
_call_re = re.compile(r'^calls=\s*(\d+)\s+((\d+|\+\d+|-\d+|\*)\s+)+$')
1613
1614
def __init__(self, infile):
1615
LineParser.__init__(self, infile)
1616
1617
# Textual positions
1618
self.position_ids = {}
1619
self.positions = {}
1620
1621
# Numeric positions
1622
self.num_positions = 1
1623
self.cost_positions = ['line']
1624
self.last_positions = [0]
1625
1626
# Events
1627
self.num_events = 0
1628
self.cost_events = []
1629
1630
self.profile = Profile()
1631
self.profile[SAMPLES] = 0
1632
1633
def parse(self):
1634
# read lookahead
1635
self.readline()
1636
1637
self.parse_key('version')
1638
self.parse_key('creator')
1639
while self.parse_part():
1640
pass
1641
if not self.eof():
1642
sys.stderr.write('warning: line %u: unexpected line\n' % self.line_no)
1643
sys.stderr.write('%s\n' % self.lookahead())
1644
1645
# compute derived data
1646
self.profile.validate()
1647
self.profile.find_cycles()
1648
self.profile.ratio(TIME_RATIO, SAMPLES)
1649
self.profile.call_ratios(SAMPLES2)
1650
self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1651
1652
return self.profile
1653
1654
def parse_part(self):
1655
if not self.parse_header_line():
1656
return False
1657
while self.parse_header_line():
1658
pass
1659
if not self.parse_body_line():
1660
return False
1661
while self.parse_body_line():
1662
pass
1663
return True
1664
1665
def parse_header_line(self):
1666
return \
1667
self.parse_empty() or \
1668
self.parse_comment() or \
1669
self.parse_part_detail() or \
1670
self.parse_description() or \
1671
self.parse_event_specification() or \
1672
self.parse_cost_line_def() or \
1673
self.parse_cost_summary()
1674
1675
_detail_keys = set(('cmd', 'pid', 'thread', 'part'))
1676
1677
def parse_part_detail(self):
1678
return self.parse_keys(self._detail_keys)
1679
1680
def parse_description(self):
1681
return self.parse_key('desc') is not None
1682
1683
def parse_event_specification(self):
1684
event = self.parse_key('event')
1685
if event is None:
1686
return False
1687
return True
1688
1689
def parse_cost_line_def(self):
1690
pair = self.parse_keys(('events', 'positions'))
1691
if pair is None:
1692
return False
1693
key, value = pair
1694
items = value.split()
1695
if key == 'events':
1696
self.num_events = len(items)
1697
self.cost_events = items
1698
if key == 'positions':
1699
self.num_positions = len(items)
1700
self.cost_positions = items
1701
self.last_positions = [0]*self.num_positions
1702
return True
1703
1704
def parse_cost_summary(self):
1705
pair = self.parse_keys(('summary', 'totals'))
1706
if pair is None:
1707
return False
1708
return True
1709
1710
def parse_body_line(self):
1711
return \
1712
self.parse_empty() or \
1713
self.parse_comment() or \
1714
self.parse_cost_line() or \
1715
self.parse_position_spec() or \
1716
self.parse_association_spec()
1717
1718
__subpos_re = r'(0x[0-9a-fA-F]+|\d+|\+\d+|-\d+|\*)'
1719
_cost_re = re.compile(r'^' +
1720
__subpos_re + r'( +' + __subpos_re + r')*' +
1721
r'( +\d+)*' +
1722
'$')
1723
1724
def parse_cost_line(self, calls=None):
1725
line = self.lookahead().rstrip()
1726
mo = self._cost_re.match(line)
1727
if not mo:
1728
return False
1729
1730
function = self.get_function()
1731
1732
if calls is None:
1733
# Unlike other aspects, call object (cob) is relative not to the
1734
# last call object, but to the caller's object (ob), so try to
1735
# update it when processing a functions cost line
1736
try:
1737
self.positions['cob'] = self.positions['ob']
1738
except KeyError:
1739
pass
1740
1741
values = line.split()
1742
assert len(values) <= self.num_positions + self.num_events
1743
1744
positions = values[0 : self.num_positions]
1745
events = values[self.num_positions : ]
1746
events += ['0']*(self.num_events - len(events))
1747
1748
for i in range(self.num_positions):
1749
position = positions[i]
1750
if position == '*':
1751
position = self.last_positions[i]
1752
elif position[0] in '-+':
1753
position = self.last_positions[i] + int(position)
1754
elif position.startswith('0x'):
1755
position = int(position, 16)
1756
else:
1757
position = int(position)
1758
self.last_positions[i] = position
1759
1760
events = [float(event) for event in events]
1761
1762
if calls is None:
1763
function[SAMPLES] += events[0]
1764
self.profile[SAMPLES] += events[0]
1765
else:
1766
callee = self.get_callee()
1767
callee.called += calls
1768
1769
try:
1770
call = function.calls[callee.id]
1771
except KeyError:
1772
call = Call(callee.id)
1773
call[CALLS] = calls
1774
call[SAMPLES2] = events[0]
1775
function.add_call(call)
1776
else:
1777
call[CALLS] += calls
1778
call[SAMPLES2] += events[0]
1779
1780
self.consume()
1781
return True
1782
1783
def parse_association_spec(self):
1784
line = self.lookahead()
1785
if not line.startswith('calls='):
1786
return False
1787
1788
_, values = line.split('=', 1)
1789
values = values.strip().split()
1790
calls = int(values[0])
1791
call_position = values[1:]
1792
self.consume()
1793
1794
self.parse_cost_line(calls)
1795
1796
return True
1797
1798
_position_re = re.compile(r'^(?P<position>[cj]?(?:ob|fl|fi|fe|fn))=\s*(?:\((?P<id>\d+)\))?(?:\s*(?P<name>.+))?')
1799
1800
_position_table_map = {
1801
'ob': 'ob',
1802
'fl': 'fl',
1803
'fi': 'fl',
1804
'fe': 'fl',
1805
'fn': 'fn',
1806
'cob': 'ob',
1807
'cfl': 'fl',
1808
'cfi': 'fl',
1809
'cfe': 'fl',
1810
'cfn': 'fn',
1811
'jfi': 'fl',
1812
}
1813
1814
_position_map = {
1815
'ob': 'ob',
1816
'fl': 'fl',
1817
'fi': 'fl',
1818
'fe': 'fl',
1819
'fn': 'fn',
1820
'cob': 'cob',
1821
'cfl': 'cfl',
1822
'cfi': 'cfl',
1823
'cfe': 'cfl',
1824
'cfn': 'cfn',
1825
'jfi': 'jfi',
1826
}
1827
1828
def parse_position_spec(self):
1829
line = self.lookahead()
1830
1831
if line.startswith('jump=') or line.startswith('jcnd='):
1832
self.consume()
1833
return True
1834
1835
mo = self._position_re.match(line)
1836
if not mo:
1837
return False
1838
1839
position, id, name = mo.groups()
1840
if id:
1841
table = self._position_table_map[position]
1842
if name:
1843
self.position_ids[(table, id)] = name
1844
else:
1845
name = self.position_ids.get((table, id), '')
1846
self.positions[self._position_map[position]] = name
1847
1848
self.consume()
1849
return True
1850
1851
def parse_empty(self):
1852
if self.eof():
1853
return False
1854
line = self.lookahead()
1855
if line.strip():
1856
return False
1857
self.consume()
1858
return True
1859
1860
def parse_comment(self):
1861
line = self.lookahead()
1862
if not line.startswith('#'):
1863
return False
1864
self.consume()
1865
return True
1866
1867
_key_re = re.compile(r'^(\w+):')
1868
1869
def parse_key(self, key):
1870
pair = self.parse_keys((key,))
1871
if not pair:
1872
return None
1873
key, value = pair
1874
return value
1875
1876
def parse_keys(self, keys):
1877
line = self.lookahead()
1878
mo = self._key_re.match(line)
1879
if not mo:
1880
return None
1881
key, value = line.split(':', 1)
1882
if key not in keys:
1883
return None
1884
value = value.strip()
1885
self.consume()
1886
return key, value
1887
1888
def make_function(self, module, filename, name):
1889
# FIXME: module and filename are not being tracked reliably
1890
#id = '|'.join((module, filename, name))
1891
id = name
1892
try:
1893
function = self.profile.functions[id]
1894
except KeyError:
1895
function = Function(id, name)
1896
if module:
1897
function.module = os.path.basename(module)
1898
function[SAMPLES] = 0
1899
function.called = 0
1900
self.profile.add_function(function)
1901
return function
1902
1903
def get_function(self):
1904
module = self.positions.get('ob', '')
1905
filename = self.positions.get('fl', '')
1906
function = self.positions.get('fn', '')
1907
return self.make_function(module, filename, function)
1908
1909
def get_callee(self):
1910
module = self.positions.get('cob', '')
1911
filename = self.positions.get('cfi', '')
1912
function = self.positions.get('cfn', '')
1913
return self.make_function(module, filename, function)
1914
1915
1916
class PerfParser(LineParser):
1917
"""Parser for linux perf callgraph output.
1918
1919
It expects output generated with
1920
1921
perf record -g
1922
perf script | gprof2dot.py --format=perf
1923
"""
1924
1925
def __init__(self, infile):
1926
LineParser.__init__(self, infile)
1927
self.profile = Profile()
1928
1929
def readline(self):
1930
# Override LineParser.readline to ignore comment lines
1931
while True:
1932
LineParser.readline(self)
1933
if self.eof() or not self.lookahead().startswith('#'):
1934
break
1935
1936
def parse(self):
1937
# read lookahead
1938
self.readline()
1939
1940
profile = self.profile
1941
profile[SAMPLES] = 0
1942
while not self.eof():
1943
self.parse_event()
1944
1945
# compute derived data
1946
profile.validate()
1947
profile.find_cycles()
1948
profile.ratio(TIME_RATIO, SAMPLES)
1949
profile.call_ratios(SAMPLES2)
1950
if totalMethod == "callratios":
1951
# Heuristic approach. TOTAL_SAMPLES is unused.
1952
profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
1953
elif totalMethod == "callstacks":
1954
# Use the actual call chains for functions.
1955
profile[TOTAL_SAMPLES] = profile[SAMPLES]
1956
profile.ratio(TOTAL_TIME_RATIO, TOTAL_SAMPLES)
1957
# Then propagate that total time to the calls.
1958
for function in compat_itervalues(profile.functions):
1959
for call in compat_itervalues(function.calls):
1960
if call.ratio is not None:
1961
callee = profile.functions[call.callee_id]
1962
call[TOTAL_TIME_RATIO] = call.ratio * callee[TOTAL_TIME_RATIO]
1963
else:
1964
assert False
1965
1966
return profile
1967
1968
def parse_event(self):
1969
if self.eof():
1970
return
1971
1972
line = self.consume()
1973
assert line
1974
1975
callchain = self.parse_callchain()
1976
if not callchain:
1977
return
1978
1979
callee = callchain[0]
1980
callee[SAMPLES] += 1
1981
self.profile[SAMPLES] += 1
1982
1983
for caller in callchain[1:]:
1984
try:
1985
call = caller.calls[callee.id]
1986
except KeyError:
1987
call = Call(callee.id)
1988
call[SAMPLES2] = 1
1989
caller.add_call(call)
1990
else:
1991
call[SAMPLES2] += 1
1992
1993
callee = caller
1994
1995
# Increment TOTAL_SAMPLES only once on each function.
1996
stack = set(callchain)
1997
for function in stack:
1998
function[TOTAL_SAMPLES] += 1
1999
2000
def parse_callchain(self):
2001
callchain = []
2002
while self.lookahead():
2003
function = self.parse_call()
2004
if function is None:
2005
break
2006
callchain.append(function)
2007
if self.lookahead() == '':
2008
self.consume()
2009
return callchain
2010
2011
call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>.*)\)$')
2012
addr2_re = re.compile(r'\+0x[0-9a-fA-F]+$')
2013
2014
def parse_call(self):
2015
line = self.consume()
2016
mo = self.call_re.match(line)
2017
assert mo
2018
if not mo:
2019
return None
2020
2021
function_name = mo.group('symbol')
2022
2023
# If present, amputate program counter from function name.
2024
if function_name:
2025
function_name = re.sub(self.addr2_re, '', function_name)
2026
2027
if not function_name or function_name == '[unknown]':
2028
function_name = mo.group('address')
2029
2030
module = mo.group('module')
2031
2032
function_id = function_name + ':' + module
2033
2034
try:
2035
function = self.profile.functions[function_id]
2036
except KeyError:
2037
function = Function(function_id, function_name)
2038
function.module = os.path.basename(module)
2039
function[SAMPLES] = 0
2040
function[TOTAL_SAMPLES] = 0
2041
self.profile.add_function(function)
2042
2043
return function
2044
2045
2046
class OprofileParser(LineParser):
2047
"""Parser for oprofile callgraph output.
2048
2049
See also:
2050
- http://oprofile.sourceforge.net/doc/opreport.html#opreport-callgraph
2051
"""
2052
2053
_fields_re = {
2054
'samples': r'(\d+)',
2055
'%': r'(\S+)',
2056
'linenr info': r'(?P<source>\(no location information\)|\S+:\d+)',
2057
'image name': r'(?P<image>\S+(?:\s\(tgid:[^)]*\))?)',
2058
'app name': r'(?P<application>\S+)',
2059
'symbol name': r'(?P<symbol>\(no symbols\)|.+?)',
2060
}
2061
2062
def __init__(self, infile):
2063
LineParser.__init__(self, infile)
2064
self.entries = {}
2065
self.entry_re = None
2066
2067
def add_entry(self, callers, function, callees):
2068
try:
2069
entry = self.entries[function.id]
2070
except KeyError:
2071
self.entries[function.id] = (callers, function, callees)
2072
else:
2073
callers_total, function_total, callees_total = entry
2074
self.update_subentries_dict(callers_total, callers)
2075
function_total.samples += function.samples
2076
self.update_subentries_dict(callees_total, callees)
2077
2078
def update_subentries_dict(self, totals, partials):
2079
for partial in compat_itervalues(partials):
2080
try:
2081
total = totals[partial.id]
2082
except KeyError:
2083
totals[partial.id] = partial
2084
else:
2085
total.samples += partial.samples
2086
2087
def parse(self):
2088
# read lookahead
2089
self.readline()
2090
2091
self.parse_header()
2092
while self.lookahead():
2093
self.parse_entry()
2094
2095
profile = Profile()
2096
2097
reverse_call_samples = {}
2098
2099
# populate the profile
2100
profile[SAMPLES] = 0
2101
for _callers, _function, _callees in compat_itervalues(self.entries):
2102
function = Function(_function.id, _function.name)
2103
function[SAMPLES] = _function.samples
2104
profile.add_function(function)
2105
profile[SAMPLES] += _function.samples
2106
2107
if _function.application:
2108
function.process = os.path.basename(_function.application)
2109
if _function.image:
2110
function.module = os.path.basename(_function.image)
2111
2112
total_callee_samples = 0
2113
for _callee in compat_itervalues(_callees):
2114
total_callee_samples += _callee.samples
2115
2116
for _callee in compat_itervalues(_callees):
2117
if not _callee.self:
2118
call = Call(_callee.id)
2119
call[SAMPLES2] = _callee.samples
2120
function.add_call(call)
2121
2122
# compute derived data
2123
profile.validate()
2124
profile.find_cycles()
2125
profile.ratio(TIME_RATIO, SAMPLES)
2126
profile.call_ratios(SAMPLES2)
2127
profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2128
2129
return profile
2130
2131
def parse_header(self):
2132
while not self.match_header():
2133
self.consume()
2134
line = self.lookahead()
2135
fields = re.split(r'\s\s+', line)
2136
entry_re = r'^\s*' + r'\s+'.join([self._fields_re[field] for field in fields]) + r'(?P<self>\s+\[self\])?$'
2137
self.entry_re = re.compile(entry_re)
2138
self.skip_separator()
2139
2140
def parse_entry(self):
2141
callers = self.parse_subentries()
2142
if self.match_primary():
2143
function = self.parse_subentry()
2144
if function is not None:
2145
callees = self.parse_subentries()
2146
self.add_entry(callers, function, callees)
2147
self.skip_separator()
2148
2149
def parse_subentries(self):
2150
subentries = {}
2151
while self.match_secondary():
2152
subentry = self.parse_subentry()
2153
subentries[subentry.id] = subentry
2154
return subentries
2155
2156
def parse_subentry(self):
2157
entry = Struct()
2158
line = self.consume()
2159
mo = self.entry_re.match(line)
2160
if not mo:
2161
raise ParseError('failed to parse', line)
2162
fields = mo.groupdict()
2163
entry.samples = int(mo.group(1))
2164
if 'source' in fields and fields['source'] != '(no location information)':
2165
source = fields['source']
2166
filename, lineno = source.split(':')
2167
entry.filename = filename
2168
entry.lineno = int(lineno)
2169
else:
2170
source = ''
2171
entry.filename = None
2172
entry.lineno = None
2173
entry.image = fields.get('image', '')
2174
entry.application = fields.get('application', '')
2175
if 'symbol' in fields and fields['symbol'] != '(no symbols)':
2176
entry.symbol = fields['symbol']
2177
else:
2178
entry.symbol = ''
2179
if entry.symbol.startswith('"') and entry.symbol.endswith('"'):
2180
entry.symbol = entry.symbol[1:-1]
2181
entry.id = ':'.join((entry.application, entry.image, source, entry.symbol))
2182
entry.self = fields.get('self', None) != None
2183
if entry.self:
2184
entry.id += ':self'
2185
if entry.symbol:
2186
entry.name = entry.symbol
2187
else:
2188
entry.name = entry.image
2189
return entry
2190
2191
def skip_separator(self):
2192
while not self.match_separator():
2193
self.consume()
2194
self.consume()
2195
2196
def match_header(self):
2197
line = self.lookahead()
2198
return line.startswith('samples')
2199
2200
def match_separator(self):
2201
line = self.lookahead()
2202
return line == '-'*len(line)
2203
2204
def match_primary(self):
2205
line = self.lookahead()
2206
return not line[:1].isspace()
2207
2208
def match_secondary(self):
2209
line = self.lookahead()
2210
return line[:1].isspace()
2211
2212
2213
class HProfParser(LineParser):
2214
"""Parser for java hprof output
2215
2216
See also:
2217
- http://java.sun.com/developer/technicalArticles/Programming/HPROF.html
2218
"""
2219
2220
trace_re = re.compile(r'\t(.*)\((.*):(.*)\)')
2221
trace_id_re = re.compile(r'^TRACE (\d+):$')
2222
2223
def __init__(self, infile):
2224
LineParser.__init__(self, infile)
2225
self.traces = {}
2226
self.samples = {}
2227
2228
def parse(self):
2229
# read lookahead
2230
self.readline()
2231
2232
while not self.lookahead().startswith('------'): self.consume()
2233
while not self.lookahead().startswith('TRACE '): self.consume()
2234
2235
self.parse_traces()
2236
2237
while not self.lookahead().startswith('CPU'):
2238
self.consume()
2239
2240
self.parse_samples()
2241
2242
# populate the profile
2243
profile = Profile()
2244
profile[SAMPLES] = 0
2245
2246
functions = {}
2247
2248
# build up callgraph
2249
for id, trace in compat_iteritems(self.traces):
2250
if not id in self.samples: continue
2251
mtime = self.samples[id][0]
2252
last = None
2253
2254
for func, file, line in trace:
2255
if not func in functions:
2256
function = Function(func, func)
2257
function[SAMPLES] = 0
2258
profile.add_function(function)
2259
functions[func] = function
2260
2261
function = functions[func]
2262
# allocate time to the deepest method in the trace
2263
if not last:
2264
function[SAMPLES] += mtime
2265
profile[SAMPLES] += mtime
2266
else:
2267
c = function.get_call(last)
2268
c[SAMPLES2] += mtime
2269
2270
last = func
2271
2272
# compute derived data
2273
profile.validate()
2274
profile.find_cycles()
2275
profile.ratio(TIME_RATIO, SAMPLES)
2276
profile.call_ratios(SAMPLES2)
2277
profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2278
2279
return profile
2280
2281
def parse_traces(self):
2282
while self.lookahead().startswith('TRACE '):
2283
self.parse_trace()
2284
2285
def parse_trace(self):
2286
l = self.consume()
2287
mo = self.trace_id_re.match(l)
2288
tid = mo.group(1)
2289
last = None
2290
trace = []
2291
2292
while self.lookahead().startswith('\t'):
2293
l = self.consume()
2294
match = self.trace_re.search(l)
2295
if not match:
2296
#sys.stderr.write('Invalid line: %s\n' % l)
2297
break
2298
else:
2299
function_name, file, line = match.groups()
2300
trace += [(function_name, file, line)]
2301
2302
self.traces[int(tid)] = trace
2303
2304
def parse_samples(self):
2305
self.consume()
2306
self.consume()
2307
2308
while not self.lookahead().startswith('CPU'):
2309
rank, percent_self, percent_accum, count, traceid, method = self.lookahead().split()
2310
self.samples[int(traceid)] = (int(count), method)
2311
self.consume()
2312
2313
2314
class SysprofParser(XmlParser):
2315
2316
def __init__(self, stream):
2317
XmlParser.__init__(self, stream)
2318
2319
def parse(self):
2320
objects = {}
2321
nodes = {}
2322
2323
self.element_start('profile')
2324
while self.token.type == XML_ELEMENT_START:
2325
if self.token.name_or_data == 'objects':
2326
assert not objects
2327
objects = self.parse_items('objects')
2328
elif self.token.name_or_data == 'nodes':
2329
assert not nodes
2330
nodes = self.parse_items('nodes')
2331
else:
2332
self.parse_value(self.token.name_or_data)
2333
self.element_end('profile')
2334
2335
return self.build_profile(objects, nodes)
2336
2337
def parse_items(self, name):
2338
assert name[-1] == 's'
2339
items = {}
2340
self.element_start(name)
2341
while self.token.type == XML_ELEMENT_START:
2342
id, values = self.parse_item(name[:-1])
2343
assert id not in items
2344
items[id] = values
2345
self.element_end(name)
2346
return items
2347
2348
def parse_item(self, name):
2349
attrs = self.element_start(name)
2350
id = int(attrs['id'])
2351
values = self.parse_values()
2352
self.element_end(name)
2353
return id, values
2354
2355
def parse_values(self):
2356
values = {}
2357
while self.token.type == XML_ELEMENT_START:
2358
name = self.token.name_or_data
2359
value = self.parse_value(name)
2360
assert name not in values
2361
values[name] = value
2362
return values
2363
2364
def parse_value(self, tag):
2365
self.element_start(tag)
2366
value = self.character_data()
2367
self.element_end(tag)
2368
if value.isdigit():
2369
return int(value)
2370
if value.startswith('"') and value.endswith('"'):
2371
return value[1:-1]
2372
return value
2373
2374
def build_profile(self, objects, nodes):
2375
profile = Profile()
2376
2377
profile[SAMPLES] = 0
2378
for id, object in compat_iteritems(objects):
2379
# Ignore fake objects (process names, modules, "Everything", "kernel", etc.)
2380
if object['self'] == 0:
2381
continue
2382
2383
function = Function(id, object['name'])
2384
function[SAMPLES] = object['self']
2385
profile.add_function(function)
2386
profile[SAMPLES] += function[SAMPLES]
2387
2388
for id, node in compat_iteritems(nodes):
2389
# Ignore fake calls
2390
if node['self'] == 0:
2391
continue
2392
2393
# Find a non-ignored parent
2394
parent_id = node['parent']
2395
while parent_id != 0:
2396
parent = nodes[parent_id]
2397
caller_id = parent['object']
2398
if objects[caller_id]['self'] != 0:
2399
break
2400
parent_id = parent['parent']
2401
if parent_id == 0:
2402
continue
2403
2404
callee_id = node['object']
2405
2406
assert objects[caller_id]['self']
2407
assert objects[callee_id]['self']
2408
2409
function = profile.functions[caller_id]
2410
2411
samples = node['self']
2412
try:
2413
call = function.calls[callee_id]
2414
except KeyError:
2415
call = Call(callee_id)
2416
call[SAMPLES2] = samples
2417
function.add_call(call)
2418
else:
2419
call[SAMPLES2] += samples
2420
2421
# Compute derived events
2422
profile.validate()
2423
profile.find_cycles()
2424
profile.ratio(TIME_RATIO, SAMPLES)
2425
profile.call_ratios(SAMPLES2)
2426
profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2427
2428
return profile
2429
2430
2431
class XPerfParser(Parser):
2432
"""Parser for CSVs generted by XPerf, from Microsoft Windows Performance Tools.
2433
"""
2434
2435
def __init__(self, stream):
2436
Parser.__init__(self)
2437
self.stream = stream
2438
self.profile = Profile()
2439
self.profile[SAMPLES] = 0
2440
self.column = {}
2441
2442
def parse(self):
2443
import csv
2444
reader = csv.reader(
2445
self.stream,
2446
delimiter = ',',
2447
quotechar = None,
2448
escapechar = None,
2449
doublequote = False,
2450
skipinitialspace = True,
2451
lineterminator = '\r\n',
2452
quoting = csv.QUOTE_NONE)
2453
header = True
2454
for row in reader:
2455
if header:
2456
self.parse_header(row)
2457
header = False
2458
else:
2459
self.parse_row(row)
2460
2461
# compute derived data
2462
self.profile.validate()
2463
self.profile.find_cycles()
2464
self.profile.ratio(TIME_RATIO, SAMPLES)
2465
self.profile.call_ratios(SAMPLES2)
2466
self.profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2467
2468
return self.profile
2469
2470
def parse_header(self, row):
2471
for column in range(len(row)):
2472
name = row[column]
2473
assert name not in self.column
2474
self.column[name] = column
2475
2476
def parse_row(self, row):
2477
fields = {}
2478
for name, column in compat_iteritems(self.column):
2479
value = row[column]
2480
for factory in int, float:
2481
try:
2482
value = factory(value)
2483
except ValueError:
2484
pass
2485
else:
2486
break
2487
fields[name] = value
2488
2489
process = fields['Process Name']
2490
symbol = fields['Module'] + '!' + fields['Function']
2491
weight = fields['Weight']
2492
count = fields['Count']
2493
2494
if process == 'Idle':
2495
return
2496
2497
function = self.get_function(process, symbol)
2498
function[SAMPLES] += weight * count
2499
self.profile[SAMPLES] += weight * count
2500
2501
stack = fields['Stack']
2502
if stack != '?':
2503
stack = stack.split('/')
2504
assert stack[0] == '[Root]'
2505
if stack[-1] != symbol:
2506
# XXX: some cases the sampled function does not appear in the stack
2507
stack.append(symbol)
2508
caller = None
2509
for symbol in stack[1:]:
2510
callee = self.get_function(process, symbol)
2511
if caller is not None:
2512
try:
2513
call = caller.calls[callee.id]
2514
except KeyError:
2515
call = Call(callee.id)
2516
call[SAMPLES2] = count
2517
caller.add_call(call)
2518
else:
2519
call[SAMPLES2] += count
2520
caller = callee
2521
2522
def get_function(self, process, symbol):
2523
function_id = process + '!' + symbol
2524
2525
try:
2526
function = self.profile.functions[function_id]
2527
except KeyError:
2528
module, name = symbol.split('!', 1)
2529
function = Function(function_id, name)
2530
function.process = process
2531
function.module = module
2532
function[SAMPLES] = 0
2533
self.profile.add_function(function)
2534
2535
return function
2536
2537
2538
class SleepyParser(Parser):
2539
"""Parser for GNU gprof output.
2540
2541
See also:
2542
- http://www.codersnotes.com/sleepy/
2543
- http://sleepygraph.sourceforge.net/
2544
"""
2545
2546
stdinInput = False
2547
2548
def __init__(self, filename):
2549
Parser.__init__(self)
2550
2551
from zipfile import ZipFile
2552
2553
self.database = ZipFile(filename)
2554
2555
self.symbols = {}
2556
self.calls = {}
2557
2558
self.profile = Profile()
2559
2560
_symbol_re = re.compile(
2561
r'^(?P<id>\w+)' +
2562
r'\s+"(?P<module>[^"]*)"' +
2563
r'\s+"(?P<procname>[^"]*)"' +
2564
r'\s+"(?P<sourcefile>[^"]*)"' +
2565
r'\s+(?P<sourceline>\d+)$'
2566
)
2567
2568
def openEntry(self, name):
2569
# Some versions of verysleepy use lowercase filenames
2570
for database_name in self.database.namelist():
2571
if name.lower() == database_name.lower():
2572
name = database_name
2573
break
2574
2575
return self.database.open(name, 'r')
2576
2577
def parse_symbols(self):
2578
for line in self.openEntry('Symbols.txt'):
2579
line = line.decode('UTF-8').rstrip('\r\n')
2580
2581
mo = self._symbol_re.match(line)
2582
if mo:
2583
symbol_id, module, procname, sourcefile, sourceline = mo.groups()
2584
2585
function_id = ':'.join([module, procname])
2586
2587
try:
2588
function = self.profile.functions[function_id]
2589
except KeyError:
2590
function = Function(function_id, procname)
2591
function.module = module
2592
function[SAMPLES] = 0
2593
self.profile.add_function(function)
2594
2595
self.symbols[symbol_id] = function
2596
2597
def parse_callstacks(self):
2598
for line in self.openEntry('Callstacks.txt'):
2599
line = line.decode('UTF-8').rstrip('\r\n')
2600
2601
fields = line.split()
2602
samples = float(fields[0])
2603
callstack = fields[1:]
2604
2605
callstack = [self.symbols[symbol_id] for symbol_id in callstack]
2606
2607
callee = callstack[0]
2608
2609
callee[SAMPLES] += samples
2610
self.profile[SAMPLES] += samples
2611
2612
for caller in callstack[1:]:
2613
try:
2614
call = caller.calls[callee.id]
2615
except KeyError:
2616
call = Call(callee.id)
2617
call[SAMPLES2] = samples
2618
caller.add_call(call)
2619
else:
2620
call[SAMPLES2] += samples
2621
2622
callee = caller
2623
2624
def parse(self):
2625
profile = self.profile
2626
profile[SAMPLES] = 0
2627
2628
self.parse_symbols()
2629
self.parse_callstacks()
2630
2631
# Compute derived events
2632
profile.validate()
2633
profile.find_cycles()
2634
profile.ratio(TIME_RATIO, SAMPLES)
2635
profile.call_ratios(SAMPLES2)
2636
profile.integrate(TOTAL_TIME_RATIO, TIME_RATIO)
2637
2638
return profile
2639
2640
2641
class PstatsParser:
2642
"""Parser python profiling statistics saved with te pstats module."""
2643
2644
stdinInput = False
2645
multipleInput = True
2646
2647
def __init__(self, *filename):
2648
import pstats
2649
try:
2650
self.stats = pstats.Stats(*filename)
2651
except ValueError:
2652
if PYTHON_3:
2653
sys.stderr.write('error: failed to load %s\n' % ', '.join(filename))
2654
sys.exit(1)
2655
import hotshot.stats
2656
self.stats = hotshot.stats.load(filename[0])
2657
self.profile = Profile()
2658
self.function_ids = {}
2659
2660
def get_function_name(self, key):
2661
filename, line, name = key
2662
module = os.path.splitext(filename)[0]
2663
module = os.path.basename(module)
2664
return "%s:%d:%s" % (module, line, name)
2665
2666
def get_function(self, key):
2667
try:
2668
id = self.function_ids[key]
2669
except KeyError:
2670
id = len(self.function_ids)
2671
name = self.get_function_name(key)
2672
function = Function(id, name)
2673
function.filename = key[0]
2674
self.profile.functions[id] = function
2675
self.function_ids[key] = id
2676
else:
2677
function = self.profile.functions[id]
2678
return function
2679
2680
def parse(self):
2681
self.profile[TIME] = 0.0
2682
self.profile[TOTAL_TIME] = self.stats.total_tt
2683
for fn, (cc, nc, tt, ct, callers) in compat_iteritems(self.stats.stats):
2684
callee = self.get_function(fn)
2685
callee.called = nc
2686
callee[TOTAL_TIME] = ct
2687
callee[TIME] = tt
2688
self.profile[TIME] += tt
2689
self.profile[TOTAL_TIME] = max(self.profile[TOTAL_TIME], ct)
2690
for fn, value in compat_iteritems(callers):
2691
caller = self.get_function(fn)
2692
call = Call(callee.id)
2693
if isinstance(value, tuple):
2694
for i in xrange(0, len(value), 4):
2695
nc, cc, tt, ct = value[i:i+4]
2696
if CALLS in call:
2697
call[CALLS] += cc
2698
else:
2699
call[CALLS] = cc
2700
2701
if TOTAL_TIME in call:
2702
call[TOTAL_TIME] += ct
2703
else:
2704
call[TOTAL_TIME] = ct
2705
2706
else:
2707
call[CALLS] = value
2708
call[TOTAL_TIME] = ratio(value, nc)*ct
2709
2710
caller.add_call(call)
2711
2712
if False:
2713
self.stats.print_stats()
2714
self.stats.print_callees()
2715
2716
# Compute derived events
2717
self.profile.validate()
2718
self.profile.ratio(TIME_RATIO, TIME)
2719
self.profile.ratio(TOTAL_TIME_RATIO, TOTAL_TIME)
2720
2721
return self.profile
2722
2723
2724
formats = {
2725
"axe": AXEParser,
2726
"callgrind": CallgrindParser,
2727
"hprof": HProfParser,
2728
"json": JsonParser,
2729
"oprofile": OprofileParser,
2730
"perf": PerfParser,
2731
"prof": GprofParser,
2732
"pstats": PstatsParser,
2733
"sleepy": SleepyParser,
2734
"sysprof": SysprofParser,
2735
"xperf": XPerfParser,
2736
}
2737
2738
2739
########################################################################
2740
# Output
2741
2742
2743
class Theme:
2744
2745
def __init__(self,
2746
bgcolor = (0.0, 0.0, 1.0),
2747
mincolor = (0.0, 0.0, 0.0),
2748
maxcolor = (0.0, 0.0, 1.0),
2749
fontname = "Arial",
2750
fontcolor = "white",
2751
nodestyle = "filled",
2752
minfontsize = 10.0,
2753
maxfontsize = 10.0,
2754
minpenwidth = 0.5,
2755
maxpenwidth = 4.0,
2756
gamma = 2.2,
2757
skew = 1.0):
2758
self.bgcolor = bgcolor
2759
self.mincolor = mincolor
2760
self.maxcolor = maxcolor
2761
self.fontname = fontname
2762
self.fontcolor = fontcolor
2763
self.nodestyle = nodestyle
2764
self.minfontsize = minfontsize
2765
self.maxfontsize = maxfontsize
2766
self.minpenwidth = minpenwidth
2767
self.maxpenwidth = maxpenwidth
2768
self.gamma = gamma
2769
self.skew = skew
2770
2771
def graph_bgcolor(self):
2772
return self.hsl_to_rgb(*self.bgcolor)
2773
2774
def graph_fontname(self):
2775
return self.fontname
2776
2777
def graph_fontcolor(self):
2778
return self.fontcolor
2779
2780
def graph_fontsize(self):
2781
return self.minfontsize
2782
2783
def node_bgcolor(self, weight):
2784
return self.color(weight)
2785
2786
def node_fgcolor(self, weight):
2787
if self.nodestyle == "filled":
2788
return self.graph_bgcolor()
2789
else:
2790
return self.color(weight)
2791
2792
def node_fontsize(self, weight):
2793
return self.fontsize(weight)
2794
2795
def node_style(self):
2796
return self.nodestyle
2797
2798
def edge_color(self, weight):
2799
return self.color(weight)
2800
2801
def edge_fontsize(self, weight):
2802
return self.fontsize(weight)
2803
2804
def edge_penwidth(self, weight):
2805
return max(weight*self.maxpenwidth, self.minpenwidth)
2806
2807
def edge_arrowsize(self, weight):
2808
return 0.5 * math.sqrt(self.edge_penwidth(weight))
2809
2810
def fontsize(self, weight):
2811
return max(weight**2 * self.maxfontsize, self.minfontsize)
2812
2813
def color(self, weight):
2814
weight = min(max(weight, 0.0), 1.0)
2815
2816
hmin, smin, lmin = self.mincolor
2817
hmax, smax, lmax = self.maxcolor
2818
2819
if self.skew < 0:
2820
raise ValueError("Skew must be greater than 0")
2821
elif self.skew == 1.0:
2822
h = hmin + weight*(hmax - hmin)
2823
s = smin + weight*(smax - smin)
2824
l = lmin + weight*(lmax - lmin)
2825
else:
2826
base = self.skew
2827
h = hmin + ((hmax-hmin)*(-1.0 + (base ** weight)) / (base - 1.0))
2828
s = smin + ((smax-smin)*(-1.0 + (base ** weight)) / (base - 1.0))
2829
l = lmin + ((lmax-lmin)*(-1.0 + (base ** weight)) / (base - 1.0))
2830
2831
return self.hsl_to_rgb(h, s, l)
2832
2833
def hsl_to_rgb(self, h, s, l):
2834
"""Convert a color from HSL color-model to RGB.
2835
2836
See also:
2837
- http://www.w3.org/TR/css3-color/#hsl-color
2838
"""
2839
2840
h = h % 1.0
2841
s = min(max(s, 0.0), 1.0)
2842
l = min(max(l, 0.0), 1.0)
2843
2844
if l <= 0.5:
2845
m2 = l*(s + 1.0)
2846
else:
2847
m2 = l + s - l*s
2848
m1 = l*2.0 - m2
2849
r = self._hue_to_rgb(m1, m2, h + 1.0/3.0)
2850
g = self._hue_to_rgb(m1, m2, h)
2851
b = self._hue_to_rgb(m1, m2, h - 1.0/3.0)
2852
2853
# Apply gamma correction
2854
r **= self.gamma
2855
g **= self.gamma
2856
b **= self.gamma
2857
2858
return (r, g, b)
2859
2860
def _hue_to_rgb(self, m1, m2, h):
2861
if h < 0.0:
2862
h += 1.0
2863
elif h > 1.0:
2864
h -= 1.0
2865
if h*6 < 1.0:
2866
return m1 + (m2 - m1)*h*6.0
2867
elif h*2 < 1.0:
2868
return m2
2869
elif h*3 < 2.0:
2870
return m1 + (m2 - m1)*(2.0/3.0 - h)*6.0
2871
else:
2872
return m1
2873
2874
2875
TEMPERATURE_COLORMAP = Theme(
2876
mincolor = (2.0/3.0, 0.80, 0.25), # dark blue
2877
maxcolor = (0.0, 1.0, 0.5), # satured red
2878
gamma = 1.0
2879
)
2880
2881
PINK_COLORMAP = Theme(
2882
mincolor = (0.0, 1.0, 0.90), # pink
2883
maxcolor = (0.0, 1.0, 0.5), # satured red
2884
)
2885
2886
GRAY_COLORMAP = Theme(
2887
mincolor = (0.0, 0.0, 0.85), # light gray
2888
maxcolor = (0.0, 0.0, 0.0), # black
2889
)
2890
2891
BW_COLORMAP = Theme(
2892
minfontsize = 8.0,
2893
maxfontsize = 24.0,
2894
mincolor = (0.0, 0.0, 0.0), # black
2895
maxcolor = (0.0, 0.0, 0.0), # black
2896
minpenwidth = 0.1,
2897
maxpenwidth = 8.0,
2898
)
2899
2900
PRINT_COLORMAP = Theme(
2901
minfontsize = 18.0,
2902
maxfontsize = 30.0,
2903
fontcolor = "black",
2904
nodestyle = "solid",
2905
mincolor = (0.0, 0.0, 0.0), # black
2906
maxcolor = (0.0, 0.0, 0.0), # black
2907
minpenwidth = 0.1,
2908
maxpenwidth = 8.0,
2909
)
2910
2911
2912
themes = {
2913
"color": TEMPERATURE_COLORMAP,
2914
"pink": PINK_COLORMAP,
2915
"gray": GRAY_COLORMAP,
2916
"bw": BW_COLORMAP,
2917
"print": PRINT_COLORMAP,
2918
}
2919
2920
2921
def sorted_iteritems(d):
2922
# Used mostly for result reproducibility (while testing.)
2923
keys = compat_keys(d)
2924
keys.sort()
2925
for key in keys:
2926
value = d[key]
2927
yield key, value
2928
2929
2930
class DotWriter:
2931
"""Writer for the DOT language.
2932
2933
See also:
2934
- "The DOT Language" specification
2935
http://www.graphviz.org/doc/info/lang.html
2936
"""
2937
2938
strip = False
2939
wrap = False
2940
2941
def __init__(self, fp):
2942
self.fp = fp
2943
2944
def wrap_function_name(self, name):
2945
"""Split the function name on multiple lines."""
2946
2947
if len(name) > 32:
2948
ratio = 2.0/3.0
2949
height = max(int(len(name)/(1.0 - ratio) + 0.5), 1)
2950
width = max(len(name)/height, 32)
2951
# TODO: break lines in symbols
2952
name = textwrap.fill(name, width, break_long_words=False)
2953
2954
# Take away spaces
2955
name = name.replace(", ", ",")
2956
name = name.replace("> >", ">>")
2957
name = name.replace("> >", ">>") # catch consecutive
2958
2959
return name
2960
2961
show_function_events = [TOTAL_TIME_RATIO, TIME_RATIO]
2962
show_edge_events = [TOTAL_TIME_RATIO, CALLS]
2963
2964
def graph(self, profile, theme):
2965
self.begin_graph()
2966
2967
fontname = theme.graph_fontname()
2968
fontcolor = theme.graph_fontcolor()
2969
nodestyle = theme.node_style()
2970
2971
self.attr('graph', fontname=fontname, ranksep=0.25, nodesep=0.125)
2972
self.attr('node', fontname=fontname, shape="box", style=nodestyle, fontcolor=fontcolor, width=0, height=0)
2973
self.attr('edge', fontname=fontname)
2974
2975
for _, function in sorted_iteritems(profile.functions):
2976
labels = []
2977
if function.process is not None:
2978
labels.append(function.process)
2979
if function.module is not None:
2980
labels.append(function.module)
2981
2982
if self.strip:
2983
function_name = function.stripped_name()
2984
else:
2985
function_name = function.name
2986
2987
# dot can't parse quoted strings longer than YY_BUF_SIZE, which
2988
# defaults to 16K. But some annotated C++ functions (e.g., boost,
2989
# https://github.com/jrfonseca/gprof2dot/issues/30) can exceed that
2990
MAX_FUNCTION_NAME = 4096
2991
if len(function_name) >= MAX_FUNCTION_NAME:
2992
sys.stderr.write('warning: truncating function name with %u chars (%s)\n' % (len(function_name), function_name[:32] + '...'))
2993
function_name = function_name[:MAX_FUNCTION_NAME - 1] + unichr(0x2026)
2994
2995
if self.wrap:
2996
function_name = self.wrap_function_name(function_name)
2997
labels.append(function_name)
2998
2999
for event in self.show_function_events:
3000
if event in function.events:
3001
label = event.format(function[event])
3002
labels.append(label)
3003
if function.called is not None:
3004
labels.append("%u%s" % (function.called, MULTIPLICATION_SIGN))
3005
3006
if function.weight is not None:
3007
weight = function.weight
3008
else:
3009
weight = 0.0
3010
3011
label = '\n'.join(labels)
3012
self.node(function.id,
3013
label = label,
3014
color = self.color(theme.node_bgcolor(weight)),
3015
fontcolor = self.color(theme.node_fgcolor(weight)),
3016
fontsize = "%.2f" % theme.node_fontsize(weight),
3017
tooltip = function.filename,
3018
)
3019
3020
for _, call in sorted_iteritems(function.calls):
3021
callee = profile.functions[call.callee_id]
3022
3023
labels = []
3024
for event in self.show_edge_events:
3025
if event in call.events:
3026
label = event.format(call[event])
3027
labels.append(label)
3028
3029
if call.weight is not None:
3030
weight = call.weight
3031
elif callee.weight is not None:
3032
weight = callee.weight
3033
else:
3034
weight = 0.0
3035
3036
label = '\n'.join(labels)
3037
3038
self.edge(function.id, call.callee_id,
3039
label = label,
3040
color = self.color(theme.edge_color(weight)),
3041
fontcolor = self.color(theme.edge_color(weight)),
3042
fontsize = "%.2f" % theme.edge_fontsize(weight),
3043
penwidth = "%.2f" % theme.edge_penwidth(weight),
3044
labeldistance = "%.2f" % theme.edge_penwidth(weight),
3045
arrowsize = "%.2f" % theme.edge_arrowsize(weight),
3046
)
3047
3048
self.end_graph()
3049
3050
def begin_graph(self):
3051
self.write('digraph {\n')
3052
3053
def end_graph(self):
3054
self.write('}\n')
3055
3056
def attr(self, what, **attrs):
3057
self.write("\t")
3058
self.write(what)
3059
self.attr_list(attrs)
3060
self.write(";\n")
3061
3062
def node(self, node, **attrs):
3063
self.write("\t")
3064
self.id(node)
3065
self.attr_list(attrs)
3066
self.write(";\n")
3067
3068
def edge(self, src, dst, **attrs):
3069
self.write("\t")
3070
self.id(src)
3071
self.write(" -> ")
3072
self.id(dst)
3073
self.attr_list(attrs)
3074
self.write(";\n")
3075
3076
def attr_list(self, attrs):
3077
if not attrs:
3078
return
3079
self.write(' [')
3080
first = True
3081
for name, value in sorted_iteritems(attrs):
3082
if value is None:
3083
continue
3084
if first:
3085
first = False
3086
else:
3087
self.write(", ")
3088
self.id(name)
3089
self.write('=')
3090
self.id(value)
3091
self.write(']')
3092
3093
def id(self, id):
3094
if isinstance(id, (int, float)):
3095
s = str(id)
3096
elif isinstance(id, basestring):
3097
if id.isalnum() and not id.startswith('0x'):
3098
s = id
3099
else:
3100
s = self.escape(id)
3101
else:
3102
raise TypeError
3103
self.write(s)
3104
3105
def color(self, rgb):
3106
r, g, b = rgb
3107
3108
def float2int(f):
3109
if f <= 0.0:
3110
return 0
3111
if f >= 1.0:
3112
return 255
3113
return int(255.0*f + 0.5)
3114
3115
return "#" + "".join(["%02x" % float2int(c) for c in (r, g, b)])
3116
3117
def escape(self, s):
3118
if not PYTHON_3:
3119
s = s.encode('utf-8')
3120
s = s.replace('\\', r'\\')
3121
s = s.replace('\n', r'\n')
3122
s = s.replace('\t', r'\t')
3123
s = s.replace('"', r'\"')
3124
return '"' + s + '"'
3125
3126
def write(self, s):
3127
self.fp.write(s)
3128
3129
3130
3131
########################################################################
3132
# Main program
3133
3134
3135
def naturalJoin(values):
3136
if len(values) >= 2:
3137
return ', '.join(values[:-1]) + ' or ' + values[-1]
3138
3139
else:
3140
return ''.join(values)
3141
3142
3143
def main():
3144
"""Main program."""
3145
3146
global totalMethod
3147
3148
formatNames = list(formats.keys())
3149
formatNames.sort()
3150
3151
optparser = optparse.OptionParser(
3152
usage="\n\t%prog [options] [file] ...")
3153
optparser.add_option(
3154
'-o', '--output', metavar='FILE',
3155
type="string", dest="output",
3156
help="output filename [stdout]")
3157
optparser.add_option(
3158
'-n', '--node-thres', metavar='PERCENTAGE',
3159
type="float", dest="node_thres", default=0.5,
3160
help="eliminate nodes below this threshold [default: %default]")
3161
optparser.add_option(
3162
'-e', '--edge-thres', metavar='PERCENTAGE',
3163
type="float", dest="edge_thres", default=0.1,
3164
help="eliminate edges below this threshold [default: %default]")
3165
optparser.add_option(
3166
'-f', '--format',
3167
type="choice", choices=formatNames,
3168
dest="format", default="prof",
3169
help="profile format: %s [default: %%default]" % naturalJoin(formatNames))
3170
optparser.add_option(
3171
'--total',
3172
type="choice", choices=('callratios', 'callstacks'),
3173
dest="totalMethod", default=totalMethod,
3174
help="preferred method of calculating total time: callratios or callstacks (currently affects only perf format) [default: %default]")
3175
optparser.add_option(
3176
'-c', '--colormap',
3177
type="choice", choices=('color', 'pink', 'gray', 'bw', 'print'),
3178
dest="theme", default="color",
3179
help="color map: color, pink, gray, bw, or print [default: %default]")
3180
optparser.add_option(
3181
'-s', '--strip',
3182
action="store_true",
3183
dest="strip", default=False,
3184
help="strip function parameters, template parameters, and const modifiers from demangled C++ function names")
3185
optparser.add_option(
3186
'--colour-nodes-by-selftime',
3187
action="store_true",
3188
dest="colour_nodes_by_selftime", default=False,
3189
help="colour nodes by self time, rather than by total time (sum of self and descendants)")
3190
optparser.add_option(
3191
'-w', '--wrap',
3192
action="store_true",
3193
dest="wrap", default=False,
3194
help="wrap function names")
3195
optparser.add_option(
3196
'--show-samples',
3197
action="store_true",
3198
dest="show_samples", default=False,
3199
help="show function samples")
3200
# add option to create subtree or show paths
3201
optparser.add_option(
3202
'-z', '--root',
3203
type="string",
3204
dest="root", default="",
3205
help="prune call graph to show only descendants of specified root function")
3206
optparser.add_option(
3207
'-l', '--leaf',
3208
type="string",
3209
dest="leaf", default="",
3210
help="prune call graph to show only ancestors of specified leaf function")
3211
# add a new option to control skew of the colorization curve
3212
optparser.add_option(
3213
'--skew',
3214
type="float", dest="theme_skew", default=1.0,
3215
help="skew the colorization curve. Values < 1.0 give more variety to lower percentages. Values > 1.0 give less variety to lower percentages")
3216
(options, args) = optparser.parse_args(sys.argv[1:])
3217
3218
if len(args) > 1 and options.format != 'pstats':
3219
optparser.error('incorrect number of arguments')
3220
3221
try:
3222
theme = themes[options.theme]
3223
except KeyError:
3224
optparser.error('invalid colormap \'%s\'' % options.theme)
3225
3226
# set skew on the theme now that it has been picked.
3227
if options.theme_skew:
3228
theme.skew = options.theme_skew
3229
3230
totalMethod = options.totalMethod
3231
3232
try:
3233
Format = formats[options.format]
3234
except KeyError:
3235
optparser.error('invalid format \'%s\'' % options.format)
3236
3237
if Format.stdinInput:
3238
if not args:
3239
fp = sys.stdin
3240
elif PYTHON_3:
3241
fp = open(args[0], 'rt', encoding='UTF-8')
3242
else:
3243
fp = open(args[0], 'rt')
3244
parser = Format(fp)
3245
elif Format.multipleInput:
3246
if not args:
3247
optparser.error('at least a file must be specified for %s input' % options.format)
3248
parser = Format(*args)
3249
else:
3250
if len(args) != 1:
3251
optparser.error('exactly one file must be specified for %s input' % options.format)
3252
parser = Format(args[0])
3253
3254
profile = parser.parse()
3255
3256
if options.output is None:
3257
if PYTHON_3:
3258
output = open(sys.stdout.fileno(), mode='wt', encoding='UTF-8', closefd=False)
3259
else:
3260
output = sys.stdout
3261
else:
3262
if PYTHON_3:
3263
output = open(options.output, 'wt', encoding='UTF-8')
3264
else:
3265
output = open(options.output, 'wt')
3266
3267
dot = DotWriter(output)
3268
dot.strip = options.strip
3269
dot.wrap = options.wrap
3270
if options.show_samples:
3271
dot.show_function_events.append(SAMPLES)
3272
3273
profile = profile
3274
profile.prune(options.node_thres/100.0, options.edge_thres/100.0, options.colour_nodes_by_selftime)
3275
3276
if options.root:
3277
rootId = profile.getFunctionId(options.root)
3278
if not rootId:
3279
sys.stderr.write('root node ' + options.root + ' not found (might already be pruned : try -e0 -n0 flags)\n')
3280
sys.exit(1)
3281
profile.prune_root(rootId)
3282
if options.leaf:
3283
leafId = profile.getFunctionId(options.leaf)
3284
if not leafId:
3285
sys.stderr.write('leaf node ' + options.leaf + ' not found (maybe already pruned : try -e0 -n0 flags)\n')
3286
sys.exit(1)
3287
profile.prune_leaf(leafId)
3288
3289
dot.graph(profile, theme)
3290
3291
3292
if __name__ == '__main__':
3293
main()
3294
3295