-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathroofline.py
More file actions
179 lines (141 loc) · 6.84 KB
/
roofline.py
File metadata and controls
179 lines (141 loc) · 6.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from __future__ import print_function
import sys
import os
import math
import pandas as pd
import numpy as np
import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib import cm
from colorspacious import cspace_converter
from collections import OrderedDict
# This style requires $DISPLAY available.
# Use it instead of matplotlib.use('Agg') if you have GUI environment
#matplotlib.style.use('ggplot')
# Limit how many DataFrame rows pandas prints to the console.
pd.set_option('display.max_rows', 20)

# Directory holding the Advisor Python API; it is appended to the module
# search path so that the `import advisor` below can be resolved.
advisor_path = 'c:/Program Files (x86)/IntelSWTools/Advisor 2020/pythonapi'
sys.path.append(advisor_path)
print(sys.path)

# Names of the compute roofs that are picked out of the roofs reported by
# Advisor and drawn on the chart.
gflops_roof_names = [
    'Scalar Add Peak',
    'DP Vector Add Peak',
    'DP Vector FMA Peak',
]

try:
    import advisor
except ImportError:
    # Without the Advisor API nothing below can run: explain the likely
    # causes and stop with exit status 1.
    import_help = (
        "Import error: Python could not load advisor python library. Possible reasons:\n"
        "1. Python cannot resolve path to Advisor's pythonapi directory. "
        "To fix, either manually add path to the pythonapi directory into PYTHONPATH environment variable,"
        " or use advixe-vars.* scripts to set up product environment variables automatically.\n"
        "2. Incompatible runtime versions used by advisor python library and other packages "
        "(such as matplotlib or pandas). To fix, either try to change import order or update other package "
        "version if possible."
    )
    print(import_help)
    sys.exit(1)
# The project directory is a mandatory first argument; print a usage hint
# and stop with exit status 2 when it is missing.
if not sys.argv[1:]:
    print('Usage: "python {} path_to_project_dir <num_cores>"'.format(__file__))
    sys.exit(2)

# Open the Advisor project and load its Survey results.
project = advisor.open_project(sys.argv[1])
data = project.load(advisor.SURVEY)

# The total elapsed time is taken from the first entry of the top-down stack.
top_entry = next(data.topdown)
tot_elapsed_time = float(top_entry.total_elapsed_time)
print(tot_elapsed_time)

# Copy every bottom-up row into a plain dict keyed by column name.
rows = []
for entry in data.bottomup:
    rows.append({column: entry[column] for column in entry})
# Number of cores (or threads) the roofs are requested for. It is read from
# the optional second command-line argument and defaults to 1.
num_cores = 1
if len(sys.argv) >= 3:
    # `>= 3` rather than `== 3`: extra trailing arguments must not cause a
    # user-supplied core count to be silently ignored.
    try:
        num_cores = int(sys.argv[2])
    except ValueError:
        # Report a bad value the same way as a missing argument (exit
        # status 2) instead of failing with a traceback.
        print('Invalid <num_cores> value: {!r} (expected an integer)'.format(sys.argv[2]))
        sys.exit(2)

# One core selects the single-threaded roofs; any other value selects the
# multi-threaded roofs for that many cores.
if num_cores == 1:
    roofs = data.get_roofs(1, advisor.RoofsStrategy.SINGLE_THREAD)
else:
    roofs = data.get_roofs(num_cores, advisor.RoofsStrategy.MULTI_THREAD)
# Build the table of points to plot from the bottom-up rows; empty strings
# are treated as missing values.
df = pd.DataFrame(rows).replace('', np.nan)
df['self_elapsed_time'] = df['self_elapsed_time'].astype(float)
df['self_gflop'] = df['self_gflop'].astype(float)

# Aggregate rows sharing the same call site/loop and arithmetic intensity:
# elapsed time and GFLOP counts are summed, the key columns keep their
# first value.
aggregation_functions = {
    'function_call_sites_and_loops': 'first',
    'self_elapsed_time': 'sum',
    'self_gflop': 'sum',
    'self_ai': 'first',
}
df = df.groupby(['function_call_sites_and_loops', 'self_ai']).aggregate(aggregation_functions)
df['self_ai'] = df['self_ai'].astype(float)

# Keep only entries with a known arithmetic intensity above 1e-8.
# `.copy()` detaches the filtered frame so that the column assignments
# below do not write into a slice of the aggregated frame.
df = df.dropna(subset=['self_ai'])
df = df.loc[df['self_ai'] > 1.e-8].copy()

# Share of the total elapsed time spent in each entry, in percent.
df['weight'] = df['self_elapsed_time'] / tot_elapsed_time * 100
# Marker size is 30 times the weight but never below 30. Computed
# column-wise so that it also works when the frame is empty (a row-wise
# `apply` returns a DataFrame in that case and the assignment fails).
df['marker_size'] = (df['weight'] * 30).clip(lower=30)
df['self_gflops'] = df['self_gflop'] / df['self_elapsed_time']

# Keep only entries that account for more than 0.5% of the total elapsed
# time (`weight` is expressed in percent), heaviest first.
df = df[df['weight'] > 0.5]
df = df.sort_values(by=['weight'], ascending=False)
print(df[['weight', 'self_ai', 'self_gflops']])
width = 1000  # upper x-axis limit; the compute roofs are drawn up to it
fig, ax = plt.subplots()

# First pass over the roofs: only collect the values. Every compute roof
# needs the DRAM bandwidth to locate its ridge point, so nothing is drawn
# until all roofs have been seen. This keeps the result independent of the
# order in which the roofs are reported.
bandwidth = None
compute_roofs = []
for roof in roofs:
    roof_trunc = roof.name
    if num_cores == 1:
        # remove '(single-threaded)'
        roof_trunc = roof_trunc.replace(' (single-threaded)', '')
    # memory roof
    if roof_trunc == 'DRAM Bandwidth':
        bandwidth = roof.bandwidth / math.pow(10, 9)  # converting to GByte/s
        bw_label = '{} {:.0f} GB/s'.format(roof_trunc, bandwidth)
    # compute roofs
    if roof_trunc in gflops_roof_names:
        gflops = roof.bandwidth / math.pow(10, 9)  # converting to GFLOPS
        compute_roofs.append((roof_trunc, gflops))

# The chart cannot be drawn without the memory roof and the two compute
# roofs that delimit the memory-bound / compute-bound regions. Stop with a
# clear message instead of failing later on an undefined name.
peaks = dict(compute_roofs)
missing_roofs = [name for name in ('DP Vector FMA Peak', 'Scalar Add Peak') if name not in peaks]
if bandwidth is None:
    missing_roofs.insert(0, 'DRAM Bandwidth')
if missing_roofs:
    print('Cannot draw the roofline chart, required roofs were not found: {}'.format(
        ', '.join(missing_roofs)))
    sys.exit(3)
gflops_dp_fma = peaks['DP Vector FMA Peak']
gflops_scalar_add = peaks['Scalar Add Peak']

# Second pass: draw each compute roof as a horizontal line. It is solid to
# the right of its ridge point (gflops / bandwidth) and dashed to the left.
for roof_name, gflops in compute_roofs:
    ridge_ai = gflops / bandwidth
    label = '{} {:.0f} GFLOPS'.format(roof_name, gflops)
    ax.annotate(label, xy=(width, gflops), xytext=(-5, 4), textcoords="offset points",
                horizontalalignment='right')
    ax.plot([ridge_ai, width], [gflops, gflops], '-', label=label, color='black')
    # The dashed continuation carries no label so that a legend would list
    # each roof only once.
    ax.plot([0, ridge_ai], [gflops, gflops], '--', color='black')

# plot BW roofline: y = bandwidth * x from the origin up to the FMA ridge
fma_ridge_ai = gflops_dp_fma / bandwidth
# Slope angle of the bandwidth line in data coordinates, in degrees.
angle_data = np.rad2deg(np.arctan2(gflops_dp_fma, fma_ridge_ai))
ax.plot([0, fma_ridge_ai], [0, gflops_dp_fma], '-', color='red')

ylim0 = 1e-2  # lower y-axis limit; the vertical region markers start here

# plot different regions of bound type
# Left of the scalar-add ridge point: memory bound.
scalar_ridge_ai = gflops_scalar_add / bandwidth
ax.plot([scalar_ridge_ai, scalar_ridge_ai], [ylim0, bandwidth * scalar_ridge_ai], '--', color='red')
ax.annotate('Memory bound', xy=(scalar_ridge_ai, ylim0), xytext=(-5, 35), textcoords="offset points",
            horizontalalignment='right', color='red', fontsize=12)
# Right of the DP vector FMA ridge point: compute bound.
ax.plot([fma_ridge_ai, fma_ridge_ai], [ylim0, gflops_dp_fma], '--', color='red')
ax.annotate('Compute bound', xy=(fma_ridge_ai, ylim0), xytext=(+5, 35), textcoords="offset points",
            horizontalalignment='left', color='red', fontsize=12)
ax.tick_params(which='major', labelsize=14, length=6)
ax.tick_params(which='minor', labelsize=14, length=3)

# Log-log axes; non-positive coordinates are clipped rather than masked
# because some of the roof lines start at 0.
try:
    # Spelling used by Matplotlib >= 3.3. The axis-specific `nonposx` /
    # `nonposy` keywords were deprecated in 3.3 and removed in 3.5.
    ax.set_xscale('log', nonpositive='clip')
    ax.set_yscale('log', nonpositive='clip')
except (TypeError, ValueError):
    # Older Matplotlib releases only accept the axis-specific keywords.
    ax.set_xscale('log', nonposx='clip')
    ax.set_yscale('log', nonposy='clip')
ax.set(ylim=(ylim0, 1e+3), xlim=(1e-3, width))
ax.set_xlabel('Arithmetic intensity [FLOP/byte]', fontsize=14)
ax.set_ylabel('Performance [GFLOP/sec]', fontsize=14)

# Dashed gray grid on both axes, drawn below the plotted data.
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray', linestyle='dashed')
ax.xaxis.grid(color='gray', linestyle='dashed')
# Scatter the measured entries: x is the arithmetic intensity, y the achieved
# GFLOPS, the colour encodes the share of elapsed time and the marker size
# comes from the `marker_size` column.
scatter_style = dict(
    c=df.weight,
    s=df.marker_size,
    alpha=0.6,
    linewidths=0.7,
    edgecolors='black',
    cmap='rainbow',
)
sc = ax.scatter(df.self_ai, df.self_gflops, **scatter_style)
plt.colorbar(sc, label='Self elapsed time [%]')
plt.tight_layout()

# Bandwidth label. Order is important: it is placed AFTER the axis limits
# are applied, because the on-screen angle of the bandwidth line is derived
# from the data-to-display transform.
bw_label_x = 0.001
bw_label_loc = np.array((bw_label_x, bandwidth * bw_label_x))
data_angles = np.array((angle_data,))
angle_screen = ax.transData.transform_angles(data_angles, bw_label_loc.reshape((1, 2)))[0]
# `annotate` lets the text be offset from the anchor in units of points.
ax.annotate(
    bw_label,
    xy=tuple(bw_label_loc),
    xytext=(10, 15),
    textcoords="offset points",
    rotation_mode='anchor',
    rotation=angle_screen,
)

# Save the chart as a PNG image and in SVG vector format.
for image_name in ('roofline.png', 'roofline.svg'):
    plt.savefig(image_name)
print('Roofline chart has been generated and saved into roofline.png and roofline.svg files in the current directory')