auto-vectorization check for gcc (#172)

* Autovectorization tool:
  - sync output for gnu make
  - Reduced html output
  - links for line numbers
  - AutoVectorization.md

  Signed-off-by: AbdelRauf <rauf@konduit.ai>

* Detailed report with `-fsave-optimization-record` option

  Signed-off-by: AbdelRauf <rauf@konduit.ai>

* Readme

  Signed-off-by: AbdelRauf <rauf@konduit.ai>

Co-authored-by: raver119 <raver119@gmail.com>

parent 7a7ee4b021
commit f25056363b
@@ -5,7 +5,7 @@ option(NATIVE "Optimize for build machine (might not work on others)" OFF)
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
#ensure we create lib files
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS OFF)

option(CHECK_VECTORIZATION "checks for vectorization" OFF)
option(BUILD_TESTS "Build tests" OFF)
option(FLATBUFFERS_BUILD_FLATC "Enable the build of the flatbuffers compiler" OFF)
set(FLATBUFFERS_BUILD_FLATC "OFF" CACHE STRING "Hack to disable flatc build" FORCE)
@@ -17,8 +17,11 @@ There are a few additional arguments for the `buildnativeoperations.sh` script you could use:

-b release OR -b debug // enables/disables debug builds. release is considered by default
-j XX // this argument defines how many threads will be used to build binaries on your box. i.e. -j 8
-cc XX // CUDA-only argument, builds only binaries for the target GPU architecture. use this for fast builds
--check-vectorization auto-vectorization report for developers. (Currently, only GCC is supported)
```

[More about the AutoVectorization report](auto_vectorization/AutoVectorization.md)

You can find the compute capability for your card [on the NVIDIA website here](https://developer.nvidia.com/cuda-gpus).

For example, a GTX 1080 has compute capability 6.1, for which you would use ```-cc 61``` (note no decimal point).
@@ -0,0 +1,49 @@
# Auto-vectorization Report

This report tool is used to get a human-friendly compiler output of the auto-vectorization process. It is intended to help developers investigate the obstacles the compiler faced during auto-vectorization.

## Usage
The ```--check-vectorization``` option should be added to the **release** build to get the auto-vectorization report:
```./buildnativeoperations.sh -a native -j 28 --check-vectorization```
It will output ```vecmiss.html``` inside the blasbuild/cpu folder.
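Under the hood, `buildnativeoperations.sh` pipes the compiler diagnostics into `auto_vect.py`, which reads them from stdin. The sketch below shows the same flow done by hand, assuming `auto_vect.py` is importable and you captured the diagnostics into a file (`build.log` is a hypothetical name):

```python
# Sketch: generate vecmiss.html from a captured GCC -fopt-info log,
# reusing the helpers that auto_vect.py's main() calls (build.log is hypothetical).
from auto_vect import obtain_info_from, generate_report

with open("build.log") as f:
    file_info = obtain_info_from(f)

if len(file_info) > 0:
    generate_report("vecmiss.html", file_info)
```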
## Report Format
Each file name row contains info about the optimization attempts for that file's source code lines.
Each line number is also expandable (⇲) and contains the distinct failure notes.
It is possible to click on the line number to see the source code.

| file name | total successful attempts | total failed attempts | ⇲ |
|---|---|---|---|
| line number | successful attempts | failed attempts | ⇲ |
| - failure reasons | | | |
| line number | successful attempts | failed attempts | ⇲ |
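For orientation, this sketch renders a single report entry with the helpers from `auto_vect.py` (the file name, line number, and counts are made up for illustration):

```python
# Sketch: build one File_Info entry and emit its collapsible HTML block.
# add() takes (line_number, (successful, failed, failure_note)).
from auto_vect import File_Info, get_content

fi = File_Info().add(120, (1, 2, "not vectorized: complicated access pattern"))
print(get_content("./some/file.cpp", fi))
```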
##### Requirements
- GCC (Currently, only GCC is supported)
- python3

### Detailed report with the `-fsave-optimization-record` option:
If you want to get more detailed information (for now, it reports the functions where failures occurred) you should use a newer toolchain (GCC > 9), as newer GCC versions have the `-fsave-optimization-record` option.
When run through `buildnativeoperations.sh`, CMake will detect it and switch to the more detailed version.
Please note that this option is still experimental, so the compiler can fail to output some json.gz files with an error.
In that case, try to exclude those files from the build.
Also, the internal structure of the `-fsave-optimization-record` json.gz output can change in the future.

It outputs two files, **vecmiss_fsave.html** and **vecmiss_fsave.html.js**. To see the report details you need JavaScript enabled in your browser.

##### Requirements for the Detailed report
- GCC version > 9
- python3
- Cython (python3)
- json (python3)
- gzip (python3)
- c++filt

Internally, we use Cython to speed up json.gz file processing (bigGzipJson.pyx), because json.gz files can take a lot of memory when loaded whole.

If you want to use bigGzipJson outside of `buildnativeoperations.sh` and CMake, you should compile it manually using this command in the auto_vectorization folder:
`python3 cython_setup.py build_ext --inplace`

json.gz files can also be processed outside of `buildnativeoperations.sh`.
You need to call `python3 auto_vect.py --fsave` inside the base source folder where the json.gz files exist.
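If you only want to inspect the extracted optimization records yourself, the sketch below iterates them directly with `json_gzip_extract_objects` from the compiled `bigGzipJson` module, using the same ('message', 'vectorized') filter that `auto_vect.py` uses (the file name is hypothetical):

```python
# Sketch: stream optimization records out of a GCC json.gz file
# (somefile.json.gz is a hypothetical name).
from bigGzipJson import json_gzip_extract_objects

for obj in json_gzip_extract_objects("somefile.json.gz", "message", "vectorized"):
    loc = obj.get("location", {})
    print(obj.get("kind"), loc.get("file"), loc.get("line"))
```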
@@ -0,0 +1,546 @@
'''
@author : Abdelrauf rauf@konduit.ai
'''
import re
import sys
import os
import subprocess
import fnmatch
import json
import gzip
try:
    from bigGzipJson import json_gzip_extract_objects
except ImportError:
    pass
from pathlib import Path
from multiprocessing import Pool, Manager, cpu_count
import traceback
import html

mtch = re.compile(r"[^/]*([^:]+)\:(\d+)\:(\d+)\:(.*)")
replace_msg = re.compile(r"(\d+)?\.?(\d+)?_?\d+\.?(\d+)?")
progress_msg = re.compile(r"\s{0,4}\[\s{0,2}\d+\%\]")
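# Note: mtch parses "file:line:col: message" diagnostics from GCC's -fopt-info output;
# replace_msg collapses numeric literals in messages (to "_numb") so similar notes deduplicate;
# progress_msg recognizes gnu make's "[ NN%]" progress lines.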
file_dir_strip = str(Path(os.getcwd()))
pp_index = file_dir_strip.rfind("libnd4j")
if pp_index >= 0:
    file_dir_strip = file_dir_strip[:pp_index + len("libnd4j")]
BASE_URL = "https://github.com/eclipse/deeplearning4j/tree/master/libnd4j/"
if BASE_URL.endswith("/") == False:
    BASE_URL = BASE_URL + "/"
#print(file_dir_strip)


class info:
    def __repr__(self):
        return str(self.__dict__)


FSAVE_IGNORE_EXTERNALS = True


def get_cxx_filt_result(strx):
    if len(strx) < 1:
        return ""
    res = subprocess.Popen(["c++filt", "-i", strx], stdout=subprocess.PIPE).communicate()[0]
    res = res.decode('utf-8')
    #replace some long names to reduce size
    res = res.replace("unsigned long long", "uLL")
    res = res.replace("unsigned long int", "uL")
    res = res.replace("unsigned long", "uL")
    res = res.replace("unsigned int", "ui")
    res = res.replace("unsigned char", "uchar")
    res = res.replace("unsigned short", "ushort")
    res = res.replace("long long", "LL")
    res = res.replace(", ", ",")
    return res.strip()


def internal_glob(dir, match):
    listx = []
    for root, dirnames, filenames in os.walk(dir):
        for filename in fnmatch.filter(filenames, match):
            listx.append(os.path.join(root, filename))
    return listx


def get_obj_json_gz(filename):
    with gzip.GzipFile(filename, 'r') as f:
        return json.loads(f.read().decode('utf-8'))[-1]


def get_msg(msg):
    msg = msg.lower().strip()
    if "note: not vectorized:" in msg:
        msg = replace_msg.sub("_numb", msg.replace("note: not vectorized:", ""))
        return (0, 1, msg.strip())
    elif "loop vectorized" in msg:
        return (1, 0, None)
    # elif msg.startswith("missed")==False:
    #     msg = replace_msg.sub("_numb",msg)
    #     return (0, 0, msg.strip())
    return None


class File_Info:
    '''
    Holds information about vectorized and missed-vectorization lines for one file
    '''

    def __init__(self):
        self.infos = {}
        self.total_opted = 0
        self.total_missed = 0
        self.external = False

    def add_line(self, line_pos):
        if line_pos not in self.infos:
            v = info()
            v.optimized = 0
            v.missed = 0
            v.miss_details = set()
            self.infos[line_pos] = v
            return v
        else:
            return self.infos[line_pos]

    def add_line_fsave(self, line_pos):
        if line_pos not in self.infos:
            v = info()
            v.optimized = 0
            v.missed = 0
            v.miss_details2 = dict()
            self.infos[line_pos] = v
            return v
        else:
            return self.infos[line_pos]

    def add_fsave(self, line_pos, success, msg, function, inline_fns=''):
        v = self.add_line_fsave(line_pos)
        if success and "loop vectorized" in msg:
            v.optimized += 1
            self.total_opted += 1
        elif success == False and "not vectorized:" in msg:
            #reduce this msg
            msg = msg.replace("not vectorized:", "")
            v.missed += 1
            self.total_missed += 1
            msg = sys.intern(msg)
            if msg in v.miss_details2:
                ls = v.miss_details2.get(msg)
                ls.add(function)
            else:
                ls = set()
                v.miss_details2[msg] = ls
                ls.add(function)
        return self

    def add(self, line_pos, msg_x):
        v = self.add_line(line_pos)
        if msg_x is not None:
            v.optimized += msg_x[0]
            v.missed += msg_x[1]
            self.total_opted += msg_x[0]
            self.total_missed += msg_x[1]
            if msg_x[2] is not None:
                v.miss_details.add(msg_x[2])
        return self

    def __repr__(self):
        return str(self.__dict__)


def process_gzip_json_mp(args):
    process_gzip_json_new(*args)


def process_gzip_json_new(json_gz_fname, list_Queue):
    gz_name = Path(json_gz_fname).stem
    #print("::--open and process {0}".format(gz_name))
    queue_count = len(list_Queue)
    q = list_Queue[0]
    old_fname = ''
    total_c = 0
    for x in json_gzip_extract_objects(json_gz_fname, 'message', 'vectorized'):
        external_source = True
        if len(x['message']) > 0 and 'location' in x:
            line = int(x['location']['line'])
            file_name = x['location']['file'].strip()
            if file_dir_strip in file_name:
                file_name = file_name.replace(file_dir_strip, './')
                external_source = False
            msg = x['message'][0]
            success = x['kind'] == 'success'
            func = '' if 'function' not in x else x['function']

            if file_name != old_fname:
                #send our info to the right consumer
                queue_ind = hash(file_name) % queue_count
                q = list_Queue[queue_ind]
                old_fname = file_name
            total_c += 1
            if FSAVE_IGNORE_EXTERNALS == True and external_source == True:
                continue
            q.put((file_name, line, success, msg, func, external_source))
    print("::finished {0:60s} :{1:8d}".format(gz_name, total_c))


def consume_processed_mp(args):
    return consume_processed_new(*args)


def consume_processed_new(list_Queue, c_index):

    info_ = dict()
    func_list = dict()
    last_func_index = 0
    q = list_Queue[c_index]
    print("::consumer {0}".format(c_index))
    total_c = 0
    r_c = 0
    while True:
        obj = q.get()
        if obj == None:
            break  #we received the end
        file_name, line, success, msg, func, external_source = obj
        try:
            #get function index
            func_index = -1
            if func in func_list:
                func_index = func_list[func]
            else:
                func_list[func] = last_func_index
                func_index = last_func_index
                last_func_index += 1

            if file_name in info_:
                info_[file_name].add_fsave(line, success, msg, func_index)
            else:
                info_[file_name] = File_Info().add_fsave(line, success, msg, func_index)
                info_[file_name].external = external_source
            total_c += 1
            if total_c - r_c > 10000:
                r_c = total_c
                print("::consumer {0:2d} :{1:10d}".format(c_index, total_c))
        except Exception as e:
            print(traceback.format_exc())
            break

    print("::consumer {0:2d} :{1:10d}".format(c_index, total_c))
    #write to temp file
    wr_fname = "vecmiss_fsave{0}.html".format(str(c_index) if len(list_Queue) > 1 else '')
    print("generate report for consumer {0} {1}".format(c_index, len(info_)))
    try:
        uniq_ind = str(c_index) + '_' if len(list_Queue) > 1 else ''
        generate_report(wr_fname, info_, only_body=False, unique_id_prefix=uniq_ind, fsave_format=True, function_list=func_list)
        print(" consumer {0} saved output into {1}".format(c_index, wr_fname))
    except Exception as e:
        print(traceback.format_exc())


def obtain_info_from(input_):
    info_ = dict()
    for line in input_:
        x = mtch.match(line)
        external_source = True
        if x:
            file_name = x.group(1).strip()
            if file_dir_strip in file_name:
                file_name = file_name.replace(file_dir_strip, '')
                external_source = False
            line_number = int(x.group(2))
            msg = x.group(4).lower()
            msg = msg.replace(file_dir_strip, './')
            msg_x = get_msg(msg)
            if msg_x is None:
                continue
            if file_name in info_:
                #ignore col_number
                info_[file_name].add(line_number, msg_x)
            else:
                info_[file_name] = File_Info().add(line_number, msg_x)
                info_[file_name].external = external_source
        elif progress_msg.match(line):
            #actually we redirect only stderr, so this should not happen
            print("__" + line.strip())
        elif "error" in line or "Error" in line:
            print("****" + line.strip())
    return info_


def custom_style(fsave):
    st = '''<style>a{color:blue;}
a:link{text-decoration:none}a:visited{text-decoration:none}a:hover{cursor:pointer;text-decoration:underline}
a:active{text-decoration:underline}
.f.ext{display:none}
.f{color:#000;display:flex;overflow:hidden;justify-content:space-between;flex-wrap:wrap;align-items:baseline;width:100%}
.f>div{min-width:10%}.f>div:first-child{min-width:70%;text-overflow:ellipsis}
.f:nth-of-type(even){background-color:#f5f5f5}
.f>div.g{flex:0 0 100%}.f>div:nth-child(2){font-weight:600;color:green}
.f>div:nth-child(3){font-weight:600;color:red}
.f>div:nth-child(2)::after{content:' ✓';color:green}.f>div:nth-child(3)::after{content:' -';color:red}
.f>div.g>div>div:nth-child(2){font-weight:600;color:green}
.f>div.g>div>div:nth-child(3){font-weight:600;color:red}
.f>div.g>div>div:nth-child(2)::after{content:' ✓';color:green}
.f>div.g>div>div:nth-child(3)::after{content:' -';color:red}
.f>div.g>div{display:flex;justify-content:space-between;flex-wrap:wrap;align-items:baseline}
.f>div.g>div>div{min-width:10%;text-align:left}
.g>div:nth-of-type(even){background-color:#ede6fa}
.f>div.g>div>ul{flex:0 0 100%}input[type=checkbox]{opacity:0;display:none}label{cursor:pointer}
.f>label{color:red}input[type=checkbox]~.g{display:none}input[type=checkbox]:checked~.g{display:block}
input[type=checkbox]~ul{display:none}
input[type=checkbox]:checked~ul{display:block}input[type=checkbox]+label::after{content:"⇲";display:block}
input[type=checkbox]:checked+label::after{content:"⇱";display:block}
'''
    if fsave == True:
        st += '''.modal{display:none;height:100%;background-color:#144F84;color:#fff;opacity:.93;left:0;position:fixed;top:0;width:100%}
.modal.open{display:flex;flex-direction:column}.modal__header{height:auto;font-size:large;padding:10px;background-color:#000;color:#fff}
.modal__footer{height:auto;font-size:medium;background-color:#000}
.modal__content{height:100%;display:flex;flex-direction:column;padding:20px;overflow-y:auto}
.modal_close{cursor:pointer;float:right}li{cursor:pointer}
'''
    return st + '''</style>'''


def header(fsave=False):
    strx = '<!DOCTYPE html>\n<html>\n<head>\n<meta charset="UTF-8">\n<title>Auto-Vectorization</title>\n'
    strx += '<base id="base_id" href="{0}" target="_blank" >'.format(BASE_URL)
    strx += custom_style(fsave)
    strx += '\n</head>\n<body>\n'
    return strx


def footer():
    return '\n</body></html>'


def get_compressed_indices(set_a):
    a_len = len(set_a)
    if a_len <= 1:
        if a_len < 1:
            return ''
        return str(set_a)[1:-1]
    #we sort and save only the differences
    # 1,14,15,19 --> 1,13,1,4  10 bytes => 8 bytes
    list_sorted = sorted(list(set_a))
    last = list_sorted[0]
    str_x = str(list_sorted[0])
    for i in range(1, a_len):
        str_x += ',' + str(list_sorted[i] - last)
        last = list_sorted[i]
    return str_x
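# Example (values taken from the comment above): the set {1, 14, 15, 19} is emitted
# as "1,13,1,4"; the javascript in jscipt_end() re-adds the deltas to recover the
# real function indices on the client side.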
def get_content(k, v, unique_id_prefix='', fsave_format=False):
    inner_str = ''
    content = ''
    inc_id = 0
    for fk, fv in sorted(v.infos.items()):
        if fsave_format == True:
            inner_str += '<div><div><a>{0}</a></div><div>{1}</div><div>{2}</div><input type="checkbox" id="c{3}{4}"><label for="c{3}{4}"></label><ul>'.format(
                fk, fv.optimized, fv.missed, unique_id_prefix, inc_id)
        else:
            inner_str += '<div><div><a href=".{0}#L{1}">{1}</a></div><div>{2}</div><div>{3}</div><input type="checkbox" id="c{4}{5}"><label for="c{4}{5}"></label><ul>'.format(
                k, fk, fv.optimized, fv.missed, unique_id_prefix, inc_id)
        inc_id += 1
        if fsave_format == True:
            for dt, df in fv.miss_details2.items():
                #inner_str += '<li data-fns="{0}">{1}</li>'.format(str(df).replace(", ",",")[1:-1],dt)
                inner_str += '<li data-fns="{0}">{1}</li>'.format(get_compressed_indices(df), dt)
        else:
            for dt in fv.miss_details:
                inner_str += "<li>" + str(dt) + "</li>"
        inner_str += "</ul></div>\n"

    content += '<div class="f'
    if v.external:
        content += " ext"
    content += '">\n<div>{0}</div><div>{1}</div><div>{2}</div><input type="checkbox" id="i{3}{4}"><label for="i{3}{4}"></label>'.format(
        k, v.total_opted, v.total_missed, unique_id_prefix, inc_id)
    content += "<div class='g'>"
    content += inner_str
    content += "</div> </div>\n"
    return content


def jscript_head():
    return '''
window.onload = function () {
    var modal = document.getElementsByClassName("modal")[0];
    var modal_close = document.getElementsByClassName("modal_close")[0];
    var content = document.getElementsByClassName("modal__content")[0];
    a_tags = document.getElementsByTagName("a");
    base_href = document.getElementById("base_id").href;
    for(i=0;i<a_tags.length;i++){
        a_tags[i].addEventListener("click", function () {
            var source = event.target || event.srcElement;
            file_src = source.parentElement.parentElement.parentElement.parentElement.children[0].innerText;
            link = base_href + file_src + '#L' + source.innerText;
            window.open(link, '_blank');
        });
    }
    modal_close.addEventListener("click", function () {
        content.innerHTML = '';
        modal.className = 'modal';
    });
'''


def jscipt_end():
    return '''
    tags = document.getElementsByTagName("li");
    function escapeHtml(unsafe) {
        return unsafe
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#039;");
    }
    for (i = 0; i < tags.length; i++) {
        tags[i].addEventListener("click", function () {
            var source = event.target || event.srcElement;
            funcs = source.dataset.fns.split(",")
            strx = ''
            //we saved differences, not real indices
            last_ind = 0;
            for (j = 0; j < funcs.length; j++) {
                ind = last_ind + parseInt(funcs[j]);
                strx += "<p>" + escapeHtml(func_list[ind]) + "</p>";
                last_ind = ind;
            }
            if (strx.length > 0) {
                content.innerHTML = strx;
                modal.className = 'modal open';
            }
        });
    }
};'''


def additional_tags(fsave):
    if fsave == False:
        return ''
    return '''<script type='text/javascript'>
var script = document.createElement('script'); script.src = window.location.href + ".js";
document.head.appendChild(script);
</script>
<div class="modal">
<div class="modal__header">Functions <span class="modal_close">X</span></div>
<div class="modal__content"></div>
<div class="modal__footer">========</div>
</div>
'''


def generate_report(output_name, info_, only_body=False, unique_id_prefix='', fsave_format=False, function_list=None):
    '''
    Generate the Auto-Vectorization report in html format
    '''

    temp_str = ''
    if fsave_format == True:
        #dump function_list as a key list sorted by value
        #and use it as a jscript array
        sorted_funcs_by_index = sorted(function_list.items(), key=lambda x: x[1])
        del function_list
        with open(output_name + ".js", "w") as f:
            temp_str = jscript_head() + "\n var func_list = ["
            for k, v in sorted_funcs_by_index:
                #json.dumps is used for escaping
                temp_str += json.dumps(get_cxx_filt_result(k)) + ","
                #reduce write calls
                if len(temp_str) > 8192 * 2:
                    f.write(temp_str)
                    temp_str = ''
            if len(temp_str) > 0:
                f.write(temp_str)
            f.write('"-"];' + jscipt_end())

    temp_str = ''
    with open(output_name, "w") as f:
        if only_body == False:
            f.write(header(fsave_format))
            f.write(additional_tags(fsave_format))
        nm = 0
        for k, v in sorted(info_.items()):  # sorted(info_.items(), key=lambda x: x[1].total_opted, reverse=True):
            temp_str += get_content(k, v, unique_id_prefix + str(nm), fsave_format)
            #reduce io write calls
            if len(temp_str) > 8192:
                f.write(temp_str)
                temp_str = ''
            nm += 1
        if len(temp_str) > 0:
            f.write(temp_str)
        if only_body == False:
            f.write(footer())


def fsave_report_launch(json_gz_list):

    cpus = cpu_count()
    if cpus > 32:
        cpus = 24

    c_count = 1  # 2 is sufficient  # if cpus<=1 else min(4,cpus)
    p_count = 3 if cpus <= 1 else max(8, cpus - c_count)

    m = Manager()
    #consumer Queues
    list_Queue = [m.Queue() for index in range(0, c_count)]
    with Pool(processes=c_count) as consumers:
        #start consumers
        cs = consumers.map_async(consume_processed_mp, [(list_Queue, index,) for index in range(0, c_count)])
        with Pool(processes=p_count) as processors:
            processors.map(process_gzip_json_mp, [(fname, list_Queue,) for fname in json_gz_list])

        #send ends to inform our consumers
        for q in list_Queue:
            q.put(None)

        #wait for consumers
        cs.wait()


def main():
    if "--fsave" in sys.argv:
        json_gz_list = internal_glob(".", "*.json.gz")
        fsave_report_launch(json_gz_list)
        return

    file_info = obtain_info_from(sys.stdin)
    if len(file_info) > 0:
        print("---generating vectorization html report--")
        generate_report("vecmiss.html", file_info)
    else:
        #lets check if we got fsave files
        json_gz_list = internal_glob(".", "*.json.gz")
        fsave_report_launch(json_gz_list)


if __name__ == '__main__':
    main()
@@ -0,0 +1,354 @@
'''
@author : Abdelrauf rauf@konduit.ai
Simple object extractor for very big json files
'''

import sys
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free


cdef char JSON_1 = b':'
cdef char JSON_2 = b','
cdef char JSON_3 = b'{'
cdef char JSON_4 = b'}'
cdef char JSON_5 = b'['
cdef char JSON_6 = b']'
cdef char QUOTE = b'"'
cdef char ESCAPE = b"\\"
cdef char SPACE = b' '
cdef char TAB = b'\t'
cdef char CR = b'\r'
cdef char NL = b'\n'
cdef char B = b'\b'
cdef char EMPTY = b'\0'


cdef struct Span:
    int b
    int e


cdef inline Span read_unquoted(char *text, int start, int end):
    cdef Span sp
    cdef int j = start
    while j < end:
        #if text[j].isspace():
        if text[j] == SPACE or text[j] == NL or text[j] == TAB or text[j] == CR or text[j] == B:
            j += 1
            continue
        if text[j] != QUOTE and text[j] != JSON_1 and text[j] != JSON_2 and text[j] != JSON_3 and text[j] != JSON_4 and text[j] != JSON_5 and text[j] != JSON_6:
            start = j
            j += 1
            while j < end:
                # read till JSON or white space
                if text[j] == SPACE or text[j] == NL or text[j] == TAB or text[j] == CR or text[j] == B:
                    sp.b = start
                    sp.e = j
                    return sp
                elif text[j] == JSON_1 or text[j] == JSON_2 or text[j] == JSON_3 or text[j] == JSON_4 or text[j] == JSON_5 or text[j] == JSON_6:
                    sp.b = start
                    sp.e = j
                    return sp
                j += 1
            if j == end-1:
                sp.b = start
                sp.e = end
                return sp
        break
    sp.b = j
    sp.e = j
    return sp


cdef inline Span read_seq_token(char *text, int start, int end):
    #read quoted
    #skip white_space
    cdef Span sp
    cdef int j = start
    cdef char last_char
    cdef char char_x
    while j < end:
        if text[j] == SPACE or text[j] == NL or text[j] == TAB or text[j] == CR or text[j] == B:
            j += 1
            continue
        if text[j] == QUOTE:
            last_char = EMPTY
            #read till another quote
            start = j
            j += 1
            while j < end:
                char_x = text[j]
                if char_x == QUOTE and last_char != ESCAPE:
                    # finished reading
                    sp.b = start
                    sp.e = j+1
                    return sp
                last_char = char_x
                j += 1
            if j == end-1:
                sp.b = start
                sp.e = end
                return sp
        else:
            break
    return read_unquoted(text, j, end)


def tokenizer_spans(utext):
    '''
    we will just return tokenized spans
    '''
    token_spans = []
    last_char = b''
    end_i = len(utext)
    cdef char *text = utext
    i = 0
    cdef Span sp
    while i < end_i:
        sp = read_seq_token(text, i, end_i)
        i = sp.e
        if sp.e > sp.b:
            token_spans.append((sp.b, sp.e))
        if i < end_i:
            #if text[i] in JSON:
            if text[i] == JSON_3 or text[i] == JSON_4 or text[i] == JSON_5 or text[i] == JSON_6 or text[i] == JSON_1 or text[i] == JSON_2:
                token_spans.append((i, i+1))
            i += 1
    return token_spans


cdef class JsonObjXtractor:
    '''
    JsonObjXtractor that utilizes cython better
    '''

    cdef Span* token_spans
    cdef size_t size

    def __cinit__(self, size_t count=4096):
        self.token_spans = <Span*> PyMem_Malloc(count * sizeof(Span))
        self.size = count
        if not self.token_spans:
            raise MemoryError()

    def __tokenizer_spans(self, utext, length):
        '''
        we will just return the token spans length
        '''

        last_char = b''
        end_i = length
        cdef char *text = utext
        cdef int i = 0
        cdef size_t j = 0
        cdef Span sp
        while i < end_i:
            sp = read_seq_token(text, i, end_i)
            i = sp.e
            if sp.e > sp.b:
                self.token_spans[j] = sp
                j += 1
                if j > self.size:
                    #we need to reallocate
                    self.__resize(self.size + self.size//2)
            if i < end_i:
                #if text[i] in JSON:
                if text[i] == JSON_3 or text[i] == JSON_4 or text[i] == JSON_5 or text[i] == JSON_6 or text[i] == JSON_1 or text[i] == JSON_2:
                    sp.b = i
                    sp.e = i+1
                    self.token_spans[j] = sp
                    j += 1
                    if j > self.size:
                        #we need to reallocate
                        self.__resize(self.size + self.size//2)
                i += 1
        return j

    def try_extract_parent_obj(self, json_bytes, property_name, next_contains_value=b'', debug=False):
        '''
        try_extract_parent_obj(json_text, property_name, next_contains_value='', debug=False):
        make sure that the passed variables are encoded to bytes with encode('utf-8')
        next_contains_value is either direct content or followed by '['
        tries to extract the parent object of the given named object
        if the left brace of the parent object is outside of the current buffer
        it will be ignored
        if the right brace is outside of the buffer it will be left to be handled by the caller
        '''

        look_for_the_left = True
        parent_left = []
        parent_right = []
        parent_objects = []
        len_next = len(next_contains_value)
        cdef int ind = 0
        cdef int end
        cdef int last_start = 0
        property_name = b'"' + property_name + b'"'
        cdef int lenx = self.__tokenizer_spans(json_bytes, len(json_bytes))
        cdef char x
        cdef int i = -1
        cdef Span sp
        while i < lenx-1:
            i += 1
            ind = self.token_spans[i].b
            x = json_bytes[ind]
            if look_for_the_left == False:
                if x == JSON_3:
                    parent_right.append(ind)
                elif x == JSON_4:
                    if len(parent_right) == 0:
                        #we found the parent closing brace
                        look_for_the_left = True
                        parent_objects.append((parent_left[-1], ind+1))
                        last_start = ind+1
                        parent_left = []
                        parent_right = []
                    else:
                        parent_right.pop()
                continue
            #search obj
            if look_for_the_left:
                if x == JSON_3:
                    parent_left.append(ind)
                    last_start = ind
                elif x == JSON_4:
                    if len(parent_left) >= 1:
                        #ignore
                        parent_left.pop()

            if x == JSON_1:  # ':'
                #check to see if it is the property name
                old_property = EMPTY
                if i > 1:
                    sp = self.token_spans[i-1]
                    old_property = json_bytes[sp.b:sp.e]
                if old_property == property_name:
                    #we found it
                    if len(parent_left) < 1:
                        #the left brace is outside of the buffer
                        #we have to ignore it
                        #try to increase the buffer
                        if debug:
                            print('''left brace of the parent is outside of the buffer and the parent is big.
it will be ignored.
try to choose unambiguous property names if you are looking for small objects''', file=sys.stderr)
                        last_start = ind+1
                        parent_left = []
                        parent_right = []
                        continue
                    else:
                        #look for the right brace
                        if len_next > 0 and i+1 < lenx:
                            i += 1
                            ind = self.token_spans[i].b
                            end = self.token_spans[i].e
                            m = json_bytes[ind]

                            if m == JSON_5:
                                if i+1 < lenx:
                                    i += 1
                                    ind = self.token_spans[i].b
                                    end = self.token_spans[i].e
                                    if len_next <= end-ind and next_contains_value in json_bytes[ind:end]:
                                        look_for_the_left = False
                                        continue
                            elif len_next <= end-ind and next_contains_value in json_bytes[ind:end]:
                                look_for_the_left = False
                                continue

                            #ignore as it does not have that value
                            parent_left = []
                            parent_right = []
                            last_start = ind + 1
                        else:
                            look_for_the_left = False

        # lets return the last successfully opened brace as the last position,
        # or, for the left-brace failure case, the safe closed brace
        if len(parent_left) > 0:
            return (parent_objects, parent_left[-1])

        return (parent_objects, last_start)

    def __resize(self, size_t new_count):
        cdef Span* mem = <Span*> PyMem_Realloc(self.token_spans, new_count * sizeof(Span))
        if not mem:
            raise MemoryError()
        self.token_spans = mem
        self.size = new_count

    def __dealloc__(self):
        PyMem_Free(self.token_spans)


import json
import gzip
import sys
DEBUG_LOG = False


def json_gzip_extract_objects(filename, property_name, next_contains_value=''):
    strx = b''
    started = False
    b_next_contains_value = next_contains_value.encode('utf-8')
    b_property_name = property_name.encode('utf-8')
    objXt = JsonObjXtractor()
    with gzip.open(filename, 'rb') as f:
        if DEBUG_LOG:
            print("opened {0}".format(filename), file=sys.stderr)
        #instead of reading it line by line, read it as binary bytes
        is_End = False
        while is_End == False:
            buffer = f.read(8192*2)

            lenx = len(buffer)
            if lenx < 1:
                is_End = True
            else:
                strx = strx + buffer

            objects, last_index = objXt.try_extract_parent_obj(strx, b_property_name, b_next_contains_value)

            for start, end in objects:
                yield json.loads(strx[start:end])

            #remove processed
            if last_index < len(strx):
                strx = strx[last_index:]
            else:
                strx = b''

            if len(strx) > 16384*3:
                #buffer too big
                #try to avoid big parents
                if DEBUG_LOG:
                    print("parent object is too big. please, look for a better property name", file=sys.stderr)
                break
@@ -0,0 +1,3 @@
from distutils.core import setup
from Cython.Build import cythonize
setup(ext_modules=cythonize("bigGzipJson.pyx", language_level="3"))
@@ -282,6 +282,32 @@ elseif(CPU_BLAS)
        set_source_files_properties(../include/helpers/impl/OpTracker.cpp PROPERTIES COMPILE_FLAGS "-march=x86-64 -mtune=generic")
    endif()

    if(CHECK_VECTORIZATION)
        set(VECT_FILES cpu/NativeOps.cpp ${OPS_SOURCES} ${HELPERS_SOURCES} ${CUSTOMOPS_GENERIC_SOURCES} ${LOOPS_SOURCES})
        if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")

            if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
                set(CHECK_VECT_FLAGS "-ftree-vectorize -fsave-optimization-record")
                #to process fsave-optimization-record we will need our cython version code
                message("Build Auto vectorization helpers")
                execute_process(COMMAND "python3" "${CMAKE_CURRENT_SOURCE_DIR}/../auto_vectorization/cython_setup.py" "build_ext" "--inplace" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../auto_vectorization/" RESULT_VARIABLE ret)
                message("build='${ret}'")

                #remove failure cases that gcc sometimes fails to produce records for
                file(GLOB_RECURSE FAILURE_CASES false ../include/loops/cpu/compilation_units/reduce3*.cpp)
                #message("*****${FAILURE_CASES}")
                foreach(FL_ITEM ${FAILURE_CASES})
                    message("Removing failure case ${FL_ITEM}")
                    list(REMOVE_ITEM VECT_FILES ${FL_ITEM})
                endforeach()
            else()
                set(CHECK_VECT_FLAGS "-ftree-vectorize -fopt-info-vec-optimized-missed")
            endif()
            message("CHECK VECTORIZATION ${CHECK_VECT_FLAGS}")
            set_source_files_properties( ${VECT_FILES} PROPERTIES COMPILE_FLAGS "${CHECK_VECT_FLAGS}" )
        endif()
    endif()

    message("CPU BLAS")
    add_definitions(-D__CPUBLAS__=true)
    add_library(nd4jobj OBJECT cpu/NativeOps.cpp cpu/GraphExecutioner.cpp
@@ -55,6 +55,7 @@ TESTS="false"
VERBOSE="false"
VERBOSE_ARG="VERBOSE=1"
HELPER=
CHECK_VECTORIZATION="OFF"
NAME=
while [[ $# > 0 ]]
do

@@ -114,6 +115,9 @@ case $key in
    NAME="$value"
    shift # past argument
    ;;
    --check-vectorization)
    CHECK_VECTORIZATION="ON"
    ;;
    -j)
    MAKEJ="$value"
    shift # past argument

@@ -528,14 +532,27 @@ echo MINIFIER = "${MINIFIER_ARG}"
echo TESTS = "${TESTS_ARG}"
echo NAME = "${NAME_ARG}"
echo OPENBLAS_PATH = "$OPENBLAS_PATH"
echo CHECK_VECTORIZATION = "$CHECK_VECTORIZATION"
echo HELPERS = "$HELPERS"
mkbuilddir
pwd
eval $CMAKE_COMMAND "$BLAS_ARG" "$ARCH_ARG" "$NAME_ARG" $HELPERS "$SHARED_LIBS_ARG" "$MINIFIER_ARG" "$OPERATIONS_ARG" "$BUILD_TYPE" "$PACKAGING_ARG" "$EXPERIMENTAL_ARG" "$TESTS_ARG" "$CUDA_COMPUTE" -DOPENBLAS_PATH="$OPENBLAS_PATH" -DDEV=FALSE -DCMAKE_NEED_RESPONSE=YES -DMKL_MULTI_THREADED=TRUE ../..
eval $CMAKE_COMMAND "$BLAS_ARG" "$ARCH_ARG" "$NAME_ARG" -DCHECK_VECTORIZATION="${CHECK_VECTORIZATION}" $HELPERS "$SHARED_LIBS_ARG" "$MINIFIER_ARG" "$OPERATIONS_ARG" "$BUILD_TYPE" "$PACKAGING_ARG" "$EXPERIMENTAL_ARG" "$TESTS_ARG" "$CUDA_COMPUTE" -DOPENBLAS_PATH="$OPENBLAS_PATH" -DDEV=FALSE -DCMAKE_NEED_RESPONSE=YES -DMKL_MULTI_THREADED=TRUE ../..

if [ "$PARALLEL" == "true" ]; then
    MAKE_ARGUMENTS="$MAKE_ARGUMENTS -j $MAKEJ"
fi
if [ "$VERBOSE" == "true" ]; then
    MAKE_ARGUMENTS="$MAKE_ARGUMENTS $VERBOSE_ARG"
fi
eval $MAKE_COMMAND $MAKE_ARGUMENTS && cd ../../..

if [ "$CHECK_VECTORIZATION" == "ON" ]; then

    if [ "$MAKE_COMMAND" == "make" ]; then
        MAKE_ARGUMENTS="$MAKE_ARGUMENTS --output-sync=target"
    fi
    # duplicate stdout on fd 3, then pipe only the compiler diagnostics (stderr)
    # into the report generator while the regular build output stays on the console
    exec 3>&1
    eval $MAKE_COMMAND $MAKE_ARGUMENTS 2>&1 >&3 3>&- | python3 ../../auto_vectorization/auto_vect.py && cd ../../..
    exec 3>&-
else
    eval $MAKE_COMMAND $MAKE_ARGUMENTS && cd ../../..
fi