''' @author : Abdelrauf rauf@konduit.ai '''
import re
import sys
import os
import subprocess
import fnmatch
import json
import gzip
try:
    from bigGzipJson import json_gzip_extract_objects
except ImportError:
    pass
from pathlib import Path
from multiprocessing import Pool, Manager, cpu_count
import traceback
import html

# Parses plain-text compiler diagnostics: "<file>:<line>:<col>:<message>"
mtch = re.compile(r"[^/]*([^:]+)\:(\d+)\:(\d+)\:(.*)")
# Collapses numeric tokens inside messages so similar diagnostics dedupe.
replace_msg = re.compile(r"(\d+)?\.?(\d+)?_?\d+\.?(\d+)?")
# Matches build progress markers such as "[ 42%]".
progress_msg = re.compile(r"\s{0,4}\[\s{0,2}\d+\%\]")

# Absolute path prefix to strip from file names; truncated at the "libnd4j"
# folder when the current working directory is inside the project tree.
file_dir_strip = str(Path(os.getcwd()))
pp_index = file_dir_strip.rfind("libnd4j")
if pp_index >= 0:
    file_dir_strip = file_dir_strip[:pp_index + len("libnd4j")]

BASE_URL = "https://github.com/eclipse/deeplearning4j/tree/master/libnd4j/"
if BASE_URL.endswith("/") == False:
    BASE_URL = BASE_URL + "/"
#print(file_dir_strip)


class info:
    """Plain attribute bag holding per-source-line vectorization counters."""
    def __repr__(self):
        return str(self.__dict__)


# When True, records for files outside the project tree are dropped.
FSAVE_IGNORE_EXTERNALS = True


def get_cxx_filt_result(strx):
    """Demangle a C++ symbol via `c++filt -i`, then abbreviate common long
    type names to shrink the generated report. Returns '' for empty input."""
    if len(strx) < 1:
        return ""
    res = subprocess.Popen(["c++filt", "-i", strx], stdout=subprocess.PIPE).communicate()[0]
    res = res.decode('utf-8')
    # replace some long names to reduce size
    res = res.replace("unsigned long long", "uLL")
    res = res.replace("unsigned long int", "uL")
    res = res.replace("unsigned long", "uL")
    res = res.replace("unsigned int", "ui")
    res = res.replace("unsigned char", "uchar")
    res = res.replace("unsigned short", "ushort")
    res = res.replace("long long", "LL")
    res = res.replace(", ", ",")
    return res.strip()


def internal_glob(dir, match):
    """Recursively collect paths under `dir` whose file names match the
    fnmatch-style pattern `match`."""
    listx = []
    for root, dirnames, filenames in os.walk(dir):
        for filename in fnmatch.filter(filenames, match):
            listx.append(os.path.join(root, filename))
    return listx


def get_obj_json_gz(filename):
    """Load a gzipped JSON file fully into memory and return the last
    top-level element of the decoded array."""
    with gzip.GzipFile(filename, 'r') as f:
        return json.loads(f.read().decode('utf-8'))[-1]


def get_msg(msg):
    """Classify one plain-text compiler message.

    Returns a tuple (optimized_count, missed_count, detail) where `detail`
    is the normalized miss reason (numbers collapsed to "_numb") or None,
    or returns None when the message should be ignored entirely.
    """
    msg = msg.lower().strip()
    if "note: not vectorized:" in msg:
        msg = replace_msg.sub("_numb", msg.replace("note: not vectorized:", ""))
        return (0, 1, msg.strip())
    elif "loop vectorized" in msg:
        return (1, 0, None)
    # NOTE(review): this branch was commented out in the original source; the
    # '#' markers were lost when the file's line breaks were mangled.
    # elif msg.startswith("missed")==False:
    #     msg = replace_msg.sub("_numb",msg)
    #     return( 0, 0, msg.strip())
    return None


class File_Info:
    ''' Holds information about vectorized and miss vectorized lines for one file '''

    def __init__(self):
        self.infos = {}        # line number -> info record
        self.total_opted = 0   # vectorized-loop messages seen for this file
        self.total_missed = 0  # missed-vectorization messages seen
        self.external = False  # True when the file is outside the project tree

    def add_line(self, line_pos):
        """Get-or-create the record for `line_pos` (plain-text parse mode:
        miss details are kept as a set of normalized messages)."""
        if line_pos not in self.infos:
            v = info()
            v.optimized = 0
            v.missed = 0
            v.miss_details = set()
            self.infos[line_pos] = v
            return v
        else:
            return self.infos[line_pos]

    def add_line_fsave(self, line_pos):
        """Get-or-create the record for `line_pos` (-fsave optimization-record
        mode: miss details map message -> set of function indices)."""
        if line_pos not in self.infos:
            v = info()
            v.optimized = 0
            v.missed = 0
            v.miss_details2 = dict()
            self.infos[line_pos] = v
            return v
        else:
            return self.infos[line_pos]

    def add_fsave(self, line_pos, success, msg, function, inline_fns=''):
        """Record one optimization-record entry for `line_pos`.

        `success` marks a vectorized loop; otherwise a "not vectorized:" miss
        is tallied and `function` is added to the set of functions hitting
        that (interned, deduplicated) miss reason. Returns self for chaining.
        """
        v = self.add_line_fsave(line_pos)
        if success and "loop vectorized" in msg:
            v.optimized += 1
            self.total_opted += 1
        elif not success and "not vectorized:" in msg:
            # reduce this msg
            msg = msg.replace("not vectorized:", "")
            v.missed += 1
            self.total_missed += 1
            # intern: many identical reason strings are held across files
            msg = sys.intern(msg)
            if msg in v.miss_details2:
                v.miss_details2[msg].add(function)
            else:
                ls = set()
                v.miss_details2[msg] = ls
                ls.add(function)
        return self

    def add(self, line_pos, msg_x):
        """Record one classified plain-text message tuple from get_msg().
        Returns self for chaining."""
        v = self.add_line(line_pos)
        if msg_x is not None:
            v.optimized += msg_x[0]
            v.missed += msg_x[1]
            self.total_opted += msg_x[0]
            self.total_missed += msg_x[1]
            if msg_x[2] is not None:
                v.miss_details.add(msg_x[2])
        return self

    def __repr__(self):
        return str(self.__dict__)


def process_gzip_json_mp(args):
    # Pool.map adapter: unpack the (filename, queues) tuple.
    process_gzip_json_new(*args)


def process_gzip_json_new(json_gz_fname, list_Queue):
    """Producer: stream 'vectorized' message objects out of one gzipped JSON
    optimization record and push each to a consumer queue chosen by hashing
    the source file name (so one file always lands on the same consumer)."""
    gz_name = Path(json_gz_fname).stem
    #print("::--open and process {0}".format(gz_name))
    queue_count = len(list_Queue)
    q = list_Queue[0]
    old_fname = ''
    total_c = 0
    for x in json_gzip_extract_objects(json_gz_fname, 'message', 'vectorized'):
        external_source = True
        if len(x['message']) > 0 and 'location' in x:
            line = int(x['location']['line'])
            file_name = x['location']['file'].strip()
            if file_dir_strip in file_name:
                file_name = file_name.replace(file_dir_strip, './')
                external_source = False
            msg = x['message'][0]
            success = x['kind'] == 'success'
            func = '' if 'function' not in x else x['function']
            if file_name != old_fname:
                # send our info to the right consumer
                queue_ind = hash(file_name) % queue_count
                q = list_Queue[queue_ind]
                old_fname = file_name
            total_c += 1
            if FSAVE_IGNORE_EXTERNALS and external_source:
                continue
            q.put((file_name, line, success, msg, func, external_source))
    print("::finished {0:60s} :{1:8d}".format(gz_name, total_c))


def consume_processed_mp(args):
    # Pool.map adapter: unpack the (queues, index) tuple.
    return consume_processed_new(*args)


def consume_processed_new(list_Queue, c_index):
    """Consumer: drain queue `c_index` until a None sentinel arrives,
    aggregating records into {file_name: File_Info}, then write this
    consumer's HTML report via generate_report()."""
    info_ = dict()
    func_list = dict()   # function name -> small int index (saves memory)
    last_func_index = 0
    q = list_Queue[c_index]
    print("::consumer {0}".format(c_index))
    total_c = 0
    r_c = 0
    while True:
        obj = q.get()
        if obj is None:
            break  # we received the end sentinel
        file_name, line, success, msg, func, external_source = obj
        try:
            # map the (possibly long, demangled) function name to an index
            if func in func_list:
                func_index = func_list[func]
            else:
                func_list[func] = last_func_index
                func_index = last_func_index
                last_func_index += 1
            if file_name in info_:
                info_[file_name].add_fsave(line, success, msg, func_index)
            else:
                info_[file_name] = File_Info().add_fsave(line, success, msg, func_index)
                info_[file_name].external = external_source
            total_c += 1
            # periodic progress output every ~10000 records
            if total_c - r_c > 10000:
                r_c = total_c
                print("::consumer {0:2d} :{1:10d}".format(c_index, total_c))
        except Exception as e:
            print(traceback.format_exc())
            break
    print("::consumer {0:2d} :{1:10d}".format(c_index, total_c))
    # write to temp file; suffix with the consumer index only when sharded
    wr_fname = "vecmiss_fsave{0}.html".format(str(c_index) if len(list_Queue) > 1 else '')
    print("generate report for consumer {0} {1}".format(c_index, len(info_)))
    try:
        uniq_ind = str(c_index) + '_' if len(list_Queue) > 1 else ''
        generate_report(wr_fname, info_, only_body=False, unique_id_prefix=uniq_ind,
                        fsave_format=True, function_list=func_list)
        print(" consumer {0} saved output into {1}".format(c_index, wr_fname))
    except Exception as e:
        print(traceback.format_exc())


def obtain_info_from(input_):
    """Parse plain-text compiler diagnostics (one per line of `input_`) into
    a {file_name: File_Info} dict; progress markers and errors are echoed."""
    info_ = dict()
    for line in input_:
        x = mtch.match(line)
        external_source = True
        if x:
            file_name = x.group(1).strip()
            if file_dir_strip in file_name:
                file_name = file_name.replace(file_dir_strip, '')
                external_source = False
            line_number = int(x.group(2))
            msg = x.group(4).lower()
            msg = msg.replace(file_dir_strip, './')
            msg_x = get_msg(msg)
            if msg_x is None:
                continue
            if file_name in info_:
                # ignore col_number
                info_[file_name].add(line_number, msg_x)
            else:
                #print("{0} {1}".format(file_name,external_source))
                info_[file_name] = File_Info().add(line_number, msg_x)
                info_[file_name].external = external_source
        elif progress_msg.match(line):
            # actually we redirect only stderr, so this should not happen
            print("__" + line.strip())
        elif "error" in line or "Error" in line:
            print("****" + line.strip())
    return info_


def custom_style(fsave):
    # NOTE(review): the CSS template literal appears to have been stripped
    # from this copy of the file — confirm against the original source.
    st = ''''''
    # fix: the original fell off the end and implicitly returned None, which
    # would make header()'s `strx += custom_style(fsave)` raise TypeError.
    return st


def header(fsave=False):
    # NOTE(review): the HTML template literals appear stripped in this copy
    # of the file; the empty format string below is preserved verbatim.
    strx = '\n\n\n\nAuto-Vectorization\n'
    strx += ''.format(BASE_URL)
    strx += custom_style(fsave)
    strx += '\n\n\n'
    return strx


def footer():
    return '\n'


def get_compressed_indices(set_a):
    """Encode a set of ints compactly: sorted, comma-separated, with each
    element after the first stored as the delta from its predecessor."""
    a_len = len(set_a)
    if a_len <= 1:
        if a_len < 1:
            return ''
        return str(set_a)[1:-1]
    # we sorted and only saved difference
    # 1,14,15,19 --> 1,13,1,4   10bytes => 8bytes
    list_sorted = sorted(list(set_a))
    last = list_sorted[0]
    str_x = str(list_sorted[0])
    for i in range(1, a_len):
        str_x += ',' + str(list_sorted[i] - last)
        last = list_sorted[i]
    return str_x


def get_content(k, v, unique_id_prefix = '', fsave_format=False):
    inner_str = ''
    content = ''
    inc_id = 0
    for fk, fv in sorted(v.infos.items()):
        if fsave_format == True:
            # NOTE(review): the HTML template appended here was truncated in
            # this copy of the source; the empty placeholder below only keeps
            # the structure syntactically intact — restore from the original.
            inner_str += ''
{0}
{1}
{2}