#!/usr/bin/env python3
"""DPS Log Digger.

Parses a Directory Proxy Server access log and emits per-minute CSV
statistics (transaction count, max etime, average etime) per operation
type, optionally broken down per client IP (-c) and/or per back-end
server (-s), within an optional hh:mm time window (-f / -t).

Output: for an input log FILE, writes FILE-tx.csv, FILE-max.csv and
FILE-avg.csv, one row per (minute, client, back-end).

NOTE(review): the original source was Python 2 and had been mangled by
extraction (regex named groups stripped, one accumulation block
truncated). The regexes and the etime tally below are reconstructions —
confirm against a real DPS access log.
"""

import getopt
import locale
import re
import sys
import time

VERSION = "0.6d"

# Operation types as they appear in DPS access-log RESPONSE lines.
# "TOTAL" is a synthetic bucket accumulating all operations.
OPERATION_TYPES = (
    "ABANDON", "ADD", "BIND", "CONNECT", "COMPARE", "DELETE", "DISCONNECT",
    "EXTENDED", "MODIFY", "MODIFY DN", "SEARCH", "TOTAL",
)

# Statistics kept per (client, back-end, operation):
#   tx  - number of operations seen this minute
#   max - largest etime seen this minute
#   avg - sum of etimes while accumulating; converted to a mean at dump time
STAT_TYPES = ("avg", "max", "tx")

# --- log-line patterns (named groups restored; originals were garbled) ---
TIME_MASK = (r'^\[\d{2}/[A-Z][a-z]{2}/\d{4}:(?P<hour>\d{2}):(?P<minute>\d{2})'
             r':\d{2} [-+]\d{4}\].*$')
CLIENT_CX_MASK = (r'^.* - CONNECT .* conn=(?P<cx>\d+) '
                  r'client=(?P<client>[0-9\.]+).*$')
CLIENT_ET_MASK = (r'^.* - OPERATION .* conn=(?P<cx>\d+) op=\d+ '
                  r'(?P<operation>[A-Z ]+) RESPONSE .* etime=(?P<etime>[0-9]+)$')
CLIENT_DX_MASK = r'^.* - DISCONNECT .* conn=(?P<cx>\d+) .*$'
CLIENT_AB_MASK = r'^.* conn=(?P<cx>\d+) .* ABANDON .*$'
SERVER_TX_MASK = (r'^.* - SERVER_OP .* conn=(?P<cx>\d+) .* '
                  r'(?P<operation>[A-Z ]+) RESPONSE .* s_conn=(?P<server>[^:]+).*$')


def usage():
    """Print the command-line help."""
    print("-c : break up statistics per client")
    print("-s : break up statistics per back-end server")
    print("-f hh:mm: start parsing at a given point in time")
    print("-t hh:mm: stop parsing after a given point in time")
    print("-h : print this help message")
    print("-v : print tool version")


def parse_hhmm(value):
    """Parse an 'hh:mm' string; return (hour, minute) ints, or None if malformed."""
    m = re.match(r'(?P<hour>\d{2}):(?P<minute>\d{2})', value)
    if m is None:
        return None
    return int(m.group('hour')), int(m.group('minute'))


def get_ip(connections, cx):
    """Return the client IP recorded for connection id *cx*, or "OTHER".

    "OTHER" covers connections opened before the start of the parsed log
    (or filtered out by -c), for which no CONNECT line was seen.
    """
    return connections.get(cx, "OTHER")


def add_server(perIP, client, server):
    """Create a zeroed stats bucket perIP[client][server][op][stat]."""
    perIP[client][server] = {
        ot: {st: 0 for st in STAT_TYPES} for ot in OPERATION_TYPES
    }


def add_client(perIP, ip, client_filter, per_server):
    """Register a client IP with its INCOMING (and, if requested, _ALLSERVERS) buckets.

    *client_filter* is kept for signature compatibility with the original
    but is unused here: filtering happens at the call sites.
    """
    if ip not in perIP:
        perIP[ip] = {}
        add_server(perIP, ip, 'INCOMING')
        if per_server:
            add_server(perIP, ip, '_ALLSERVERS')


def tally(bucket, op, et):
    """Fold one response with elapsed time *et* into bucket[op].

    NOTE(review): reconstructed from a truncated original — tx counts the
    response, avg accumulates the etime sum, max tracks the peak etime.
    """
    cell = bucket[op]
    cell['tx'] += 1
    cell['avg'] += et
    if cell['max'] < et:
        cell['max'] = et


def dump_to_file(time_str, perIP, out_files):
    """Write one CSV row per (client, back-end) for minute *time_str* and reset counters.

    'avg' cells holding etime sums are converted to means (sum/tx) here.
    """
    for ip in sorted(perIP):
        for be in sorted(perIP[ip]):
            prefix = "%s,%s,%s," % (time_str, ip, be)
            rows = {st: prefix for st in STAT_TYPES}
            for ot in OPERATION_TYPES:
                cell = perIP[ip][be][ot]
                for st in STAT_TYPES:
                    if st == "avg":
                        if cell['tx'] != 0:
                            cell['avg'] = float(cell['avg']) / cell['tx']
                        rows[st] += "%4.3f," % cell[st]
                    else:
                        rows[st] += "%d," % cell[st]
                    # zero the counter for the next minute
                    cell[st] = 0
            for st in STAT_TYPES:
                out_files[st].write(rows[st].rstrip(",") + "\n")
    print("%s Done" % time_str)


def main(argv=None):
    """Command-line entry point. Parses options, digests the log, writes CSVs."""
    locale.setlocale(locale.LC_ALL, "")
    start = time.time()
    if argv is None:
        argv = sys.argv[1:]

    per_client = False
    client_filter = "*"
    per_server = False
    server_filter = "*"
    dig_from = False
    dig_from_h = dig_from_m = 0
    dig_to = False
    dig_to_h = dig_to_m = 0

    try:
        opts, args = getopt.getopt(
            argv, "c:f:t:hs:v",
            ["per-client", "from=", "to=", "help", "per-server", "version"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)
    print(opts)
    print(args)
    for option, value in opts:
        if option in ("-v", "--version"):
            print("DPS Log Digger Version " + VERSION)
            sys.exit(0)
        elif option in ("-c", "--per-client"):
            per_client = True
            client_filter = value
        elif option in ("-s", "--per-server"):
            per_server = True
            server_filter = value
        elif option in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif option in ("-f", "--from"):
            hm = parse_hhmm(value)
            if hm is None:
                print("Wrong 'from' time pattern. Use something like 20:19")
                usage()
                sys.exit(17)
            dig_from = True
            dig_from_h, dig_from_m = hm
            print("Start parsing at %d:%d" % (dig_from_h, dig_from_m))
        elif option in ("-t", "--to"):
            hm = parse_hhmm(value)
            if hm is None:
                print("Wrong 'to' time pattern. Use something like 20:19")
                usage()
                sys.exit(18)
            dig_to = True
            dig_to_h, dig_to_m = hm
            print("Stop parsing at %d:%d" % (dig_to_h, dig_to_m))

    if not args:
        # Original crashed with IndexError when no log file was given.
        print("Missing log file argument")
        usage()
        sys.exit(2)
    file_name = args[0]

    # One output CSV per statistic type, all sharing the same header.
    header = "TIME,CLIENT,BACK-END," + ",".join(
        ot.replace(" ", "_") for ot in OPERATION_TYPES)
    out_files = {}
    for st in STAT_TYPES:
        out_files[st] = open(file_name + "-" + st + ".csv", "w")
        out_files[st].write(header + "\n")

    time_pattern = re.compile(TIME_MASK)
    et_pattern = re.compile(CLIENT_ET_MASK)
    cx_pattern = re.compile(CLIENT_CX_MASK)
    dx_pattern = re.compile(CLIENT_DX_MASK)
    ab_pattern = re.compile(CLIENT_AB_MASK)
    srv_pattern = re.compile(SERVER_TX_MASK) if per_server else None

    # Maps connection id -> client IP, built from CONNECT lines.
    client_connections = {}

    # perIP[clientIP][serverIP][operationType][stat_type]
    perIP = {}
    # SPECIAL BUCKET TO CONTAIN THE SUM OF ALL CLIENT ACTIVITY
    add_client(perIP, '_ALLCLIENTS', client_filter, per_server)
    if per_client:
        # SPECIAL BUCKET TO CONTAIN THE TRAFFIC OF 'OTHER' CLIENTS
        # THIS GENERALLY CONTAINS THE TRAFFIC FOR WHICH THE CONNECTION
        # WAS OPENED EARLIER THAN THE START OF THE PARSED LOG
        add_client(perIP, 'OTHER', client_filter, per_server)

    cur_min = 60  # sentinel: forces the first timestamped line to roll the minute
    cur_hour = 0
    i = 0
    first_dump = True

    with open(file_name, "r") as f:
        for line in f:
            i += 1
            if i % 100000 == 0:
                print(locale.format_string("%d", i, True) + " lines processed")
            t = time_pattern.match(line)
            if t is None:
                continue
            next_hour = int(t.group('hour'))
            next_min = int(t.group('minute'))

            # Even when parsing a time subset, track CONNECT lines from the
            # very start of the file so connection ids can be mapped to IPs
            # inside the desired window.
            if per_client and line.find('- CONNECT ') > 0:
                m = cx_pattern.match(line)
                ip = m.group('client')
                if client_filter in ("*", ip):
                    client_connections[m.group('cx')] = ip
                    add_client(perIP, ip, client_filter, per_server)

            # Tuple comparison fixes the original's broken window test,
            # which compared hours and minutes independently.
            if dig_from:
                if (next_hour, next_min) < (dig_from_h, dig_from_m):
                    continue
                dig_from = False
            if dig_to and (next_hour, next_min) > (dig_to_h, dig_to_m):
                break

            # Minute rollover: flush the completed minute's counters.
            if cur_min != next_min:
                if not first_dump:
                    dump_to_file("%02d:%02d" % (cur_hour, cur_min),
                                 perIP, out_files)
                else:
                    first_dump = False
                cur_min = next_min
                cur_hour = next_hour

            e = et_pattern.match(line)
            if e:
                eop = e.group('operation')
                et = int(e.group('etime'))
                known = eop in OPERATION_TYPES
                if not known:
                    print(eop)
                # SAVE THE GRAND TOTAL (unknown ops only count in TOTAL,
                # where the original would have raised KeyError)
                tally(perIP['_ALLCLIENTS']['INCOMING'], 'TOTAL', et)
                if known:
                    tally(perIP['_ALLCLIENTS']['INCOMING'], eop, et)
                if per_client:
                    ip = get_ip(client_connections, e.group('cx'))
                    if client_filter in ("*", ip):
                        tally(perIP[ip]['INCOMING'], 'TOTAL', et)
                        if known:
                            tally(perIP[ip]['INCOMING'], eop, et)
                continue

            if per_server:
                m = srv_pattern.match(line)
                if m:
                    ip = get_ip(client_connections, m.group('cx'))
                    server = m.group('server')
                    # Logs show e.g. "MODIFYDN"; normalize to "MODIFY DN".
                    op = m.group('operation').replace("DN", " DN")
                    all_clients = perIP['_ALLCLIENTS']
                    all_clients['_ALLSERVERS']['TOTAL']['tx'] += 1
                    all_clients['_ALLSERVERS'][op]['tx'] += 1
                    if server_filter not in ("*", server):
                        server = "OTHER"
                    if server not in all_clients:
                        add_server(perIP, '_ALLCLIENTS', server)
                    all_clients[server]['TOTAL']['tx'] += 1
                    all_clients[server][op]['tx'] += 1
                    if per_client:
                        if server not in perIP[ip]:
                            add_server(perIP, ip, server)
                        perIP[ip]['_ALLSERVERS']['TOTAL']['tx'] += 1
                        perIP[ip]['_ALLSERVERS'][op]['tx'] += 1
                        perIP[ip][server]['TOTAL']['tx'] += 1
                        perIP[ip][server][op]['tx'] += 1
                    continue  # SERVER_OP line fully handled

            if line.find('- CONNECT ') > 0:
                perIP['_ALLCLIENTS']['INCOMING']['CONNECT']['tx'] += 1
                if per_client:
                    cp = cx_pattern.match(line)
                    ip = cp.group('client')
                    if client_filter in ("*", ip):
                        perIP[ip]['INCOMING']['CONNECT']['tx'] += 1
            elif line.find('- DISCONNECT ') > 0:
                perIP['_ALLCLIENTS']['INCOMING']['DISCONNECT']['tx'] += 1
                if per_client:
                    dc = dx_pattern.match(line)
                    cx = dc.group('cx')
                    ip = get_ip(client_connections, cx)
                    # Drop the mapping by connection id; the original tested
                    # "ip in client_connections" (keys are cx ids) and so
                    # never actually removed entries.
                    if ip != "OTHER" and cx in client_connections:
                        del client_connections[cx]
                    if client_filter in ("*", ip):
                        perIP[ip]['INCOMING']['DISCONNECT']['tx'] += 1
            elif line.find(' ABANDON ') > 0:
                perIP['_ALLCLIENTS']['INCOMING']['ABANDON']['tx'] += 1
                perIP['_ALLCLIENTS']['INCOMING']['TOTAL']['tx'] += 1
                if per_client:
                    ab = ab_pattern.match(line)
                    ip = get_ip(client_connections, ab.group('cx'))
                    if client_filter in ("*", ip):
                        perIP[ip]['INCOMING']['ABANDON']['tx'] += 1

    # Flush the final (possibly partial) minute and close the outputs.
    dump_to_file("%02d:%02d" % (cur_hour, cur_min), perIP, out_files)
    for st in STAT_TYPES:
        out_files[st].close()
    print("Processing took " + str(time.time() - start) + " seconds")


if __name__ == "__main__":
    main()