"""
This file contains the class and functions that extract data about running
processes (as reported by ``ps``, similar to what ``top`` shows) so that the
calling code can store them in the data store of its choice.

Dependencies:
    - ps and awk (on the os)

@TODO: Test this thing on windows to see if it works
"""
import pandas as pd
import numpy as np
import subprocess
import os
import datetime
# from transport import factory
import sys
import hashlib
import csv
from io import StringIO


# Names given to the fields of every parsed process row, in order.
_PROCESS_COLUMNS = ['pid', 'user', 'mem', 'cpu', 'status', 'started', 'name', 'cmd', 'args']

# Translation of ``ps`` state codes into the labels returned to the caller.
_STATUS = {
    'R': 'RUNNING',
    'Z': 'DEAD',
    'D': 'STASIS',
    'S': 'SLEEP',
    'Sl': 'SLEEP',
    'Ss': 'SLEEP',
    'W': 'PAGING',
    'T': 'DEAD',
}


def _status_label(value):
    """
    Translate a ``ps`` STAT value into a label.

    The exact value is looked up first; when it is not known the first
    character alone is tried, so that a state followed by modifier characters
    (e.g. 'S+' or 'Ssl') is still recognised. Anything else is 'UNKNOWN'.
    """
    code = str(value)
    return _STATUS.get(code, _STATUS.get(code[:1], 'UNKNOWN'))


class Util:
    """Helpers that turn raw ``ps`` output rows into delimited records."""

    def app(self, stream):
        """
        Formatting application name, sometimes the name has parameters,
        os separators ...

        :param stream: list of tokens that make up a command line
        :return: a list ``[name, cmd, args]``
        """
        # When the first token is an existing path it alone is the command;
        # otherwise every token but the last one is treated as the command.
        index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream) - 1
        cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)

        # Only the last path component is used to derive the name.
        tail = cmd.split(os.sep)[-1]
        if ' ' in tail:
            p = tail.split(' ')
            name = p[0]
            args = " ".join(p[1:])
        else:
            name = tail
            args = " ".join(stream[index:]) if index > 0 else ""
        return [name, cmd, args]

    def parse(self, rows, xchar=';'):
        """
        This function parses the rows returned by the execution of the command.

        The first row is passed through as-is (it is the header), every other
        row is rebuilt as: the first five fields, the sixth field, then the
        ``[name, cmd, args]`` triple computed by :meth:`app` from the
        remaining fields. Rows that contain nothing but delimiters are dropped.

        :param rows: list of ``xchar`` delimited strings
        :param xchar: field delimiter
        :return: list of ``xchar`` delimited strings
        """
        m = []
        TIME_INDEX = 5
        ARGS_INDEX = 6
        for position, item in enumerate(rows):
            if position != 0:
                parts = item.split(xchar)
                row = parts[:TIME_INDEX]
                row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
                row += self.app(parts[ARGS_INDEX:])
            else:
                row = item.split(xchar)
            row = xchar.join(row).strip()
            # Skip rows that are empty once the delimiters are removed
            if row.replace(xchar, ""):
                m.append(row)
        return m


def read(**args):
    """
    This function will perform the actual reads of process information.

    :param name: (optional) comma separated application names to keep; a name
        that matches no process yields a placeholder record for that name
    :param cols: (optional) collection of column names to keep
    :param logger: (optional) callable, invoked as ``logger(data=<DataFrame>)``
    @return list of {pid,user,mem,cpu,status,started,name,cmd,args,date,time,node}
        records, or None when the processes could not be read
    """
    # NOTE(review): the awk filter compares the STAT column against 9; the
    # intent of that condition is not clear from here - confirm before changing.
    cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
    xchar = ";"
    try:
        handler = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stream = handler.communicate()[0]
        # Decode the raw bytes instead of parsing their repr, which polluted
        # the first and last rows with quote characters.
        if isinstance(stream, bytes):
            stream = stream.decode('utf-8', 'replace')
        rows = stream.split('\n')

        formatter = Util()
        m = formatter.parse(rows, xchar)

        now = datetime.datetime.now()
        d = now.strftime('%m-%d-%Y')
        t = now.strftime('%H:%M:%S')
        n = os.uname()[1]

        # The first parsed row is the header printed by ps; only that row is
        # dropped and the column names are supplied explicitly so that no
        # process row is consumed as a header.
        m = "\n".join(m[1:])
        df = pd.read_csv(
            StringIO(m),
            sep=xchar,
            header=None,
            names=_PROCESS_COLUMNS,
            quoting=csv.QUOTE_NONE,  # quotes in a command line are plain text
        )
        df['date'] = d
        df['time'] = t
        df['node'] = n
        #
        # We should filter the name of the apps we are interested in here (returning the full logs )
        # @TODO: Add filter here to handle filter on different columns
        #
        if 'name' in args:
            frames = []
            for name in args['name'].split(','):
                name = name.strip()
                tmp = df[df['name'] == name]
                if tmp.empty:
                    # The application is not running, we still report on it
                    tmp = pd.DataFrame(
                        [{"pid": None, "user": None, "mem": 0, "cpu": 0, "status": "-100", "started": None, "name": name, "cmd": None, "args": None, "date": d, "time": t, "node": n}],
                        columns=df.columns,
                    )
                frames.append(tmp)
            df = pd.concat(frames, ignore_index=True)
        #
        # For security reasons lets hash the args column with an MD5 or sha256
        #
        if 'args' in df:
            df['args'] = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df['args'].tolist()]

        df['status'] = df['status'].apply(_status_label)

        if 'cols' in args:
            wanted = set(args['cols'])
            # Keep the order of the columns of the frame
            _cols = [column for column in df.columns if column in wanted]
            if _cols:
                df = df[_cols]
        #
        # we return a list of objects (no data-frames)
        logger = args.get('logger')
        if logger is not None:
            logger(data=df)
        return df.to_dict(orient='records')
    except Exception as e:
        # Best effort: the failure is reported and None is returned to the caller
        print(e)
        return None


if __name__ == '__main__':
    #
    # Being directly called (external use of the module)
    print(read())