You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

140 lines
5.3 KiB
Python

"""
This file contains a class and functions that extract data about running processes (much like top does) and store it in a data store of the calling code's choice
dependencies:
- top (on the os)
@TODO:
Test this thing on windows to see if it works
"""
import pandas as pd
import numpy as np
import subprocess
import os
import datetime
# from transport import factory
import sys
import hashlib
import re
from io import StringIO
class Util:
    """
    Helpers that turn the delimited rows produced by the process listing command
    into normalised delimited records (one string per process).
    """

    def app(self, stream):
        """
        Formatting application name, sometimes the name has parameters or os separators ...

        :stream     list of tokens that make up the command line of a process
        @return     [name, cmd, args] where every double quote in cmd and args has been
                    replaced by a backslash followed by a single quote
        """
        #
        # If the first token is an existing path it is the command on its own,
        # otherwise every token but the last one is considered part of the command
        #
        index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream) - 1
        cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)
        #
        # The application name is looked up in the last path component of the command
        basename = cmd.split(os.sep)[-1]
        if ' ' in basename:
            # NOTE(review): in this branch the tokens from `index` onwards never make it
            # into args -- confirm this is intended
            pieces = basename.split(' ')
            name = pieces[0]
            args = " ".join(pieces[1:])
        else:
            name = basename
            args = " ".join(stream[index:]) if index > 0 else ""
        return [name, cmd.replace('"', "\\'"), args.replace('"', "\\'")]

    def parse(self, rows, xchar=';'):
        """
        This function parses the rows returned by the execution of the command and
        returns a list of delimited strings that will have to be loaded and formatted by the caller

        :rows       list of strings, the first one is kept as is (header), the others are reformatted as
                    5 leading fields, the elapsed time field and the name, cmd, args returned by self.app
        :xchar      field delimiter used to split and re-join every row
        @return     list of delimited strings; rows that only consist of delimiters are dropped
        """
        m = []
        TIME_INDEX = 5
        ARGS_INDEX = 6
        #
        # The header is identified by its position (not by its content), so a row whose
        # text happens to match the first row is still parsed like any other row
        #
        for position, item in enumerate(rows):
            if position != 0:
                parts = item.split(xchar)
                row = parts[:TIME_INDEX]
                row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
                row += self.app(parts[ARGS_INDEX:])
            else:
                row = item.split(xchar)
            row = xchar.join(row).strip()
            #
            # A row is empty when nothing is left once the delimiters are removed
            if len(row.replace(xchar, "")) > 0:
                m.append(row)
        return m
def read(**args):
    """
    This function will perform the actual reads of process informations by running ps (piped to awk) in a shell.

    Optional keyword arguments:
    :name       comma separated application names; when at least one process matches, only the matching rows are returned
    :cols       collection of column names to keep in the result (unknown names are ignored)
    :logger     callable that is invoked as logger(data=df) with the resulting data-frame

    @return     data-frame with columns {pid,user,mem,cpu,status,started,name,cmd,args,date,time,node}
                or None if anything went wrong (the error is printed)
    """
    cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
    xchar = ";"
    columns = ['pid', 'user', 'mem', 'cpu', 'status', 'started', 'name', 'cmd', 'args']
    try:
        handler = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stream = handler.communicate()[0]
        if sys.version_info[0] > 2:
            #
            # The output is bytes in python 3, it must be decoded: str(bytes) yields the repr of the
            # bytes (b'...' wrapper, escaped characters) which corrupts the first/last rows
            rows = stream.decode('utf-8', 'replace').split('\n')
        else:
            rows = stream.split('\n')
        formatter = Util()
        m = formatter.parse(rows, xchar)
        now = datetime.datetime.now()
        d = now.strftime('%m-%d-%Y')
        t = now.strftime('%H:%M:%S')
        n = os.uname()[1]
        #
        # The first row is the header printed by ps, it is skipped by position and the column names
        # are provided explicitly so that no process row is consumed as a header
        #
        m = "\n".join(m[1:])
        df = pd.read_csv(StringIO(m), sep=xchar, header=None, names=columns)
        df['date'] = np.repeat(d, df.shape[0])
        df['time'] = np.repeat(t, df.shape[0])
        df['node'] = np.repeat(n, df.shape[0])
        #
        # We should filter the name of the apps we are interested in here (returning the full logs )
        # @TODO: Add filter here to handle filter on different columns
        #
        if 'name' in args and args['name']:
            matches = []
            for name in args['name'].split(','):
                target = name.strip()
                #
                # A process matches if its name is exactly the requested name and that name is found in a textual cmd
                mask = np.array(
                    [isinstance(_cmd, str) and target == _name and _name in _cmd
                        for _name, _cmd in zip(df['name'], df['cmd'])],
                    dtype=bool)
                found = df[mask]
                if not found.empty:
                    matches.append(found)
            #
            # If none of the requested names is running the full listing is returned
            if matches:
                df = pd.concat(matches)
        #
        # For security reasons lets has the args columns with an MD5 or sha256
        #
        # if not df.empty and 'args' in df :
        #     df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()]
        STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'}
        df['status'] = df['status'].apply(lambda value: STATUS.get(value, 'UNKNOWN'))
        if 'cols' in args and args['cols'] is not None:
            #
            # The order of the columns of the data-frame is preserved (a set intersection has no stable order)
            wanted = set(args['cols'])
            _cols = [column for column in df.columns if column in wanted]
            if _cols:
                df = df[_cols]
        #
        # we return a data-frame, the logger (if any) is handed the data before the index is reset
        logger = args.get('logger')
        if logger is not None:
            logger(data=df)
        df.index = np.arange(df.shape[0])
        return df  # .to_dict(orient='records')
    except Exception as e:
        #
        # Best effort: the error is reported and the caller gets None
        print(e)
        return None
# if __name__ == '__main__' :
# #
# # Being directly called (external use of the )
# print(read())