You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
140 lines
5.0 KiB
Python
140 lines
5.0 KiB
Python
"""
This file contains classes and functions that extract data from running processes (as reported by tools like top) and store it in a data store of the calling code's choice.

Dependencies:
    - top (on the OS)

@TODO:
    Test this on Windows to see if it works
"""
|
|
import pandas as pd
|
|
import numpy as np
|
|
import subprocess
|
|
import os
|
|
import datetime
|
|
# from transport import factory
|
|
import sys
|
|
import hashlib
|
|
import re
|
|
from io import StringIO
|
|
class Util:
    """
    Helpers that normalize the delimited rows of a process listing.

    The first row handed to :meth:`parse` is treated as the header and is
    passed through untouched; every other row is reshaped so that it always
    ends with the three fields produced by :meth:`app` (name, cmd, args).
    """

    def app(self, stream):
        """
        Format an application name; the command sometimes carries parameters
        or OS path separators.

        :param stream: list of tokens that make up a command line
        :return: ``[name, cmd, args]`` where ``name`` is the text after the
                 last path separator of ``cmd`` (up to its first space)
        """
        # When the first token is an existing path it is taken as the whole
        # command; otherwise every token except the last is folded into it.
        index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream) - 1
        cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)

        # Split once and use os.sep consistently (the original mixed '/' and
        # os.sep, which could index out of range where they differ).
        basename = cmd.split(os.sep)[-1]
        if ' ' in basename:
            # NOTE(review): in this branch the tokens in stream[index:] are
            # not part of the returned args -- confirm this is intended.
            name, _, args = basename.partition(' ')
        else:
            name = basename
            args = " ".join(stream[index:]) if index > 0 else ""

        return [name, cmd, args]

    def parse(self, rows, xchar=';'):
        """
        Parse the lines returned by the process listing command.

        :param rows: list of text lines; the first one is the header
        :param xchar: field separator used in ``rows`` and in the output
        :return: list of ``xchar``-joined strings; rows that contain nothing
                 but separators are dropped
        """
        TIME_INDEX = 5
        ARGS_INDEX = 6

        m = []
        # enumerate() identifies the header by position; looking it up with
        # rows.index() was quadratic and confused duplicates of the header.
        for position, item in enumerate(rows):
            parts = item.split(xchar)
            if position != 0:
                row = parts[:TIME_INDEX]
                row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
                row += self.app(parts[ARGS_INDEX:])
            else:
                row = parts
            row = xchar.join(row).strip()
            # Use the active separator (not a hard-coded ';') to detect rows
            # that carry no data at all.
            if row.replace(xchar, ""):
                m.append(row)
        return m
|
|
|
|
|
|
def read(**args):
    """
    Read information about the running processes by executing ``ps`` piped
    through ``awk`` and return it as a pandas DataFrame.

    Recognized keyword arguments:
        name    comma-separated string of application names; only rows whose
                ``name`` contains one of them are kept. If none of them
                matches, the full listing is returned.
        cols    iterable of column names to keep (unknown names are ignored;
                if none is known the frame is left unchanged)
        logger  callable invoked as ``logger(data=df)`` before returning

    @return DataFrame with the columns
            pid,user,mem,cpu,status,started,name,cmd,args,date,time,node
            (``args`` holds an MD5 digest, ``status`` a readable label),
            or None when an error occurred (the error is printed).
    """
    cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
    xchar = ";"
    base_columns = ['pid', 'user', 'mem', 'cpu', 'status', 'started', 'name', 'cmd', 'args']
    try:
        handler = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        stream = handler.communicate()[0]
        # Decode the bytes instead of splitting their repr(): str(bytes)
        # leaves a "b'" prefix on the first row, a stray "'" last row and
        # escaped non-ASCII characters.
        if isinstance(stream, bytes):
            stream = stream.decode('utf-8', 'replace')
        rows = stream.split('\n')

        m = Util().parse(rows)

        now = datetime.datetime.now()
        d = now.strftime('%m-%d-%Y')
        t = now.strftime('%H:%M:%S')
        n = os.uname()[1]

        # The first parsed row is the ps header. Drop exactly that row and
        # name the columns explicitly, so no process row is lost to a
        # length-based filter or consumed by read_csv as a header line.
        body = m[1:]
        if body:
            df = pd.read_csv(
                StringIO("\n".join(body)),
                sep=xchar,
                header=None,
                names=base_columns,
                quoting=3,  # csv.QUOTE_NONE: quotes in command lines are literal text
            )
        else:
            df = pd.DataFrame(columns=base_columns)
        df['date'] = d
        df['time'] = t
        df['node'] = n

        #
        # We should filter the name of the apps we are interested in here (returning the full logs )
        # @TODO: Add filter here to handle filter on different columns
        #
        if 'name' in args:
            labels = df['name'].astype(str)
            frames = []
            for name in args['name'].split(','):
                needle = name.strip()
                tmp = df[labels.str.contains(needle, regex=False)]
                if tmp.empty:
                    # NOTE(review): the original built a placeholder record
                    # (status "-100") here using an undefined variable and
                    # never stored it; names without a match are skipped.
                    # Confirm whether a placeholder row should be reported.
                    continue
                frames.append(tmp)
            if frames:
                # DataFrame.append was removed in pandas 2.0
                df = pd.concat(frames, ignore_index=False)

        #
        # For security reasons lets hash the args column with an MD5 or sha256
        #
        if not df.empty and 'args' in df:
            df['args'] = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df['args'].tolist()]

        STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'}

        def _label(value):
            # ps appends modifier characters to the state letter (e.g. "S+",
            # "Ssl"); fall back to the leading letter when there is no exact
            # entry.
            value = str(value)
            return STATUS.get(value, STATUS.get(value[:1], 'UNKNOWN'))

        df['status'] = df['status'].apply(_label)

        if 'cols' in args:
            # Keep the caller's order (a set intersection made it arbitrary)
            _cols = [col for col in dict.fromkeys(args['cols']) if col in df.columns]
            if _cols:
                df = df[_cols]

        #
        # we return a data-frame; the logger (if any) receives it first
        #
        logger = args.get('logger')
        if logger is not None:
            logger(data=df)
        df.index = np.arange(df.shape[0])

        return df  # .to_dict(orient='records')

    except Exception as e:
        # Best-effort boundary: report the problem and signal failure with None
        print(e)
        return None
|
|
|
|
# if __name__ == '__main__' :
|
|
# #
|
|
# # Being directly called (external use of the )
|
|
# print(read())
|