You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

140 lines
4.1 KiB
Python

"""
Steve L. Nyemba <steve@the-phi.com>
The Phi Technology - Smart Top
This module is the core for evaluating folders and applications. Each class is specialized to generate a report as a pandas data-frame.
The classes focus on Apps, Folders and Protocols (Protocols is not implemented yet, see @TODO).
Every class exposes the same entry point:
- SmartTop.get(**args)
@TODO:
Protocols (will be used in anomaly detection)
"""
from __future__ import division

import csv
import datetime
import io
import os
import subprocess
import sys

import numpy as np
import pandas as pd
class SmartTop(object):
    """
    Base class of the report generators defined in this module.

    Subclasses override get(**args) to build and return their report;
    the implementation provided here produces nothing.
    """

    def get(self, **args):
        """
        Return the report of this generator.
        @param args keyword arguments understood by the concrete subclass (ignored here)
        @return None, always, in this base class
        """
        return None
class Apps(SmartTop):
    """
    Report on the processes currently running on this machine.

    The process table is obtained by running self.cmd (ps piped into awk) and
    is returned as a pandas data-frame with one row per process.
    """

    # Labels of the data-frame columns, in the order parse() emits the fields
    COLUMNS = ['pid', 'user', 'mem', 'cpu', 'status', 'started', 'name', 'cmd', 'args']

    def __init__(self):
        """
        Set the command that lists the processes and the character that
        separates the fields of its output.
        """
        # awk re-emits every line of ps with ';' as output field separator (OFS).
        # The command is a fixed string and needs a shell because of the pipe.
        self.cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
        self.xchar = ';'

    def get_app(self, stream):
        """
        Split the tokens of a command line into an application name, a command and its arguments.

        The split is a heuristic: when the first token is an existing path it is
        taken as the command; otherwise every token but the last one is taken as
        the command. If the last path segment of the command still contains a
        space, its first word becomes the name and the remainder the arguments.
        @param stream list of tokens making up the command line
        @return list [name, cmd, args]
        """
        index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream) - 1
        cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)
        segment = cmd.split('/')[-1]
        if ' ' in segment:
            words = segment.split(' ')
            name = words[0]
            args = " ".join(words[1:])
        else:
            name = segment
            args = " ".join(stream[index:]) if index > 0 else ""
        return [name, cmd, args]

    def to_pandas(self, m):
        """
        Convert the rows produced by parse() into a data-frame.
        @param m list of rows, each one a string of fields separated by self.xchar;
                 the first row is the header and is discarded
        @return data-frame with the columns listed in Apps.COLUMNS, one row per remaining row of m
        """
        # Only the header is skipped: every other row is a process and must be kept.
        rows = m[1:]
        if not rows:
            return pd.DataFrame(columns=self.COLUMNS)
        # The header of ps has fewer fields than the rows built by parse(), hence the
        # explicit column names. Quote characters are ordinary text in a command line.
        return pd.read_csv(
            io.StringIO("\n".join(rows)),
            sep=self.xchar,
            header=None,
            names=self.COLUMNS,
            quoting=csv.QUOTE_NONE,
        )

    def empty(self, name):
        """
        Build the one-row placeholder used by get() for a requested application that was not found.
        @param name name of the application
        @return data-frame with a single row: status 'X', mem and cpu set to 0, name set, every other field None
        """
        row = {"pid": None, "user": None, "mem": 0, "cpu": 0, "status": "X", "started": None, "name": name, "cmd": None, "args": None}
        return pd.DataFrame([row], columns=self.COLUMNS)

    def parse(self, rows):
        """
        Rewrite the raw lines printed by self.cmd into rows that to_pandas() can read.

        The first line is kept as is (header). In every other line the fields
        from the 7th one on (the command line) are collapsed by get_app() into
        three fields: name, cmd and args.
        @param rows list of lines, fields separated by self.xchar
        @return list of rows (strings of fields joined by self.xchar); lines without content are dropped
        """
        m = []
        TIME_INDEX = 5
        ARGS_INDEX = 6
        for position, item in enumerate(rows):
            parts = item.split(self.xchar)
            if position != 0:
                fields = parts[:TIME_INDEX]
                fields.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
                fields += self.get_app(parts[ARGS_INDEX:])
            else:
                fields = parts
            row = self.xchar.join(fields).strip()
            # a row made of separators only carries no information (e.g. trailing blank line)
            if len(row.replace(self.xchar, "")) > 0:
                m.append(row)
        return m

    def get(self, **args):
        """
        Run self.cmd and return the processes it lists as a data-frame.
        @param filter (optional) list of application names. Only the processes whose name
                      matches one of them (regular expression search) are returned, and a
                      placeholder row (see empty()) is added for every requested name that
                      has no process with exactly that name. Without filter every process
                      is returned.
        @return data-frame with the columns listed in Apps.COLUMNS, or None when the command
                or the parsing of its output failed (the error is printed)
        """
        try:
            handler = subprocess.Popen(self.cmd, shell=True, stdout=subprocess.PIPE)
            stream = handler.communicate()[0]
            if isinstance(stream, bytes):
                # the pipe delivers bytes; undecodable bytes must not abort the whole report
                stream = stream.decode('utf-8', 'replace')
            df = self.to_pandas(self.parse(stream.split('\n')))
            if 'filter' not in args:
                return df
            names = args['filter']
            pattern = "|".join(names)
            # a missing name would otherwise put NaN in the boolean mask
            mask = df['name'].fillna('').astype(str).str.contains(pattern)
            r = df[mask]
            frames = [r]
            known = set(r['name'])
            for name in names:
                if name not in known:
                    frames.append(self.empty(name))
                    known.add(name)
            frames = [frame for frame in frames if len(frame) > 0]
            return pd.concat(frames) if frames else r
        except Exception as e:
            # best effort: the failure is reported and the caller receives no report
            print(e)
            return None
class Folders(SmartTop):
    """
    This class will assess a folder and produce a report in a data-frame that can be later on used for summary statistics
    """

    def __init__(self):
        pass

    def _get(self, dir_path, r=None):
        """
        Walk a folder recursively and describe every file found in it.
        @param dir_path folder to walk
        @param r list the descriptions are added to; a new list is created when it is omitted
        @return r, holding one dict per file with the keys name, path, size (as returned by
                os.path.getsize, i.e. bytes) and age (whole days elapsed since os.path.getatime)
        """
        if r is None:
            # A new list for every top-level call: a list used as default value is shared
            # between calls and would carry the files of one folder into the next report.
            r = []
        now = datetime.datetime.now()
        for child in os.listdir(dir_path):
            path = os.path.join(dir_path, child)
            if os.path.isdir(path):
                self._get(path, r)
            else:
                size = os.path.getsize(path)
                file_date = datetime.datetime.fromtimestamp(os.path.getatime(path))
                age = (now - file_date).days
                name = os.path.basename(path)
                r.append({"name": name, "path": path, "size": size, "age": age})
        return r

    def get(self, **args):
        """
        Summarise one or several folders.
        @param path path of a folder, or list/tuple of such paths (required, KeyError otherwise);
                    paths that are not an existing folder are skipped
        @return data-frame with one row per folder and the columns name (the path), files
                (number of files, sub-folders included), age_in_days (mean age of the files,
                NaN for a folder without files) and size_in_kb (total size of the files);
                an empty data-frame when no folder was found
        """
        paths = args['path']
        if not isinstance(paths, (list, tuple)):
            paths = [paths]
        reports = []
        for path in paths:
            #
            # If the folder does NOT exist it should not be treated.
            #
            if not os.path.isdir(path):
                continue
            files = pd.DataFrame(self._get(path), columns=["name", "path", "size", "age"])
            count = files.shape[0]
            # the columns are read by key: files.size would be the number of cells of the frame
            age_in_days = files["age"].mean() if count else float('nan')
            size_in_kb = files["size"].sum() / 1024.0 if count else 0.0
            reports.append(pd.DataFrame([{"name": path, "files": count, "age_in_days": age_in_days, "size_in_kb": size_in_kb}]))
        return pd.concat(reports) if reports else pd.DataFrame()