You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
140 lines
4.1 KiB
Python
140 lines
4.1 KiB
Python
"""
|
|
Steve L. Nyemba <steve@the-phi.com>
|
|
The Phi Technology - Smart Top
|
|
|
|
This program is the core for evaluating folders and applications. Each class is specialized to generate a report in a pandas data-frame
|
|
The classes will focus on Apps, Folders and Protocols
|
|
- SmartTop.get(**args)
|
|
@TODO:
|
|
Protocols (will be used in anomaly detection)
|
|
"""
|
|
from __future__ import division
|
|
import os
|
|
import subprocess
|
|
import numpy as np
|
|
import sys
|
|
import pandas as pd
|
|
import datetime
|
|
class SmartTop:
    """
    Base class of the report generators defined in this file.

    It only fixes the calling convention: ``get`` accepts arbitrary keyword
    arguments. The base implementation produces no report.
    """

    def get(self, **args):
        """
        Placeholder report.

        @param args     keyword arguments; ignored by the base class
        @return         always None
        """
        return None
|
|
|
|
class Apps(SmartTop):
    """
    Report on the running processes as a pandas data-frame.

    The process table is read by running ``self.cmd`` in a shell: ``ps``
    piped through ``awk``, which re-joins the whitespace separated fields
    with ``;``. Because awk splits on whitespace, a command line with
    arguments arrives as several fields; ``parse`` folds them back into the
    three columns ``name``, ``cmd`` and ``args``.
    """

    # Column layout of every frame produced by this class.
    COLUMNS = ['pid', 'user', 'mem', 'cpu', 'status', 'started', 'name', 'cmd', 'args']
    # Columns that are converted to numbers when the frame is built.
    NUMERIC_COLUMNS = ('pid', 'mem', 'cpu')

    def __init__(self):
        """
        Set the shell command that lists the processes and the character
        used to separate the fields of its output.
        """
        self.cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
        self.xchar = ';'

    def get_app(self, stream):
        """
        Split the tokens of a command line into a name, a command and its
        arguments.

        @param stream   list of tokens making up the command line
        @return         list ``[name, cmd, args]`` of strings
        """
        # If the first token is an existing path it is taken as the whole
        # command and the rest as arguments; otherwise every token but the
        # last one is taken as the command.
        index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream) - 1
        cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream)

        # Last path component of the command; it may still hold arguments
        # when the command was assembled from several tokens.
        basename = cmd.split('/')[-1]
        if ' ' in basename:
            p = basename.split(' ')
            name = p[0]
            args = " ".join(p[1:])
        else:
            name = basename
            args = " ".join(stream[index:]) if index > 0 else ""
        return [name, cmd, args]

    def to_pandas(self, m):
        """
        Convert the rows produced by ``parse`` into a data-frame.

        The first entry of ``m`` is the header line of ``ps`` and is
        discarded; every other entry must hold exactly one field per entry
        of ``COLUMNS``. Entries with another number of fields are skipped
        instead of corrupting the frame. Empty fields become None.

        @param m    list of strings, each a row of ``self.xchar`` separated values
        @return     data-frame with the columns listed in ``COLUMNS``
        """
        records = []
        for line in m[1:]:
            fields = line.split(self.xchar)
            if len(fields) != len(self.COLUMNS):
                continue
            records.append([value if value != '' else None for value in fields])

        df = pd.DataFrame(records, columns=self.COLUMNS)
        for column in self.NUMERIC_COLUMNS:
            # Unparsable values become NaN rather than raising.
            df[column] = pd.to_numeric(df[column], errors='coerce')
        return df

    def empty(self, name):
        """
        Build the one-row frame that stands for an application that was
        asked for but is not running (status ``X``, no memory, no cpu).

        @param name     name of the application
        @return         data-frame with a single row
        """
        return pd.DataFrame([{"pid":None,"user":None,"mem":0,"cpu":0,"status":"X","started":None,"name":name,"cmd":None,"args":None}])

    def parse(self, rows):
        """
        Normalize the raw lines of the process listing.

        The first line (header) is kept as it is. In every other line the
        first five fields are kept, the sixth is the elapsed time and the
        remaining fields are handed to ``get_app`` to obtain ``name``,
        ``cmd`` and ``args``. Lines left with no content are dropped.

        @param rows     list of lines, fields separated by ``self.xchar``
        @return         list of strings, fields separated by ``self.xchar``
        """
        m = []
        TIME_INDEX = 5
        ARGS_INDEX = 6

        for position, item in enumerate(rows):
            parts = item.split(self.xchar)
            if position == 0:
                row = parts
            else:
                row = parts[:TIME_INDEX]
                row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX]))
                row += self.get_app(parts[ARGS_INDEX:])
            line = self.xchar.join(row).strip()
            # A line made of separators only carries no information.
            if len(line.replace(self.xchar, "")) > 0:
                m.append(line)
        return m

    def get(self, **args):
        """
        Run the process listing and return it as a data-frame.

        @param filter   (optional) list of application names. Only the rows
                        whose name matches one of them (regular expression
                        search) are returned, and every requested name that
                        has no exact match gets a placeholder row built by
                        ``empty``. Without it all the processes are returned.
        @return         data-frame with the columns listed in ``COLUMNS``,
                        or None if anything went wrong (the error is printed)
        """
        try:
            handler = subprocess.Popen(self.cmd, shell=True, stdout=subprocess.PIPE)
            stream = handler.communicate()[0]
            # The pipe yields bytes; undecodable bytes in a command line
            # must not make the whole listing fail.
            if isinstance(stream, bytes):
                stream = stream.decode('utf-8', 'replace')

            df = self.to_pandas(self.parse(stream.splitlines()))
            if 'filter' not in args:
                return df

            names = args['filter']
            pattern = "|".join(names)
            r = df[df.name.str.contains(pattern, na=False)]
            # Compare directly instead of building a query string, so that
            # names containing quotes cannot break the expression.
            missing = [name for name in names if not (r.name == name).any()]
            if missing:
                r = pd.concat([r] + [self.empty(name) for name in missing])
            return r
        except Exception as e:
            print (e)
            return None
|
|
|
|
|
|
class Folders(SmartTop):
    """
    This class will assess a folder and produce a report in a data-frame that can be later on used for summary statistics
    """

    def __init__(self):
        pass

    def _get(self, dir_path, r=None):
        """
        Walk a folder recursively and describe every file found.

        @param dir_path     folder to walk
        @param r            list the records are appended to; a new list is
                            created for every top-level call when omitted
        @return             list of dictionaries with the keys ``name``,
                            ``path``, ``size`` (bytes, from os.path.getsize)
                            and ``age`` (whole days since the last access)
        """
        # A fresh list per call: a shared default list would keep the
        # records of previous calls and inflate every later report.
        if r is None:
            r = []

        now = datetime.datetime.now()
        for child in os.listdir(dir_path):
            path = os.path.join(dir_path, child)
            if os.path.isdir(path):
                self._get(path, r)
                continue
            try:
                size = os.path.getsize(path)
                file_date = os.path.getatime(path)
            except OSError:
                # Entries that cannot be inspected (e.g. a dangling symbolic
                # link or a file removed meanwhile) are left out of the
                # report instead of aborting the whole walk.
                continue
            age = (now - datetime.datetime.fromtimestamp(file_date)).days
            r.append({"name":child,"path":path,"size":size,"age":age})
        return r

    def get(self, **args):
        """
        Summarize one or several folders.

        @param path     a folder, or a list of folders. Entries that are not
                        existing folders are ignored.
        @return         data-frame with one row per folder and the columns
                        ``name`` (the folder), ``files`` (number of files,
                        sub-folders included), ``age_in_days`` (mean age of
                        the files, NaN when there is none) and ``size_in_kb``
                        (total size of the files in kilobytes)
        """
        paths = args['path']
        if not isinstance(paths, list):
            paths = [paths]

        records = []
        for path in paths:
            #
            # If the folder does NOT exists it should not be treated.
            #
            if not os.path.isdir(path):
                continue
            files = self._get(path)
            count = len(files)
            total_bytes = sum(item["size"] for item in files)
            if count > 0:
                mean_age = sum(item["age"] for item in files) / float(count)
            else:
                mean_age = float("nan")
            records.append({"name":path,"files":count,"age_in_days":mean_age,"size_in_kb":total_bytes / 1024.0})
        return pd.DataFrame(records)
|