""" Steve L. Nyemba The Phi Technology - Smart Top This program is the core for evaluating folders and applications. Each class is specialized to generate a report in a pandas data-frame The classes will focus on Apps, Folders and Protocols - SmartTop.get(**args) @TODO: Protocols (will be used in anomaly detection) """ from __future__ import division import os import subprocess import numpy as np import sys import pandas as pd import datetime class SmartTop: def get(self,**args): return None class Apps(SmartTop) : def __init__(self): """ This class will process a system command and parse the outpout accordingly given a parser @param parse is a parser pointer """ self.cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'" self.xchar = ';' def get_app(self,stream): index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream)-1 cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream) if ' ' in cmd.split('/')[len(cmd.split('/'))-1] : p = cmd.split('/')[len(cmd.split('/'))-1].split(' ') name = p[0] args = " ".join(p[1:]) else: name = cmd.split('/')[len(cmd.split('/'))-1] args = " ".join(stream[index:]) if index > 0 else "" return [name,cmd,args] def to_pandas(self,m): """ This function will convert the output of ps to a data-frame @param m raw matrix i.e list of values like a csv """ m = [item for item in m if len(item) != len (m[0])] m = "\n".join(m[1:]) df = pd.read_csv(pd.compat.StringIO(m),sep=self.xchar) df.columns =['pid','user','mem','cpu','status','started','name','cmd','args'] return df def empty(self,name): return pd.DataFrame([{"pid":None,"user":None,"mem":0,"cpu":0,"status":"X","started":None,"name":name,"cmd":None,"args":None}]) def parse(self,rows): m = [] TIME_INDEX = 5 ARGS_INDEX = 6 for item in rows : if rows.index(item) != 0 : parts = item.split(self.xchar) row = parts[:TIME_INDEX] row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX])) row += self.get_app(parts[ARGS_INDEX:]) else: row = item.split(self.xchar) row = (self.xchar.join(row)).strip() if len(row.replace(";","")) > 0 : m.append(row) return m def get(self,**args): """ This function returns a the output of a command to the calling code that is piped into the class The output will be stored in a data frame with columns @ """ try: handler = subprocess.Popen(self.cmd,shell=True,stdout=subprocess.PIPE) stream = handler.communicate()[0] rows = stream.split('\n') df = self.to_pandas(self.parse(rows)) r = pd.DataFrame() if 'filter' in args : pattern = "|".join(args['filter']) r = df[df.name.str.contains(pattern)] for name in args['filter'] : filter = "name == '"+name+"'" if r.query(filter).size == 0 : r = r.append(self.empty(name)) return r except Exception,e: print (e) return None class Folders(SmartTop): """ This class will assess a folder and produce a report in a data-frame that can be later on used for summary statistics """ def __init__(self): pass def _get(self,dir_path,r=[]): for child in os.listdir(dir_path): path = os.path.join(dir_path, child) if os.path.isdir(path): self._get(path,r) else: size = os.path.getsize(path) file_date = os.path.getatime(path) file_date = datetime.datetime.fromtimestamp(file_date) now = datetime.datetime.now() age = (now - file_date ).days name = os.path.basename(path) r.append({"name":name,"path":path,"size":size,"age":age}) return r def get(self,**args): # path = args['path'] if isinstance(args['path'],list) == False: paths = [args['path']] else: paths = args['path'] _out = pd.DataFrame() for path in paths : if os.path.exists(path) : # # If the folder does NOT exists it should not be treated. # r = pd.DataFrame(self._get(path)) r = pd.DataFrame([{"name":path,"files":r.shape[0],"age_in_days":r.age.mean(),"size_in_kb":r.size.sum()}]) _out = _out.append(r) return _out