""" This program is designed to inspect an application environment This program should only be run on unix friendly systems We enable the engines to be able to run a several configurations Similarly to what a visitor design-pattern would do """ from __future__ import division import os import subprocess from sets import Set import re import datetime import urllib2 as http, base64 from threading import Thread, RLock import time import numpy as np from utils.ml import ML import sys class Analysis: def __init__(self): self.logs = [] pass def post(self,object): self.logs.append(object) def init(self): d = datetime.datetime.now() self.now = {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute} def getNow(self): d = datetime.datetime.now() return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute,"second":d.second} def getName(self): return self.__class__.__name__ def reboot(self,row,conf) : return False def cleanup(self,text): return re.sub('[^a-zA-Z0-9\s:]',' ',str(text)).strip() """ This class is designed to analyze environment variables. Environment variables can either be folders, files or simple values The class returns a quantifiable assessment of the environment variables (expected 100%) """ class Env(Analysis): def __init__(self): Analysis.__init__(self) def init(self,values): #Analysis.init(self) self.values = values """ This function evaluate the validity of an environment variable by returning a 1 or 0 (computable) The function will use propositional logic (https://en.wikipedia.org/wiki/Propositional_calculus) """ def evaluate(self,id): if id in os.environ : # # We can inspect to make sure the environment variable is not a path or filename. # Using propositional logic we proceed as follows: # - (p) We determine if the value is an folder or file name (using regex) # - (q) In case of a file or folder we check for existance # The final result is a conjuction of p and q # value = os.environ[id] expressions = [os.sep,'(\\.\w+)$'] p = sum([ re.search(xchar,value) is not None for xchar in expressions]) q = os.path.exists(value) return int(p and q) else: return 0 def composite (self): #Analysis.init(self) r = [ self.evaluate(id) for id in self.values] ; N = len(r) n = sum(r) value = 100 * round(n/N,2) missing = [self.values[i] for i in range(0,N) if r[i] == 0] return dict(self.getNow(),**{"value":value,"missing":missing}) """ This class is designed to handle analaysis of the a python virtual environment i.e deltas between requirments file and a virtualenv @TODO: update the virtual environment """ class Sandbox(Analysis): def __init__(self): Analysis.__init__(self) def init(self,conf): #Analysis.init(self) if os.path.exists(conf['sandbox']) : self.sandbox_path = conf['sandbox'] else: self.sandbox_path = None if os.path.exists(conf['requirements']) : self.requirements_path = conf['requirements'] else: self.requirements_path = None def get_requirements (self): f = open(self.requirements_path) return [ name.replace('-',' ').replace('_',' ') for name in f.read().split('\n') if name != ''] """ This function will return the modules installed in the sandbox (virtual environment) """ def get_sandbox_requirements(self): cmd = ['freeze'] xchar = ''.join([os.sep]*2) pip_vm = ''.join([self.sandbox_path,os.sep,'bin',os.sep,'pip']).replace(xchar,os.sep) cmd = [pip_vm]+cmd r = subprocess.check_output(cmd).split('\n') return [row.replace('-',' ').replace('_',' ') for row in r if row.strip() != ''] def evaluate(self): pass def reboot(self,rows,limit=None) : return sum([ len(item['missing']) for item in rows ]) > 0 """ This function returns the ratio of existing modules relative to the ones expected """ def composite(self): Analysis.init(self) if self.sandbox_path and self.requirements_path : required_modules= self.get_requirements() sandbox_modules = self.get_sandbox_requirements() N = len(required_modules) n = len(Set(required_modules) - Set(sandbox_modules)) value = round(1 - (n/N),2)*100 missing = list(Set(required_modules) - Set(sandbox_modules)) return dict(self.getNow(),**{"value":value,"missing":missing}) else: return None """ This class performs the analysis of a list of processes and determines The class provides a quantifiable measure of how many processes it found over all """ class ProcessCounter(Analysis): def __init__(self): Analysis.__init__(self) def init(self,names): #Analysis.init(self) self.names = names def evaluate(self,name): cmd = "".join(['ps -eo comm |grep ',name,' |wc -l']) handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) return int(handler.communicate()[0].replace("\n","") ) def composite(self): #Analysis.init(self) r = {} for name in self.names : r[name] = self.evaluate(name) #N = len(r) #n = sum(r) #return n/N return dict(self.getNow(),**r) """ This class returns an application's both memory and cpu usage """ class DetailProcess(Analysis): def __init__(self): Analysis.__init__(self) def init (self,names): #Analysis.init(self) self.names = names; def getName(self): return "apps" def split(self,name,stream): pattern = "(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)".replace(":name",name).strip() g = re.match(pattern,stream.strip()) if g : return list(g.groups())+['1']+[name] else: return '' def reboot(self,rows,conf=None) : return np.sum([int(item['label']=='crash') for item in rows]) > 0 def parse(self,row,fields): """ The last field should be the command in its integrity @pre len(fields) > len(row) """ r = {} now = self.getNow() r['date'] = now row = [term for term in row.split() if term.strip() != ''] for name in fields : index = fields.index(name) r[name] = row[index] if row else 0 if name not in ['user','cmd','status','pid'] : r[name] = float(r[name]) r[name] = row[index: ] if row else [] # # Let's set the status give the data extracted # if r['status'] == 0 : r['status'] = 'crash' elif 'Z' in r['status'] : r['status'] = 'zombie' elif r['memory_usage'] > 0 and r['cpu_usage'] > 0: r['status'] = 'running' else: r['status'] = 'idle' return r def evaluate(self,name=None) : if name is None : name = ".*" fields = ["user","pid","memory_usage","cpu_usage","memory_available","status","cmd"] cmd = "ps -eo user,pid,pmem,pcpu,vsize,stat,command|grep -Ei \":app\"".replace(":app",name) handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) logs = handler.communicate()[0].split('\n') logs = [row for row in logs if (row.strip() != '') and ('grep -Ei' in row )== False ] if len(logs) == 0: return [dict(self.parse('',fields),**{'label':name}) ] else : return [dict(self.parse(row,fields),**{'label':name}) for row in logs if row.strip() != '' and 'grep' not in row and '-Ei' not in row] def status(self,row): x = row['memory_usage'] y = row['cpu_usage'] z = row['memory_available'] if z : if y : return "running" return "idle" else: return "crash" #def format(self,row): # r= {"memory_usage":row[0],"cpu_usage":row[1],"memory_available":row[2]/1000,"proc_count":row[3],"label":self.cleanup(row[4])} # status = self.status(r) # r['status'] = status # return r def composite(self): ma = [] for name in self.names: row = self.evaluate(name) ma += row return ma """ This class evaluates a list of folders and provides detailed informaiton about age/size of each file Additionally the the details are summarized in terms of global size, and oldest file. """ class FileWatch(Analysis): def __init__(self): pass def init(self,folders): self.folders = folders; self.cache = [] def getName(self): return "folders" def evaluate(self,dir_path,r=[]): """ This function will recursively scan a folder and retrieve file sizes and age of the files. The data will be returned as an array of {size,age,label} items """ for child in os.listdir(dir_path): path = os.path.join(dir_path, child) if os.path.isdir(path): self.evaluate(path,r) else: size = os.path.getsize(path) file_date = os.path.getatime(path) file_date = datetime.datetime.fromtimestamp(file_date) now = datetime.datetime.now() age = (now - file_date ).days r.append({"label":path,"size":size,"age":age,"date":self.getNow()}) return r def reboot(self,rows,limit) : return np.sum([ 1 for item in rows if rows['size'] > limit ]) > 0 def composite(self): out = [] for folder in self.folders : r = self.evaluate(folder,[]) file_count = len(r) age = {"mean":np.mean([item['age'] for item in r] ),"var":np.mean([item['age'] for item in r])} size = {"mean":np.mean([item['size'] for item in r] ),"var":np.mean([item['size'] for item in r])} out.append({"label":folder,"stats":{"age":age,"size":size,"file_count":file_count},"logs":r}) return out # class Monitor (Thread): # def __init__(self,pConfig,pWriter,id='processes') : # Thread.__init__(self) # self.config = pConfig[id] # self.writer = pWriter; # self.logs = [] # self.handler = self.config['class'] # self.mconfig = self.config['config'] # def stop(self): # self.keep_running = False # def run(self): # r = {} # self.keep_running = True # lock = RLock() # while self.keep_running: # lock.acquire() # for label in self.mconfig: # self.handler.init(self.mconfig[label]) # r = self.handler.composite() # self.writer.write(label=label,row = r) # time.sleep(2) # lock.release() # self.prune() # TIME_LAPSE = 60*2 # time.sleep(TIME_LAPSE) # print "Stopped ..." # def prune(self) : # MAX_ENTRIES = 100 # if len(self.logs) > MAX_ENTRIES : # BEG = len(self.logs) - MAX_SIZE -1 # self.logs = self.logs[BEG:]