You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
422 lines
12 KiB
Python
422 lines
12 KiB
Python
"""
|
|
This program is designed to inspect an application environment
|
|
This program should only be run on unix friendly systems
|
|
|
|
We enable the engines to run several configurations
|
|
Similarly to what a visitor design-pattern would do
|
|
"""
|
|
from __future__ import division
|
|
import os
|
|
import subprocess
|
|
from sets import Set
|
|
import re
|
|
import datetime
|
|
import urllib2 as http, base64
|
|
from threading import Thread, RLock
|
|
import time
|
|
import numpy as np
|
|
from utils.ml import ML
|
|
import sys
|
|
|
|
class Analysis(object):
    """Base class shared by the analysis engines defined in this module.

    It offers an in-memory log buffer, timestamp helpers and default
    implementations (``getName``, ``reboot``, ``cleanup``) that the concrete
    engines override when they need to.
    """

    def __init__(self):
        # Entries accumulated through post().
        self.logs = []

    def post(self, object):
        """Append ``object`` to the in-memory log buffer."""
        self.logs.append(object)

    def init(self):
        """Store the current date/time (see ``getNow``) in ``self.now``."""
        self.now = self.getNow()

    def getNow(self):
        """Return the current local date/time as a dictionary.

        :return: dict with the keys month, year, day, hour and minute
        """
        d = datetime.datetime.now()
        return {
            "month": d.month,
            "year": d.year,
            "day": d.day,
            "hour": d.hour,
            "minute": d.minute,
        }

    def getName(self):
        """Return the name of the concrete class."""
        return self.__class__.__name__

    def reboot(self, row, conf):
        """Default policy: never request a reboot."""
        return False

    def cleanup(self, text):
        """Replace every character that is not alphanumeric, whitespace or ':'
        with a blank and strip the result.

        :param text: any value; it is converted with ``str`` first
        :return: the sanitized string
        """
        # Raw string: '\s' inside a plain literal is an invalid escape sequence.
        return re.sub(r'[^a-zA-Z0-9\s:]', ' ', str(text)).strip()
|
|
|
|
|
|
class Env(Analysis):
    """Analyze environment variables.

    Environment variables can either be folders, files or simple values.
    The class returns a quantifiable assessment of the environment variables
    (expected 100%).
    """

    def __init__(self):
        Analysis.__init__(self)

    def init(self, values):
        """Register the names of the environment variables to inspect.

        :param values: sequence of environment variable names
        """
        self.values = values

    def evaluate(self, id):
        """Evaluate the validity of an environment variable as 1 or 0.

        Propositional logic (https://en.wikipedia.org/wiki/Propositional_calculus):
          - (p) the value looks like a folder or file name, i.e. it contains
                the path separator or ends with a ``.extension``
          - (q) the value exists on the file system
        The result is the conjunction of p and q.

        :param id: name of the environment variable
        :return: 1 when the variable is set and (p and q) holds, 0 otherwise
        """
        if id not in os.environ:
            return 0
        value = os.environ[id]
        # The separator is escaped so it is always matched literally.
        expressions = [re.escape(os.sep), r'(\.\w+)$']
        p = any(re.search(xchar, value) is not None for xchar in expressions)
        q = os.path.exists(value)
        # NOTE(review): with a conjunction a plain (non-path) value scores 0
        # and is reported as missing -- confirm this is the intended policy.
        return int(p and q)

    def composite(self):
        """Return the percentage of valid variables and the list of failing ones.

        :return: the current timestamp fields (see ``getNow``) extended with
                 ``value`` (percentage) and ``missing`` (names scoring 0)
        """
        names = list(self.values)
        r = [self.evaluate(id) for id in names]
        N = len(r)
        n = sum(r)
        if N == 0:
            # Nothing to check: avoid the division by zero, nothing is missing.
            value = 100.0
        else:
            value = 100 * round(n / float(N), 2)
        missing = [name for name, score in zip(names, r) if score == 0]
        return dict(self.getNow(), **{"value": value, "missing": missing})
|
|
class Sandbox(Analysis):
    """Analyze a python virtual environment, i.e. the delta between a
    requirements file and the modules installed in a virtualenv.

    @TODO: update the virtual environment
    """

    def __init__(self):
        Analysis.__init__(self)

    def init(self, conf):
        """Register the sandbox and requirements locations.

        :param conf: dict with the keys ``sandbox`` (virtualenv folder) and
                     ``requirements`` (requirements file); a path that does
                     not exist is stored as None
        """
        sandbox = conf['sandbox']
        requirements = conf['requirements']
        self.sandbox_path = sandbox if os.path.exists(sandbox) else None
        self.requirements_path = requirements if os.path.exists(requirements) else None

    @staticmethod
    def _normalize(name):
        """Treat '-' and '_' alike so both listings can be compared."""
        return name.replace('-', ' ').replace('_', ' ')

    def get_requirements(self):
        """Return the normalized entries of the requirements file.

        Blank lines and ``#`` comment lines are skipped so they are not
        reported as missing modules.
        """
        # The context manager guarantees the file handle is released.
        with open(self.requirements_path) as f:
            lines = [line.strip() for line in f.read().split('\n')]
        return [self._normalize(line) for line in lines
                if line != '' and not line.startswith('#')]

    def get_sandbox_requirements(self):
        """Return the normalized modules installed in the sandbox
        (output of the sandbox's own ``pip freeze``).
        """
        pip_vm = os.path.join(self.sandbox_path, 'bin', 'pip')
        # universal_newlines yields text (not bytes) on every python version.
        output = subprocess.check_output([pip_vm, 'freeze'], universal_newlines=True)
        return [self._normalize(row.strip()) for row in output.split('\n')
                if row.strip() != '']

    def evaluate(self):
        pass

    def reboot(self, rows, limit=None):
        """Return True when at least one row reports missing modules."""
        return any(len(item['missing']) > 0 for item in rows)

    def composite(self):
        """Return the ratio of existing modules relative to the ones expected.

        :return: None when the sandbox or the requirements file is not
                 available; otherwise the timestamp fields (see ``getNow``)
                 extended with ``value`` (percentage of required modules found)
                 and ``missing`` (sorted list of required modules not found)
        """
        Analysis.init(self)
        if not (self.sandbox_path and self.requirements_path):
            return None
        required_modules = set(self.get_requirements())
        sandbox_modules = set(self.get_sandbox_requirements())
        missing = sorted(required_modules - sandbox_modules)
        # Both counts are taken on de-duplicated entries so the ratio is consistent.
        N = len(required_modules)
        n = len(missing)
        if N == 0:
            # An empty requirements file is trivially satisfied.
            value = 100.0
        else:
            value = round(1 - (n / float(N)), 2) * 100
        return dict(self.getNow(), **{"value": value, "missing": missing})
|
|
|
|
class ProcessCounter(Analysis):
    """Count the running processes matching each name of a list.

    The class provides a quantifiable measure of how many processes were found
    for every name.
    """

    def __init__(self):
        Analysis.__init__(self)

    def init(self, names):
        """Register the process names to look for."""
        self.names = names

    def evaluate(self, name):
        """Return the number of ``ps -eo comm`` lines matched by ``name``.

        The ``ps | grep`` pipeline is assembled from argument lists instead of
        a shell string, so a name holding blanks or shell metacharacters is
        passed to grep untouched.

        :param name: pattern handed to ``grep`` (basic regular expression)
        :return: number of matching lines
        """
        ps = subprocess.Popen(['ps', '-eo', 'comm'], stdout=subprocess.PIPE)
        # -c prints the count of matching lines, -e protects a leading dash.
        grep = subprocess.Popen(['grep', '-c', '-e', name],
                                stdin=ps.stdout, stdout=subprocess.PIPE)
        # Only grep keeps the read end of the pipe open from now on.
        ps.stdout.close()
        output = grep.communicate()[0]
        ps.wait()
        return int(output.strip() or 0)

    def composite(self):
        """Return the timestamp fields (see ``getNow``) extended with one
        ``name -> count`` entry per registered name.
        """
        r = {}
        for name in self.names:
            r[name] = self.evaluate(name)
        return dict(self.getNow(), **r)
|
|
|
|
class DetailProcess(Analysis):
    """Report both the memory and the cpu usage of applications."""

    def __init__(self):
        Analysis.__init__(self)

    def init(self, names):
        """Register the application names (patterns) to inspect."""
        self.names = names

    def getName(self):
        return "apps"

    def split(self, name, stream):
        """Extract three numeric columns from ``stream``.

        :return: the three captured values followed by '1' and ``name``, or
                 '' when the stream does not start with three numbers
        """
        pattern = r"(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)\x20*(\d+.{0,1}\d*)"
        g = re.match(pattern, stream.strip())
        if g:
            return list(g.groups()) + ['1'] + [name]
        return ''

    def reboot(self, rows, conf=None):
        """Return True when at least one row describes a crashed application.

        ``parse`` records the crash in ``status``; ``label`` carries the
        application name and is still honoured for backward compatibility.
        """
        return any(item.get('status') == 'crash' or item.get('label') == 'crash'
                   for item in rows)

    def parse(self, row, fields):
        """Turn one line of ``ps`` output into a dictionary.

        The last field receives every remaining token, i.e. the command in its
        integrity.

        @pre the row is empty or holds at least len(fields) tokens, and
             ``fields`` contains status, memory_usage and cpu_usage

        :param row: one line of ps output ('' when no process was found)
        :param fields: column names, in the order of the ps columns
        :return: dict holding ``date``, one entry per field and a ``status``
                 among crash, zombie, running and idle
        """
        r = {'date': self.getNow()}
        tokens = row.split()
        text_fields = ('user', 'cmd', 'status', 'pid')
        for index, name in enumerate(fields):
            value = tokens[index] if tokens else 0
            r[name] = value if name in text_fields else float(value)
        if fields:
            # The command may contain blanks: keep all the trailing tokens.
            r[fields[-1]] = tokens[len(fields) - 1:] if tokens else []
        #
        # Let's set the status given the data extracted
        #
        if r['status'] == 0:
            # No row at all: the application is not running.
            r['status'] = 'crash'
        elif 'Z' in r['status']:
            r['status'] = 'zombie'
        elif r['memory_usage'] > 0 and r['cpu_usage'] > 0:
            r['status'] = 'running'
        else:
            r['status'] = 'idle'
        return r

    def evaluate(self, name=None):
        """Return one parsed row per process whose ps line matches ``name``.

        ``ps`` is run without a shell and the (case-insensitive) matching is
        done here, so the name is never interpolated in a command line, the
        header row is never parsed as data and processes that merely contain
        "grep" in their command are no longer discarded.

        :param name: regular expression; None matches every process
        :return: list of dicts (see ``parse``) extended with ``label``; a
                 single 'crash' row when nothing matches
        """
        if name is None:
            name = ".*"
        fields = ["user", "pid", "memory_usage", "cpu_usage", "memory_available", "status", "cmd"]
        cmd = ['ps', '-eo', 'user,pid,pmem,pcpu,vsize,stat,command']
        handler = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
        output = handler.communicate()[0]
        own_pid = str(handler.pid)
        try:
            matcher = re.compile(name, re.IGNORECASE)
        except re.error:
            # Not a valid expression: fall back to a literal match.
            matcher = re.compile(re.escape(name), re.IGNORECASE)
        logs = []
        # The first line is the header printed by ps.
        for row in output.split('\n')[1:]:
            tokens = row.split()
            if not tokens or not matcher.search(row):
                continue
            if len(tokens) > 1 and tokens[1] == own_pid:
                # Skip the ps process spawned above.
                continue
            logs.append(row)
        if not logs:
            return [dict(self.parse('', fields), **{'label': name})]
        return [dict(self.parse(row, fields), **{'label': name}) for row in logs]

    def status(self, row):
        """Derive a status from a row: 'crash' without memory_available,
        'running' with cpu usage, 'idle' otherwise.
        """
        y = row['cpu_usage']
        z = row['memory_available']
        if not z:
            return "crash"
        if y:
            return "running"
        return "idle"

    def composite(self):
        """Return the concatenation of ``evaluate`` for every registered name."""
        ma = []
        for name in self.names:
            ma += self.evaluate(name)
        return ma
|
|
class FileWatch(Analysis):
    """Evaluate a list of folders and provide the age/size of each file.

    Additionally the details are summarized per folder in terms of global
    size, mean age and number of files.
    """

    def __init__(self):
        # The base initialiser creates the log buffer used by post().
        Analysis.__init__(self)

    def init(self, folders):
        """Register the folders to inspect."""
        self.folders = folders

    def getName(self):
        return "folders"

    def split(self, row):
        """Parse a ``size month day time-or-year`` row (``ls -l`` style).

        :param row: e.g. "2048 Jan 12 10:32" or "2048 Jan 12 2015"
        :return: dict with ``size`` (MB) and ``age`` (days), or None when the
                 row does not hold the four expected columns
        """
        x = row.split()
        if len(x) < 4:
            return None
        months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        BYTES_TO_MB = 1000000
        size = int(x[0]) / float(BYTES_TO_MB)
        month = months.index(x[1]) + 1
        day = int(x[2])
        now = datetime.datetime.now()
        if ':' in x[3]:
            # A time instead of a year: the year is not printed, so take the
            # most recent occurrence of that date that is not in the future.
            hour, minute = [int(value) for value in x[3].split(':')]
            try:
                file_date = datetime.datetime(now.year, month, day, hour, minute)
            except ValueError:
                # e.g. Feb 29 while the current year is not a leap year
                file_date = None
            if file_date is None or file_date > now:
                file_date = datetime.datetime(now.year - 1, month, day, hour, minute)
        else:
            file_date = datetime.datetime(int(x[3]), month, day)
        age = (now - file_date).days
        return {"size": size, "age": age}

    def evaluate(self, dir_path):
        """Walk ``dir_path`` recursively and describe every file found.

        :param dir_path: folder to inspect
        :return: list of dicts with ``size`` (MB) and ``age`` (days elapsed
                 since the time reported by ``os.path.getctime``)
        """
        BYTES_TO_MB = 1000000
        now = datetime.datetime.now()
        details = []
        for root, _folders, files in os.walk(dir_path):
            for child in files:
                path = os.path.join(root, child)
                try:
                    size = os.path.getsize(path)
                    date = os.path.getctime(path)
                except OSError:
                    # File removed meanwhile or dangling link: skip it.
                    continue
                age = (now - datetime.datetime.fromtimestamp(date)).days
                details.append({"size": size / float(BYTES_TO_MB), "age": age})
        return details

    def __evaluate(self, path):
        """Legacy ``find | ls`` based scanner.

        It already returned an empty list unconditionally; it is kept as a
        stub (without the debugging output) and superseded by ``evaluate``.
        """
        return []

    def toMB(self, size):
        """Convert a "<value> <unit>" string (KB, MB, GB or TB) to megabytes.

        :param size: e.g. "1.5 GB"
        :return: the size in MB rounded to 2 decimals
        :raises KeyError: when the unit is not supported
        """
        m = {'KB': 0.001, 'MB': 1, 'GB': 1000, 'TB': 1000000}
        v, u = size.strip().split()
        return round(float(v) * m[u.upper()], 2)

    def reboot(self, rows, limit):
        """Return True when at least one row is larger than ``limit``.

        :param rows: dicts holding a ``size`` such as "12.5 MB"
        :param limit: threshold expressed like a size, e.g. "2 GB"
        """
        rows = list(rows)
        if not rows:
            return False
        threshold = self.toMB(limit)
        return any(self.toMB(item['size']) > threshold for item in rows)

    def composite(self):
        """Summarize every existing registered folder.

        :return: list with one dict per non-empty folder holding label, name,
                 size (MB/GB/TB string), age (Days/Months/Years string), count
                 and the date of the evaluation (day, month, year, date)
        """
        d = []  # -- vector of summaries (one per folder)
        now = datetime.datetime.today()
        for folder in self.folders:
            if not os.path.exists(folder):
                continue
            xo_raw = self.evaluate(folder)
            xo = np.array(ML.Extract(['size', 'age'], xo_raw))
            if len(xo) == 0:
                continue
            # The display name is made of the last two components of the path.
            parts = folder.split(os.sep)
            if len(parts) == 1:
                name = folder
            else:
                name = parts[-2] + ' ' + parts[-1]

            size = round(float(np.sum(xo[:, 0])), 2)
            # The largest unit is tested first, otherwise TB is unreachable.
            if size > 1000000:
                size = round(size / 1000000, 2)
                units = ' TB'
            elif size > 1000:
                size = round(size / 1000, 2)
                units = ' GB'
            else:
                units = ' MB'
            size = str(size) + units

            age = round(float(np.mean(xo[:, 1])), 2)
            if age > 365:
                age = round(age / 365, 2)
                units = ' Years'
            elif age > 30:
                age = round(age / 30, 2)
                units = ' Months'
            else:
                units = ' Days'
            age = str(age) + units

            N = len(xo[:, 1])
            xo = {"label": folder, "size": size, "age": age, "count": N}
            xo["name"] = name
            xo['day'] = now.day
            xo['month'] = now.month
            xo['year'] = now.year
            xo['date'] = time.mktime(now.timetuple())
            d.append(xo)
        return d
|
|
|
|
|
|
# class Monitor (Thread):
|
|
# def __init__(self,pConfig,pWriter,id='processes') :
|
|
# Thread.__init__(self)
|
|
|
|
# self.config = pConfig[id]
|
|
# self.writer = pWriter;
|
|
# self.logs = []
|
|
# self.handler = self.config['class']
|
|
# self.mconfig = self.config['config']
|
|
|
|
|
|
|
|
# def stop(self):
|
|
# self.keep_running = False
|
|
# def run(self):
|
|
# r = {}
|
|
# self.keep_running = True
|
|
# lock = RLock()
|
|
# while self.keep_running:
|
|
# lock.acquire()
|
|
# for label in self.mconfig:
|
|
|
|
# self.handler.init(self.mconfig[label])
|
|
# r = self.handler.composite()
|
|
# self.writer.write(label=label,row = r)
|
|
|
|
# time.sleep(2)
|
|
# lock.release()
|
|
|
|
# self.prune()
|
|
# TIME_LAPSE = 60*2
|
|
# time.sleep(TIME_LAPSE)
|
|
# print "Stopped ..."
|
|
# def prune(self) :
|
|
|
|
# MAX_ENTRIES = 100
|
|
# if len(self.logs) > MAX_ENTRIES :
|
|
# BEG = len(self.logs) - MAX_SIZE -1
|
|
# self.logs = self.logs[BEG:]
|
|
|