smart-top/src/monitor.py

"""
	This program is designed to inspect an application environment
	This program should only be run on unix friendly systems

	We enable the engines to be able to run a several configurations
	Similarly to what a visitor design-pattern would do
"""
from __future__  import division
import os
import subprocess
from sets import Set
import re
import datetime
import urllib2 as http, base64
from threading import Thread, RLock
import time
import numpy as np
from utils.ml import ML
import sys

class Analysis:
	"""
		Base class shared by the monitoring engines defined in this file.

		It stores posted log entries and offers small helpers (current
		timestamp, class name, text clean-up) to its subclasses.
	"""
	def __init__(self):
		# Entries appended through post()
		self.logs = []
	def post(self,object):
		"""
			Append an entry to the logs.
			@param object	entry to store (parameter name kept for keyword callers)
		"""
		self.logs.append(object)
	def init(self):
		"""
			Record the current date/time (minute resolution) in self.now
		"""
		self.now = self.getNow()
	def getNow(self):
		"""
			@return	dictionary with the month, year, day, hour and minute of datetime.now()
		"""
		d = datetime.datetime.now()
		return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute}
	def getName(self):
		"""
			@return	name of the concrete class of this instance
		"""
		return self.__class__.__name__
	def reboot(self,row,conf) :
		"""
			Default implementation: always returns False. Subclasses override it.
		"""
		return False
	def cleanup(self,text):
		"""
			Replace every character that is not alphanumeric, whitespace or ':'
			with a space and strip the result.
			@param text	any value, it is converted with str() first
		"""
		# Raw string so that \s reaches the regex engine without an invalid-escape warning
		return re.sub(r'[^a-zA-Z0-9\s:]',' ',str(text)).strip()


"""
	This class is designed to analyze environment variables. Environment variables can either be folders, files or simple values
	The class returns a quantifiable assessment of the environment variables (expected 100%)
"""
class Env(Analysis):
	"""
		Checks a list of environment variable names and reports the
		percentage that evaluate to 1 together with the names that do not.
	"""
	def __init__(self):
		Analysis.__init__(self)
	def init(self,values):
		"""
			@param values	list of environment variable names to inspect
		"""
		#Analysis.init(self)
		self.values = values
	def evaluate(self,id):
		"""
			This function evaluates the validity of an environment variable by returning a 1 or 0 (computable)
			The function uses propositional logic (https://en.wikipedia.org/wiki/Propositional_calculus):
				- (p) the value looks like a folder or file name (contains os.sep or ends with an extension)
				- (q) the value exists on disk
			The final result is the conjunction of p and q; an undefined variable yields 0.

			NOTE(review): because of the conjunction a variable holding a plain
			(non-path) value also yields 0 -- confirm this is intended.

			@param id	name of the environment variable
			@return	1 or 0
		"""
		if id not in os.environ :
			return 0
		value = os.environ[id]
		# Plain substring test for the separator: os.sep is not a valid regex on every platform
		p = (os.sep in value) or (re.search(r'(\.\w+)$',value) is not None)
		q = os.path.exists(value)
		return int(p and q)

	def composite (self):
		"""
			Evaluate every configured variable.
			@return	the current timestamp fields plus "value" (percentage of
					variables that evaluated to 1) and "missing" (names that evaluated to 0)
		"""
		#Analysis.init(self)
		r = [ self.evaluate(id) for id in self.values]
		N = len(r)
		if N :
			value = 100 * round(float(sum(r))/N,2)
		else:
			# Nothing was expected, hence nothing is missing (avoids a division by zero)
			value = 100.0
		missing = [name for name,found in zip(self.values,r) if found == 0]
		return dict(self.getNow(),**{"value":value,"missing":missing})
"""
	This class is designed to handle analaysis of the a python virtual environment i.e deltas between requirments file and a virtualenv
	@TODO: update the virtual environment
"""
class Sandbox(Analysis):
	"""
		Compares the modules listed in a requirements file with the modules
		reported by `pip freeze` of a sandbox (virtual environment).
	"""
	def __init__(self):
		Analysis.__init__(self)
	def init(self,conf):
		"""
			@param conf	dictionary with the keys 'sandbox' (virtualenv folder) and
						'requirements' (requirements file); a path that does not
						exist is stored as None
		"""
		#Analysis.init(self)
		self.sandbox_path = conf['sandbox'] if os.path.exists(conf['sandbox']) else None
		self.requirements_path = conf['requirements'] if os.path.exists(conf['requirements']) else None

	def get_requirements (self):
		"""
			@return	the entries of the requirements file with '-' and '_' replaced
					by spaces; blank lines and comment lines (#) are ignored
		"""
		# The context manager guarantees the file handle is released
		with open(self.requirements_path) as f:
			lines = [line.strip() for line in f.read().split('\n')]
		return [ name.replace('-',' ').replace('_',' ') for name in lines if name != '' and not name.startswith('#')]
	def get_sandbox_requirements(self):
		"""
			This function will return the modules installed in the sandbox (virtual environment)
			as reported by `<sandbox>/bin/pip freeze`, with '-' and '_' replaced by spaces
		"""
		pip_vm = os.path.join(self.sandbox_path,'bin','pip')
		# universal_newlines yields text (not bytes) on every python version
		r = subprocess.check_output([pip_vm,'freeze'],universal_newlines=True).split('\n')
		return [row.replace('-',' ').replace('_',' ') for row in r if row.strip() != '']
	def evaluate(self):
		pass
	def reboot(self,rows,limit=None) :
		"""
			@param rows	list of dictionaries as returned by composite()
			@return	True if at least one row reports missing modules
		"""
		return any(len(item['missing']) > 0 for item in rows)
	def composite(self):
		"""
			This function returns the ratio of existing modules relative to the ones expected
			@return	the current timestamp fields plus "value" (percentage of required
					modules found in the sandbox) and "missing" (sorted list of absent
					modules), or None when the sandbox or the requirements file is unavailable
		"""
		Analysis.init(self)
		if not (self.sandbox_path and self.requirements_path) :
			return None
		required_modules= self.get_requirements()
		sandbox_modules	= self.get_sandbox_requirements()
		# Built-in set instead of the deprecated sets.Set; sorted for a stable output
		missing = sorted(set(required_modules) - set(sandbox_modules))
		N = len(required_modules)
		if N :
			value = round(1 - (float(len(missing))/N),2)*100
		else:
			# Empty requirements file: nothing can be missing (avoids a division by zero)
			value = 100.0
		return dict(self.getNow(),**{"value":value,"missing":missing})

"""
	This class performs the analysis of a list of processes and determines
	The class provides a quantifiable measure of how many processes it found over all
"""
class ProcessCounter(Analysis):
	"""
		Counts, for each configured name, the lines of `ps -eo comm` matched by grep.
	"""
	def __init__(self):
		Analysis.__init__(self)
	def init(self,names):
		"""
			@param names	list of process names (handed to grep as patterns)
		"""
		#Analysis.init(self)
		self.names = names
	def evaluate(self,name):
		"""
			Count the lines of `ps -eo comm` that grep matches against name.

			The pipeline is built without a shell so that names containing
			whitespace or shell metacharacters are passed to grep verbatim.

			@param name	grep pattern identifying the process
			@return	number of matching lines (0 when grep reports nothing)
		"""
		if not name :
			# An empty pattern produced a count of 0 with the former shell pipeline
			return 0
		ps = subprocess.Popen(['ps','-eo','comm'],stdout=subprocess.PIPE)
		# grep -c counts the matching lines, i.e. the equivalent of "grep ... | wc -l"
		grep = subprocess.Popen(['grep','-c','-e',name],stdin=ps.stdout,stdout=subprocess.PIPE,universal_newlines=True)
		# Close our copy of the pipe so that ps is notified if grep exits first
		ps.stdout.close()
		count = grep.communicate()[0].strip()
		ps.wait()
		return int(count) if count else 0
	def composite(self):
		"""
			@return	the current timestamp fields plus one entry per configured name
					holding its process count
		"""
		#Analysis.init(self)
		r = {}
		for name in self.names :
			r[name] = self.evaluate(name)
		return dict(self.getNow(),**r)

"""
	This class returns an application's both memory and cpu usage
"""
class DetailProcess(Analysis):
	"""
		Collects, for every configured application pattern, the matching rows
		of `ps` (user, pid, memory, cpu, virtual size, state, command) and
		derives a status (crash, zombie, running, idle) for each of them.
	"""
	def __init__(self):
		Analysis.__init__(self)

	def init (self,names):
		"""
			@param names	list of application patterns (extended regular expressions for grep)
		"""
		#Analysis.init(self)
		self.names = names
	def getName(self):
		return "apps"
	def split(self,name,stream):
		"""
			Extract three space separated (decimal) numbers from the beginning of stream.
			@return	the three numbers (as strings) followed by '1' and name, or '' if stream does not match
		"""
		# The dot is escaped so that it only matches a decimal point
		pattern = r"(\d+\.{0,1}\d*)\x20*(\d+\.{0,1}\d*)\x20*(\d+\.{0,1}\d*)"
		g = re.match(pattern,stream.strip())
		if g :
			return list(g.groups())+['1']+[name]
		else:
			return ''
	def reboot(self,rows,conf=None) :
		"""
			@param rows	list of dictionaries as returned by composite()
			@return	True if at least one row is flagged as crashed
		"""
		# evaluate() stores the crash marker in 'status' ('label' holds the
		# application pattern); 'label' is still honoured for backward compatibility
		return any(item.get('status') == 'crash' or item.get('label') == 'crash' for item in rows)
	def parse(self,row,fields):
		"""
			Convert a row of ps into a dictionary keyed by fields.

			The last field should be the command in its integrity: it receives
			every remaining token of the row (as a list).
			An empty row yields 0 for every field and the status 'crash'.
			@pre the row is empty or has at least len(fields) tokens
			@param row	line of text produced by ps (or '')
			@param fields	ordered field names, must include 'status', 'memory_usage' and 'cpu_usage'
		"""
		r = {}
		r['date'] = self.getNow()
		row = row.split()
		for index,name in enumerate(fields) :
			r[name] = row[index] if row else 0
			if name not in ['user','cmd','status','pid'] :
				r[name] = float(r[name])
		if fields :
			# The last field absorbs the rest of the row (a command may contain spaces)
			last = len(fields) - 1
			r[fields[last]] = row[last: ] if row else []
		#
		# Let's set the status given the data extracted
		#
		if r['status'] == 0 :
			r['status'] = 'crash'
		elif 'Z' in r['status'] :
			r['status'] = 'zombie'
		elif r['memory_usage'] > 0 and r['cpu_usage'] > 0:
			r['status'] = 'running'
		else:
			r['status'] = 'idle'
		return r

	def _is_process_row(self,row):
		"""
			True for a line of ps describing a process other than the grep helper.
		"""
		tokens = row.split()
		# Blank lines and the header line of ps have no numeric pid in second position
		if len(tokens) < 2 or not tokens[1].isdigit() :
			return False
		return 'grep' not in row and '-Ei' not in row

	def evaluate(self,name=None) :
		"""
			Return the details of the processes whose ps line matches name.

			The pipeline is built without a shell so that the pattern reaches
			grep verbatim.

			@param name	extended regular expression (case insensitive), None matches every process
			@return	list of dictionaries (see parse) labelled with name; when no
					process matches, a single placeholder row with the status 'crash'
		"""
		if name is None :
			name = ".*"
		fields = ["user","pid","memory_usage","cpu_usage","memory_available","status","cmd"]
		ps = subprocess.Popen(['ps','-eo','user,pid,pmem,pcpu,vsize,stat,command'],stdout=subprocess.PIPE)
		grep = subprocess.Popen(['grep','-Ei','-e',name],stdin=ps.stdout,stdout=subprocess.PIPE,universal_newlines=True)
		# Close our copy of the pipe so that ps is notified if grep exits first
		ps.stdout.close()
		logs = grep.communicate()[0].split('\n')
		ps.wait()
		# A single filter, applied before the emptiness test, so that the
		# crash placeholder is produced whenever no usable row is left
		logs = [row for row in logs if self._is_process_row(row)]

		if len(logs) == 0:
			return [dict(self.parse('',fields),**{'label':name}) ]
		return [dict(self.parse(row,fields),**{'label':name}) for row in logs]

	def status(self,row):
		"""
			@param row	dictionary with the keys 'cpu_usage' and 'memory_available'
			@return	"crash" without available memory, otherwise "running" or "idle" depending on the cpu usage
		"""
		if not row['memory_available'] :
			return "crash"
		return "running" if row['cpu_usage'] else "idle"

	def composite(self):
		"""
			@return	the concatenation of evaluate(name) for every configured name
		"""
		ma = []
		for name in self.names:
			ma += self.evaluate(name)

		return ma
"""
	This class evaluates a list of folders and provides detailed informaiton about age/size of each file
	Additionally the the details are summarized in terms of global size, and oldest file.
"""
class FileWatch(Analysis):
	"""
		Walks a list of folders and summarizes, for each of them, the
		cumulated size, the mean age and the number of files found.
	"""
	BYTES_TO_MB = 1000000
	MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

	def __init__(self):
		# The base class creates self.logs, which post() relies upon
		Analysis.__init__(self)
	def init(self,folders):
		"""
			@param folders	list of folder paths to inspect
		"""
		self.folders = folders
	def getName(self):
		return "folders"
	def split(self,row):
		"""
			Parse a row made of "<size in bytes> <month> <day> <HH:MM or year> ..."
			(the columns selected out of `ls -ls` by __evaluate).

			@param row	line of text
			@return	{"size": size in MB, "age": age in days} or None if the row can not be parsed
		"""
		x = row.split()
		if len(x) < 4 :
			return None
		now = datetime.datetime.now()
		try:
			size = float(int(x[0]))/self.BYTES_TO_MB
			month = self.MONTHS.index(x[1]) + 1
			day = int(x[2])
			if ':' in x[3] :
				# No year given: try the current year first, then the previous one
				hour,minute = [int(value) for value in x[3].split(':')]
				years = [now.year,now.year - 1]
			else:
				hour = minute = 0
				years = [int(x[3])]
		except ValueError :
			return None

		file_date = None
		for year in years :
			try:
				candidate = datetime.datetime(year,month,day,hour,minute)
			except ValueError :
				# e.g. Feb 29 combined with a non-leap year
				continue
			file_date = candidate
			if candidate <= now :
				break
		if file_date is None :
			return None
		return {"size":size,"age":(now - file_date).days}
	def evaluate(self,dir_path):
		"""
			Collect the size and age of every file found below dir_path.

			The age is derived from os.path.getctime. Entries that can not be
			inspected (e.g. removed meanwhile, broken links) are skipped, and
			os.walk does not descend into symbolic links to folders.

			@param dir_path	folder to inspect
			@return	list of {"size": size in MB, "age": age in days}
		"""
		details = []
		now = datetime.datetime.now()
		for root,_folders,files in os.walk(dir_path):
			for child in files :
				path = os.path.join(root, child)
				try:
					size = os.path.getsize(path)
					date = os.path.getctime(path)
				except OSError :
					continue
				age = (now - datetime.datetime.fromtimestamp(date)).days
				details.append({"size":float(size)/self.BYTES_TO_MB,"age":age})
		return details

	def __evaluate(self,path):
		"""
			Debugging helper: runs find/ls on path, prints the command and up to
			the first two rows of its output and returns an empty list.

			NOTE(review): nothing in this class calls it, and path is inserted
			unquoted into a shell command -- consider removing it.
		"""
		cmd = "find  :path -print0|xargs -0 ls -ls |awk '{print $6,$7,$8,$9,$10}'".replace(":path",path)
		handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,universal_newlines=True)
		ostream = handler.communicate()[0].split('\n')
		ostream = [row for row in ostream if row.strip() != '']
		print(cmd)
		# Slicing protects against an output of fewer than two rows
		for row in ostream[:2] :
			print(row)
		return []
	def toMB(self,size):
		"""
			@param size	text made of a value and a unit e.g. "1.5 GB" (MB, GB or TB)
			@return	the size expressed in MB
		"""
		# 'MB' is required because composite() reports small folders in MB
		m = {'MB':1,'GB':1000,'TB':1000000}
		v,u = size.split()
		return round(float(v)* m[u.upper()],2)

	def reboot(self,rows,limit) :
		"""
			@param rows	list of dictionaries as returned by composite()
			@param limit	size threshold formatted like the sizes e.g. "2 GB"
			@return	True if at least one row exceeds the limit
		"""
		if not rows :
			return False
		threshold = self.toMB(limit)
		return any(self.toMB(item['size']) > threshold for item in rows)
	def _format_size(self,size):
		"""
			Format a size given in MB with the most suitable unit (MB, GB, TB).
		"""
		size = round(float(size),2)
		# The largest unit is tested first, otherwise TB could never be selected
		if size > 1000000 :
			return str(round(size/1000000,2)) + ' TB'
		if size > 1000 :
			return str(round(size/1000,2)) + ' GB'
		return str(size) + ' MB'
	def _format_age(self,age):
		"""
			Format an age given in days with the most suitable unit (Days, Months, Years).
		"""
		age = round(float(age),2)
		if age > 365 :
			return str(round(age/365,2)) + ' Years'
		if age > 30 :
			return str(round(age/30,2)) + ' Months'
		return str(age) + ' Days'
	def composite(self):
		"""
			Summarize every configured folder that exists and contains files.

			@return	list of dictionaries with the keys label (folder), name (last two
					path components), size (cumulated, formatted), age (mean,
					formatted), count (number of files), day, month, year and date (epoch)
		"""
		d = [] #-- vector of details (age,size)

		now = datetime.datetime.today()
		for folder in self.folders:
			if not os.path.exists(folder):
				continue
			xo_raw = self.evaluate(folder)
			xo = np.array(ML.Extract(['size','age'],xo_raw))
			if len(xo) == 0:
				continue
			parts = folder.split(os.sep)
			if len(parts) == 1:
				name = folder
			else:
				name = parts[-2]+' '+parts[-1]

			xo = {"label":folder,"size":self._format_size(np.sum(xo[:,0])),"age":self._format_age(np.mean(xo[:,1])),"count":len(xo[:,1])}
			xo["name"] = name
			xo['day'] = now.day
			xo['month'] = now.month
			xo['year'] = now.year
			xo['date'] = time.mktime(now.timetuple())

			d.append(xo)

		return d


# class Monitor (Thread):
# 	def __init__(self,pConfig,pWriter,id='processes') :
# 		Thread.__init__(self)

# 		self.config 	= pConfig[id]
# 		self.writer	= pWriter;
# 		self.logs	= []
# 		self.handler = self.config['class']
# 		self.mconfig = self.config['config']


# 	def stop(self):
# 		self.keep_running = False
# 	def run(self):
# 		r = {}
# 		self.keep_running = True
# 		lock = RLock()
# 		while self.keep_running:
# 			lock.acquire()
# 			for label in self.mconfig:

# 				self.handler.init(self.mconfig[label])
# 				r = self.handler.composite()
# 				self.writer.write(label=label,row = r)

# 				time.sleep(2)
# 			lock.release()

# 			self.prune()
# 			TIME_LAPSE = 60*2
# 			time.sleep(TIME_LAPSE)
# 		print "Stopped ..."
# 	def prune(self) :

# 		MAX_ENTRIES = 100
# 		if len(self.logs) > MAX_ENTRIES :
# 			BEG = len(self.logs) - MAX_SIZE -1
# 			self.logs = self.logs[BEG:]