From c3ce3227ff889aa562901c3750ed1c16088ce75a Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Sun, 19 Nov 2017 18:05:37 -0600 Subject: [PATCH] Bug fix and enhancement with folder monitoring --- src/data-collector.py | 2 +- src/monitor.py | 135 +++++++++----------------------------- src/utils/agents/actor.py | 4 +- test/TestServerMonitor.py | 23 ++----- 4 files changed, 42 insertions(+), 122 deletions(-) diff --git a/src/data-collector.py b/src/data-collector.py index 42dc14f..a2978a1 100755 --- a/src/data-collector.py +++ b/src/data-collector.py @@ -21,7 +21,7 @@ from threading import Thread, RLock import monitor import utils.agents.actor as actor from utils.agents.manager import Manager -SYS_ARGS['host']='localhost' +SYS_ARGS['host']='localhost'/ ENDPOINT="http://:host/monitor".replace(":host",SYS_ARGS['host']) class Collector(Thread) : def __init__(self): diff --git a/src/monitor.py b/src/monitor.py index 77c5e4e..f2d5d1d 100755 --- a/src/monitor.py +++ b/src/monitor.py @@ -29,7 +29,7 @@ class Analysis: self.now = {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute} def getNow(self): d = datetime.datetime.now() - return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute} + return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute,"second":d.second} def getName(self): return self.__class__.__name__ def reboot(self,row,conf) : @@ -167,7 +167,6 @@ class DetailProcess(Analysis): def init (self,names): #Analysis.init(self) - self.names = names; def getName(self): return "apps" @@ -218,7 +217,7 @@ class DetailProcess(Analysis): cmd = "ps -eo user,pid,pmem,pcpu,vsize,stat,command|grep -Ei \":app\"".replace(":app",name) handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) logs = handler.communicate()[0].split('\n') - logs = [row for row in logs if (row.strip() != '') and ('grep -Ei' in row )== False and (__file__ not in row)] + logs = [row for row in logs if (row.strip() != '') and ('grep -Ei' in row )== False ] if len(logs) == 0: return [dict(self.parse('',fields),**{'label':name}) ] @@ -255,113 +254,43 @@ class DetailProcess(Analysis): class FileWatch(Analysis): def __init__(self): pass - def init(self,folders): + def init(self,folders): self.folders = folders; + self.cache = [] def getName(self): return "folders" - def split(self,row): - - x = row.split(' ') - r = {} - months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] - if x: - BYTES_TO_MB = 1000000 - size = int(x[0])/BYTES_TO_MB - month = months.index(x[1]) + 1 - day = int(x[2]) - age = -1 - hour=minute = 0 - if ':' in x[3] : - hour,minute = x[3].split(':') - now = datetime.datetime.today() - if month == now.month : - year = now.year - else: - year = now.year - 1 - else: - year = int(x[3]) - hour = 0 - minute = 0 - - - - file_date = datetime.datetime(year,month,day,int(hour),int(minute)) - # size = round(size,2) - #file_date = datetime.datetime(year,month,day,hour,minute) - now = datetime.datetime.now() - age = (now - file_date ).days - - return {"size":size,"age":age} - return None - def evaluate(self,path): - cmd = "find :path -print0|xargs -0 ls -ls |awk '{print $6,$7,$8,$9,$10}'".replace(":path",path) - handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) - ostream = handler.communicate()[0].split('\n') - ostream = [row for row in ostream if row.strip() != ''] - #return [self.split(stream) for stream in ostream if stream.strip() != '' and '.DS_Store' not in stream and 'total' not in stream] - #return [self.split(stream) for stream in ostream if path not in stream and not set(['','total','.DS_Store']) & set(stream.split(' '))] - return [] - def toMB(self,size): - m = {'GB':1000,'TB':1000000} - v,u = size.split(' ') - return round(float(v)* m[u.upper()],2) + def evaluate(self,dir_path,r=[]): + """ + This function will recursively scan a folder and retrieve file sizes and age of the files. + The data will be returned as an array of {size,age,label} items + """ + for child in os.listdir(dir_path): + path = os.path.join(dir_path, child) + if os.path.isdir(path): + self.evaluate(path,r) + + else: + size = os.path.getsize(path) + file_date = os.path.getatime(path) + file_date = datetime.datetime.fromtimestamp(file_date) + now = datetime.datetime.now() + age = (now - file_date ).days + r.append({"label":path,"size":size,"age":age,"date":self.getNow()}) + return r def reboot(self,rows,limit) : - return np.sum([ int(self.toMB(item['size']) > self.toMB(limit)) for item in rows]) > 0 + return np.sum([ 1 for item in rows if rows['size'] > limit ]) > 0 def composite(self): - d = [] #-- vector of details (age,size) - - now = datetime.datetime.today() - for folder in self.folders: - if os.path.exists(folder): - xo_raw = self.evaluate(folder) - xo = np.array(ML.Extract(['size','age'],xo_raw)) - if len(xo) == 0: - continue - name = re.findall("([a-z,A-Z,0-9]+)",folder) - name = folder.split(os.sep) - if len(name) == 1: - name = [folder] - else: - i = len(name) -1 - name = [name[i-1]+' '+name[i]] - - name = name[0] - size = round(np.sum(xo[:,0]),2) - if size > 1000 : - size = round(size/1000,2) - units = ' GB' - elif size > 1000000: - size = round(size/1000000,2) - units = ' TB' - else: - size = size - units = ' MB' - size = str(size)+ units - age = round(np.mean(xo[:,1]),2) - if age > 30 and age <= 365 : - age = round(age/30,2) - units = ' Months' - elif age > 365 : - age = round(age/365,2) - units = ' Years' - else: - age = age - units = ' Days' - age = str(age)+units - N = len(xo[:,1]) - xo = {"label":folder} #,"details":xo_raw,"summary":{"size":size,"age":age,"count":len(xo[:,1])}} - xo = dict(xo,**{"size":size,"age":age,"count":N}) - xo["name"] = name - xo['day'] = now.day - xo['month'] = now.month - xo['year'] = now.year - xo['date'] = time.mktime(now.timetuple()) - - d.append(xo) - - return d + out = [] + for folder in self.folders : + + r = self.evaluate(folder,[]) + file_count = len(r) + age = {"mean":np.mean([item['age'] for item in r] ),"var":np.mean([item['age'] for item in r])} + size = {"mean":np.mean([item['size'] for item in r] ),"var":np.mean([item['size'] for item in r])} + out.append({"label":folder,"stats":{"age":age,"size":size,"file_count":file_count},"logs":r}) + return out # class Monitor (Thread): diff --git a/src/utils/agents/actor.py b/src/utils/agents/actor.py index ac4a7da..3eea116 100755 --- a/src/utils/agents/actor.py +++ b/src/utils/agents/actor.py @@ -193,8 +193,8 @@ class Folders(Actor): @pre : isValid """ folder = item['label'] - name = folder.split(os.sep) - name = name[len(name)-1] + name = folder.split(os.sep) + name = name[len(name)-1] signature='-'.join([name,str(item['date']),str(item['count']),'files']) tarball=os.sep.join([folder,'..',signature]) shutil.make_archive(tarball,'tar',folder) diff --git a/test/TestServerMonitor.py b/test/TestServerMonitor.py index 9d59a35..444f235 100755 --- a/test/TestServerMonitor.py +++ b/test/TestServerMonitor.py @@ -4,7 +4,7 @@ from monitor import Env, DetailProcess, ProcessCounter, Sandbox, FileWatch import monitor import os import json -from utils.workers import Top, Learner +# from utils.workers import Top, Learner #from multiprocessing import Lock from threading import Lock path = os.environ['MONITOR_CONFIG_PATH'] @@ -29,8 +29,8 @@ class TestMonitorServer(unittest.TestCase): p = DetailProcess() p.init(['kate','rabbitmq-server','python','apache2','firefox']) r = p.composite() - for row in r: - print row['label'],row['status'],row['proc_count'] + #for row in r: + # print row['label'],row['status'], sum([1 for item in r if item['label']==row['label']]) self.assertTrue(r) def test_ProcessCount(self): @@ -46,22 +46,13 @@ class TestMonitorServer(unittest.TestCase): p = Sandbox() p.init({"sandbox":sandbox_path,"requirements":requirements_path}) p.composite() - def test_StartTop(self): - lock = Lock() - p = Top(CONFIG,lock) - #p.start() - - #p.join() def test_StartLearner(self): - lock = Lock() - p = Learner(CONFIG,lock) - p.start() + pass def test_FileWatch(self): - conf =CONFIG['monitor']['folder'] - path =os.environ['FILE_PATH'] fw = FileWatch() - fw.init([path]) - print fw.composite() + fw.init(CONFIG['folders']) + #r = fw.evaluate('/Users/steve/git/resume') + fw.composite() if __name__ == '__main__' : unittest.main()