Bug fix and enhancement with folder monitoring

data-collector
Steve Nyemba 7 years ago
parent 726895d862
commit c3ce3227ff

@ -21,7 +21,7 @@ from threading import Thread, RLock
import monitor
import utils.agents.actor as actor
from utils.agents.manager import Manager
SYS_ARGS['host']='localhost'
SYS_ARGS['host']='localhost'/
ENDPOINT="http://:host/monitor".replace(":host",SYS_ARGS['host'])
class Collector(Thread) :
def __init__(self):

@ -29,7 +29,7 @@ class Analysis:
self.now = {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute}
def getNow(self):
d = datetime.datetime.now()
return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute}
return {"month":d.month,"year":d.year, "day":d.day,"hour":d.hour,"minute":d.minute,"second":d.second}
def getName(self):
return self.__class__.__name__
def reboot(self,row,conf) :
@ -167,7 +167,6 @@ class DetailProcess(Analysis):
def init (self,names):
#Analysis.init(self)
self.names = names;
def getName(self):
return "apps"
@ -218,7 +217,7 @@ class DetailProcess(Analysis):
cmd = "ps -eo user,pid,pmem,pcpu,vsize,stat,command|grep -Ei \":app\"".replace(":app",name)
handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
logs = handler.communicate()[0].split('\n')
logs = [row for row in logs if (row.strip() != '') and ('grep -Ei' in row )== False and (__file__ not in row)]
logs = [row for row in logs if (row.strip() != '') and ('grep -Ei' in row )== False ]
if len(logs) == 0:
return [dict(self.parse('',fields),**{'label':name}) ]
@ -255,113 +254,43 @@ class DetailProcess(Analysis):
class FileWatch(Analysis):
def __init__(self):
pass
def init(self,folders):
def init(self,folders):
self.folders = folders;
self.cache = []
def getName(self):
return "folders"
def split(self,row):
x = row.split(' ')
r = {}
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
if x:
BYTES_TO_MB = 1000000
size = int(x[0])/BYTES_TO_MB
month = months.index(x[1]) + 1
day = int(x[2])
age = -1
hour=minute = 0
if ':' in x[3] :
hour,minute = x[3].split(':')
now = datetime.datetime.today()
if month == now.month :
year = now.year
else:
year = now.year - 1
else:
year = int(x[3])
hour = 0
minute = 0
file_date = datetime.datetime(year,month,day,int(hour),int(minute))
# size = round(size,2)
#file_date = datetime.datetime(year,month,day,hour,minute)
now = datetime.datetime.now()
age = (now - file_date ).days
return {"size":size,"age":age}
return None
def evaluate(self,path):
cmd = "find :path -print0|xargs -0 ls -ls |awk '{print $6,$7,$8,$9,$10}'".replace(":path",path)
handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)
ostream = handler.communicate()[0].split('\n')
ostream = [row for row in ostream if row.strip() != '']
#return [self.split(stream) for stream in ostream if stream.strip() != '' and '.DS_Store' not in stream and 'total' not in stream]
#return [self.split(stream) for stream in ostream if path not in stream and not set(['','total','.DS_Store']) & set(stream.split(' '))]
return []
def toMB(self, size):
    """
    Convert a human readable size such as "1.5 GB" into megabytes.

    :param size: string of the form "<number> <unit>" where unit is MB, GB or TB
                 (case-insensitive); decimal multiples (1 GB = 1000 MB) are used
    :return: the size in megabytes as a float rounded to 2 decimal places
    :raises ValueError: if size is not exactly a number followed by a unit
    :raises KeyError: if the unit is not one of MB, GB, TB
    """
    # 'MB' is listed so that a value already expressed in megabytes converts to
    # itself instead of failing the unit lookup.
    factors = {'MB': 1, 'GB': 1000, 'TB': 1000000}
    # split() without a separator tolerates padded or repeated whitespace
    # between the number and the unit.
    value, unit = size.split()
    return round(float(value) * factors[unit.upper()], 2)
def evaluate(self, dir_path, r=None):
    """
    Recursively scan a folder and collect the size and age of every file found.

    :param dir_path: path of the folder to scan
    :param r: optional list the results are appended to; the recursion passes the
              same list down so that all sub-folders share one accumulator.
              A new list is created when it is omitted.
    :return: the list of items, one per file, each of the form
             {"label": file path,
              "size": os.path.getsize of the file (bytes),
              "age": whole days elapsed since the file's last access time,
              "date": value returned by self.getNow()}
    """
    # A literal [] default is created once at definition time and would be
    # shared by every call that omits r, so results of earlier scans would leak
    # into later ones.
    if r is None:
        r = []
    now = datetime.datetime.now()
    for child in os.listdir(dir_path):
        path = os.path.join(dir_path, child)
        if os.path.isdir(path):
            self.evaluate(path, r)
            continue
        size = os.path.getsize(path)
        # Age is measured from the last access time (atime), not modification.
        accessed = datetime.datetime.fromtimestamp(os.path.getatime(path))
        r.append({
            "label": path,
            "size": size,
            "age": (now - accessed).days,
            "date": self.getNow(),
        })
    return r
def reboot(self,rows,limit) :
return np.sum([ int(self.toMB(item['size']) > self.toMB(limit)) for item in rows]) > 0
return np.sum([ 1 for item in rows if rows['size'] > limit ]) > 0
def composite(self):
d = [] #-- vector of details (age,size)
now = datetime.datetime.today()
for folder in self.folders:
if os.path.exists(folder):
xo_raw = self.evaluate(folder)
xo = np.array(ML.Extract(['size','age'],xo_raw))
if len(xo) == 0:
continue
name = re.findall("([a-z,A-Z,0-9]+)",folder)
name = folder.split(os.sep)
if len(name) == 1:
name = [folder]
else:
i = len(name) -1
name = [name[i-1]+' '+name[i]]
name = name[0]
size = round(np.sum(xo[:,0]),2)
if size > 1000 :
size = round(size/1000,2)
units = ' GB'
elif size > 1000000:
size = round(size/1000000,2)
units = ' TB'
else:
size = size
units = ' MB'
size = str(size)+ units
age = round(np.mean(xo[:,1]),2)
if age > 30 and age <= 365 :
age = round(age/30,2)
units = ' Months'
elif age > 365 :
age = round(age/365,2)
units = ' Years'
else:
age = age
units = ' Days'
age = str(age)+units
N = len(xo[:,1])
xo = {"label":folder} #,"details":xo_raw,"summary":{"size":size,"age":age,"count":len(xo[:,1])}}
xo = dict(xo,**{"size":size,"age":age,"count":N})
xo["name"] = name
xo['day'] = now.day
xo['month'] = now.month
xo['year'] = now.year
xo['date'] = time.mktime(now.timetuple())
d.append(xo)
return d
out = []
for folder in self.folders :
r = self.evaluate(folder,[])
file_count = len(r)
age = {"mean":np.mean([item['age'] for item in r] ),"var":np.mean([item['age'] for item in r])}
size = {"mean":np.mean([item['size'] for item in r] ),"var":np.mean([item['size'] for item in r])}
out.append({"label":folder,"stats":{"age":age,"size":size,"file_count":file_count},"logs":r})
return out
# class Monitor (Thread):

@ -193,8 +193,8 @@ class Folders(Actor):
@pre : isValid
"""
folder = item['label']
name = folder.split(os.sep)
name = name[len(name)-1]
name = folder.split(os.sep)
name = name[len(name)-1]
signature='-'.join([name,str(item['date']),str(item['count']),'files'])
tarball=os.sep.join([folder,'..',signature])
shutil.make_archive(tarball,'tar',folder)

@ -4,7 +4,7 @@ from monitor import Env, DetailProcess, ProcessCounter, Sandbox, FileWatch
import monitor
import os
import json
from utils.workers import Top, Learner
# from utils.workers import Top, Learner
#from multiprocessing import Lock
from threading import Lock
path = os.environ['MONITOR_CONFIG_PATH']
@ -29,8 +29,8 @@ class TestMonitorServer(unittest.TestCase):
p = DetailProcess()
p.init(['kate','rabbitmq-server','python','apache2','firefox'])
r = p.composite()
for row in r:
print row['label'],row['status'],row['proc_count']
#for row in r:
# print row['label'],row['status'], sum([1 for item in r if item['label']==row['label']])
self.assertTrue(r)
def test_ProcessCount(self):
@ -46,22 +46,13 @@ class TestMonitorServer(unittest.TestCase):
p = Sandbox()
p.init({"sandbox":sandbox_path,"requirements":requirements_path})
p.composite()
def test_StartTop(self):
lock = Lock()
p = Top(CONFIG,lock)
#p.start()
#p.join()
def test_StartLearner(self):
lock = Lock()
p = Learner(CONFIG,lock)
p.start()
pass
def test_FileWatch(self):
conf =CONFIG['monitor']['folder']
path =os.environ['FILE_PATH']
fw = FileWatch()
fw.init([path])
print fw.composite()
fw.init(CONFIG['folders'])
#r = fw.evaluate('/Users/steve/git/resume')
fw.composite()
if __name__ == '__main__' :
unittest.main()

Loading…
Cancel
Save