DC - Bug fix: adding statistical variables

data-collector
Steve Nyemba 7 years ago
parent c3ce3227ff
commit 183dc42df0

@ -21,7 +21,7 @@ from threading import Thread, RLock
import monitor
import utils.agents.actor as actor
from utils.agents.manager import Manager
SYS_ARGS['host']='localhost'/
SYS_ARGS['host']='localhost'
ENDPOINT="http://:host/monitor".replace(":host",SYS_ARGS['host'])
class Collector(Thread) :
def __init__(self):

@ -17,6 +17,7 @@ import time
import numpy as np
from utils.ml import ML
import sys
from scipy import stats
class Analysis:
def __init__(self):
@ -287,9 +288,11 @@ class FileWatch(Analysis):
r = self.evaluate(folder,[])
file_count = len(r)
age = {"mean":np.mean([item['age'] for item in r] ),"var":np.mean([item['age'] for item in r])}
size = {"mean":np.mean([item['size'] for item in r] ),"var":np.mean([item['size'] for item in r])}
out.append({"label":folder,"stats":{"age":age,"size":size,"file_count":file_count},"logs":r})
age_mode = [item[0] for item in stats.mode([item['age'] for item in r])]
size_mode= [item[0] for item in stats.mode([item['size'] for item in r])]
age = {"mode":age_mode,"median":np.median([item['age'] for item in r] ),"mean":np.mean([item['age'] for item in r] ),"var":np.mean([item['age'] for item in r])}
size = {"mode":size_mode,"median":np.median([item['size'] for item in r] ), "mean":np.mean([item['size'] for item in r] ),"var":np.mean([item['size'] for item in r])}
out.append({"label":folder,"date":self.getNow(),"stats":{"age":age,"size":size,"file_count":file_count},"logs":r})
return out

@ -192,13 +192,15 @@ class Folders(Actor):
This function will archive all files in a given folder
@pre : isValid
"""
folder = item['label']
name = folder.split(os.sep)
name = name[len(name)-1]
signature='-'.join([name,str(item['date']),str(item['count']),'files'])
date = ''.join([str(i) for i in item['date'].values()])
signature='-'.join([name,date,str(item['stats']['file_count']),'files'])
tarball=os.sep.join([folder,'..',signature])
shutil.make_archive(tarball,'tar',folder)
self.clean(item)
#self.clean(item)
#
# @TODO: The archive can be uploaded to the cloud or elsewhere
# @param id cloud service identifier {dropbox,box,google-drive,one-drive}
@ -239,15 +241,19 @@ class Folders(Actor):
This function returns whether the following conditions hold :
p : folder exists
q : has_reached threshold
@TODO: Add a user defined configuration element to make this happen
"""
p = os.path.exists(item['label']) and item['label'] in self.lfolders
q = self.get_size(item['size']) >= self.threshold
q = item['stats']['size']['mean'] >= self.threshold and self.threshold > 0
return p and q
def analyze(self,logs):
r = {'clean':self.clean,'archive':self.archive}
self.lfolders = [ folder['label'] for folder in logs]
for item in logs :
if self.can_clean(item) :
self.archive(item)
#self.clean(item)

@ -52,7 +52,7 @@ class Manager() :
# meta = [item['metadata'] for item in plans if item['status']=='active' ]
self.plan = r['plan']
meta = self.plan['metadata']
print meta
if meta :
self.DELAY = 60* int(meta['delay'])
self.LIMIT = int(meta['limit'])
@ -67,7 +67,7 @@ class Manager() :
self.agents = self.filter('agents',meta,self.agents)
self.actors = self.filter('actors',meta,self.actors)
#self.setup(meta)
self.setup(meta)
def filter_collectors(self,meta) :
"""
@ -221,7 +221,7 @@ class Manager() :
COUNT_STOP = int(24*60/ self.DELAY)
write_class = self.config['store']['class']['write']
read_args = self.config['store']['args']
print self.agents
while True :
COUNT += 1
if COUNT > COUNT_STOP :

Loading…
Cancel
Save