DC - Bug fix: adding statistical variables

data-collector
Steve Nyemba 7 years ago
parent c3ce3227ff
commit 183dc42df0

@ -21,7 +21,7 @@ from threading import Thread, RLock
import monitor import monitor
import utils.agents.actor as actor import utils.agents.actor as actor
from utils.agents.manager import Manager from utils.agents.manager import Manager
SYS_ARGS['host']='localhost'/ SYS_ARGS['host']='localhost'
ENDPOINT="http://:host/monitor".replace(":host",SYS_ARGS['host']) ENDPOINT="http://:host/monitor".replace(":host",SYS_ARGS['host'])
class Collector(Thread) : class Collector(Thread) :
def __init__(self): def __init__(self):

@ -17,6 +17,7 @@ import time
import numpy as np import numpy as np
from utils.ml import ML from utils.ml import ML
import sys import sys
from scipy import stats
class Analysis: class Analysis:
def __init__(self): def __init__(self):
@ -287,9 +288,11 @@ class FileWatch(Analysis):
r = self.evaluate(folder,[]) r = self.evaluate(folder,[])
file_count = len(r) file_count = len(r)
age = {"mean":np.mean([item['age'] for item in r] ),"var":np.mean([item['age'] for item in r])} age_mode = [item[0] for item in stats.mode([item['age'] for item in r])]
size = {"mean":np.mean([item['size'] for item in r] ),"var":np.mean([item['size'] for item in r])} size_mode= [item[0] for item in stats.mode([item['size'] for item in r])]
out.append({"label":folder,"stats":{"age":age,"size":size,"file_count":file_count},"logs":r}) age = {"mode":age_mode,"median":np.median([item['age'] for item in r] ),"mean":np.mean([item['age'] for item in r] ),"var":np.mean([item['age'] for item in r])}
size = {"mode":size_mode,"median":np.median([item['size'] for item in r] ), "mean":np.mean([item['size'] for item in r] ),"var":np.mean([item['size'] for item in r])}
out.append({"label":folder,"date":self.getNow(),"stats":{"age":age,"size":size,"file_count":file_count},"logs":r})
return out return out

@ -192,20 +192,22 @@ class Folders(Actor):
This function will archive all files in a given folder This function will archive all files in a given folder
@pre : isValid @pre : isValid
""" """
folder = item['label'] folder = item['label']
name = folder.split(os.sep) name = folder.split(os.sep)
name = name[len(name)-1] name = name[len(name)-1]
signature='-'.join([name,str(item['date']),str(item['count']),'files']) date = ''.join([str(i) for i in item['date'].values()])
signature='-'.join([name,date,str(item['stats']['file_count']),'files'])
tarball=os.sep.join([folder,'..',signature]) tarball=os.sep.join([folder,'..',signature])
shutil.make_archive(tarball,'tar',folder) shutil.make_archive(tarball,'tar',folder)
self.clean(item) #self.clean(item)
# #
# @TODO: The archive can be uploaded to the cloud or elsewhere # @TODO: The archive can be uploaded to the cloud or elsewhere
# @param id cloud service identifier {dropbox,box,google-drive,one-drive} # @param id cloud service identifier {dropbox,box,google-drive,one-drive}
# @param key authorization key for the given service # @param key authorization key for the given service
# #
pass pass
def clean(self,item): def clean(self,item):
""" """
This function consists in deleting files from a given folder This function consists in deleting files from a given folder
@ -239,15 +241,19 @@ class Folders(Actor):
This function returns whether the following : This function returns whether the following :
p : folder exists p : folder exists
q : has_reached threshold q : has_reached threshold
@TODO: Add a user defined configuration element to make this happen
""" """
p = os.path.exists(item['label']) and item['label'] in self.lfolders p = os.path.exists(item['label']) and item['label'] in self.lfolders
q = self.get_size(item['size']) >= self.threshold q = item['stats']['size']['mean'] >= self.threshold and self.threshold > 0
return p and q return p and q
def analyze(self,logs): def analyze(self,logs):
r = {'clean':self.clean,'archive':self.archive} r = {'clean':self.clean,'archive':self.archive}
self.lfolders = [ folder['label'] for folder in logs] self.lfolders = [ folder['label'] for folder in logs]
for item in logs : for item in logs :
if self.can_clean(item) : if self.can_clean(item) :
self.archive(item) self.archive(item)
#self.clean(item) #self.clean(item)

@ -52,7 +52,7 @@ class Manager() :
# meta = [item['metadata'] for item in plans if item['status']=='active' ] # meta = [item['metadata'] for item in plans if item['status']=='active' ]
self.plan = r['plan'] self.plan = r['plan']
meta = self.plan['metadata'] meta = self.plan['metadata']
print meta
if meta : if meta :
self.DELAY = 60* int(meta['delay']) self.DELAY = 60* int(meta['delay'])
self.LIMIT = int(meta['limit']) self.LIMIT = int(meta['limit'])
@ -67,7 +67,7 @@ class Manager() :
self.agents = self.filter('agents',meta,self.agents) self.agents = self.filter('agents',meta,self.agents)
self.actors = self.filter('actors',meta,self.actors) self.actors = self.filter('actors',meta,self.actors)
#self.setup(meta) self.setup(meta)
def filter_collectors(self,meta) : def filter_collectors(self,meta) :
""" """
@ -221,7 +221,7 @@ class Manager() :
COUNT_STOP = int(24*60/ self.DELAY) COUNT_STOP = int(24*60/ self.DELAY)
write_class = self.config['store']['class']['write'] write_class = self.config['store']['class']['write']
read_args = self.config['store']['args'] read_args = self.config['store']['args']
print self.agents
while True : while True :
COUNT += 1 COUNT += 1
if COUNT > COUNT_STOP : if COUNT > COUNT_STOP :

Loading…
Cancel
Save