|
|
|
@ -8,15 +8,7 @@ from utils.transport import *
|
|
|
|
|
from utils.ml import AnomalyDetection,ML
|
|
|
|
|
from utils.params import PARAMS
|
|
|
|
|
import time
|
|
|
|
|
"""
|
|
|
|
|
This class is intended to apply anomaly detection to various areas of learning
|
|
|
|
|
The areas of learning that will be skipped are :
|
|
|
|
|
['_id','_rev','learn'] ...
|
|
|
|
|
|
|
|
|
|
@TODO:
|
|
|
|
|
- Find a way to perform dimensionality reduction if need be
|
|
|
|
|
"""
|
|
|
|
|
class Anomalies(Thread) :
|
|
|
|
|
class BaseLearner(Thread):
|
|
|
|
|
def __init__(self,lock) :
|
|
|
|
|
Thread.__init__(self)
|
|
|
|
|
path = PARAMS['path']
|
|
|
|
@ -25,7 +17,31 @@ class Anomalies(Thread) :
|
|
|
|
|
f = open(path)
|
|
|
|
|
self.config = json.loads(f.read())
|
|
|
|
|
f.close()
|
|
|
|
|
else:
|
|
|
|
|
self.config = None
|
|
|
|
|
self.lock = lock
|
|
|
|
|
self.factory = DataSourceFactory()
|
|
|
|
|
self.quit = False
|
|
|
|
|
"""
|
|
|
|
|
This function is designed to stop processing gracefully
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
def stop(self):
    """
    Ask this learner to stop processing gracefully.

    Only raises the ``quit`` flag on the instance; nothing is interrupted
    here, so code that should honour the request has to read ``self.quit``.
    """
    self.quit = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
This class is intended to apply anomaly detection to various areas of learning
|
|
|
|
|
The areas of learning that will be skipped are :
|
|
|
|
|
['_id','_rev','learn'] ...
|
|
|
|
|
|
|
|
|
|
@TODO:
|
|
|
|
|
- Find a way to perform dimensionality reduction if need be
|
|
|
|
|
"""
|
|
|
|
|
class Anomalies(BaseLearner) :
|
|
|
|
|
def __init__(self,lock):
|
|
|
|
|
BaseLearner.__init__(self,lock)
|
|
|
|
|
if self.config :
|
|
|
|
|
#
|
|
|
|
|
# Initializing data store & factory class
|
|
|
|
|
#
|
|
|
|
@ -34,9 +50,9 @@ class Anomalies(Thread) :
|
|
|
|
|
self.rclass = self.config['store']['class']['read']
|
|
|
|
|
self.wclass = self.config['store']['class']['write']
|
|
|
|
|
self.rw_args = self.config['store']['args']
|
|
|
|
|
self.factory = DataSourceFactory()
|
|
|
|
|
# self.factory = DataSourceFactory()
|
|
|
|
|
self.quit = False
|
|
|
|
|
self.lock = lock
|
|
|
|
|
# self.lock = lock
|
|
|
|
|
def format(self,stream):
    """
    Placeholder for formatting a stream; nothing is implemented yet.

    @param stream   ignored for now
    @return         always None
    """
    return None
|
|
|
|
|
def stop(self):
|
|
|
|
@ -46,7 +62,8 @@ class Anomalies(Thread) :
|
|
|
|
|
DELAY = self.config['delay'] * 60
|
|
|
|
|
reader = self.factory.instance(type=self.rclass,args=self.rw_args)
|
|
|
|
|
data = reader.read()
|
|
|
|
|
key = 'apps'
|
|
|
|
|
key = 'apps@'+self.id
|
|
|
|
|
if key in data:
|
|
|
|
|
rdata = data[key]
|
|
|
|
|
features = ['memory_usage','cpu_usage']
|
|
|
|
|
yo = {"1":["running"],"name":"status"}
|
|
|
|
@ -75,10 +92,27 @@ class Anomalies(Thread) :
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
Let's estimate how many files we will have for a given date
|
|
|
|
|
y = ax + b with y: number of files, x: date
|
|
|
|
|
"""
|
|
|
|
|
class Regression(BaseLearner):
    """
    Estimate how many files we will have for a given date:
    y = ax + b with y: number of files, x: date

    The records used are the 'folders' entries of the data store whose
    'id' matches the id found in the configuration.
    """
    def __init__(self,lock):
        # BaseLearner.__init__ requires the lock; leaving it out raises TypeError
        BaseLearner.__init__(self,lock)
        # Result of the last least-squares solve, None until run() has produced one
        self.model = None
        if self.config :
            self.folders = self.config['folders']
            self.id = self.config['id']
            #
            # run() builds its reader from these two attributes; they are read
            # from the same configuration keys Anomalies uses
            #
            self.rclass = self.config['store']['class']['read']
            self.rw_args = self.config['store']['args']

    def run(self):
        """
        Read the data store and solve the least-squares problem on
        (date, count) for the folders matching self.id.
        The solution is kept in self.model; nothing is returned.
        """
        # NOTE(review): DELAY is computed but not used anywhere in this method yet
        DELAY = self.config['delay'] * 60
        reader = self.factory.instance(type=self.rclass,args=self.rw_args)
        data = reader.read()
        if 'folders' in data :
            data = ML.Filter('id',self.id,data['folders'])
            xo = ML.Extract(['date'],data)
            yo = ML.Extract(['count'],data)
            # Keep the solution instead of throwing it away
            self.model = numpy.linalg.lstsq(xo, yo, rcond=-1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Regression(Thread):
    """
    Placeholder thread; params is accepted but not used yet.

    NOTE(review): an earlier class in this file is also named Regression;
    this later definition is the one the module-level name ends up bound to.
    """
    def __init__(self,params):
        # Without this call the Thread state is never set up and
        # start() / is_alive() fail on the instance
        Thread.__init__(self)
|
|
|
|
|
if __name__ == '__main__' :
|
|
|
|
|
lock = RLock()
|
|
|
|
|
thread = Anomalies(lock)
|
|
|
|
|