Bug fix in the learner. @TODO: Determine epsilon, i.e. the value that yields a good F-score

master
Steve L. Nyemba 8 years ago
parent a1acc24da4
commit 54744253ca

@ -16,7 +16,7 @@ class ML:
# @TODO: Make sure this approach works across all transport classes # @TODO: Make sure this approach works across all transport classes
# We may have a potential issue of how the data is stored ... it may not scale # We may have a potential issue of how the data is stored ... it may not scale
# #
return [item[0] for item in data if item[0][attr] == value] return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
@staticmethod @staticmethod
def Extract(lattr,data): def Extract(lattr,data):
return [[row[id] for id in lattr] for row in data] return [[row[id] for id in lattr] for row in data]
@ -27,7 +27,7 @@ class ML:
@TODO: determine epsilon computationally @TODO: determine epsilon computationally
""" """
class AnomalyDetection: class AnomalyDetection:
def split(self,data,index=-1,threshold=0.7) : def split(self,data,index=-1,threshold=0.8) :
N = len(data) N = len(data)
# if N < LIMIT: # if N < LIMIT:
# return None # return None
@ -47,10 +47,15 @@ class AnomalyDetection:
""" """
def learn(self,data,key,value,features,label): def learn(self,data,key,value,features,label):
xo = ML.Filter(key,value,data) xo = ML.Filter(key,value,data)
if len(xo) < 100 :
if not xo :
return None return None
#if len(xo) < 100 :
#return None
# attr = conf['features'] # attr = conf['features']
# label= conf['label'] # label= conf['label']
yo= ML.Extract([label['name']],xo) yo= ML.Extract([label['name']],xo)
xo = ML.Extract(features,xo) xo = ML.Extract(features,xo)
yo = self.getLabel(yo,label) yo = self.getLabel(yo,label)
@ -58,12 +63,14 @@ class AnomalyDetection:
xo = self.split(xo) xo = self.split(xo)
yo = self.split(yo) yo = self.split(yo)
p = self.gParameters(xo['train']) if xo['train'] :
p = self.gParameters(xo['train'])
px = self.gPx(p['mean'],p['cov'],xo['test']) px = self.gPx(p['mean'],p['cov'],xo['test'])
perf = self.gPerformance(px,yo['test']) perf = self.gPerformance(px,yo['test'])
return {"parameters":p,"performance":perf} return {"parameters":p,"performance":perf}
return None
def getLabel(self,yo,label_conf): def getLabel(self,yo,label_conf):
return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ] return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ]
@ -72,7 +79,7 @@ class AnomalyDetection:
This function will compute the probability density function given a particular event/set of events This function will compute the probability density function given a particular event/set of events
@pre xu.shape[0] == sigma.shape[0] == sigma.shape[1] @pre xu.shape[0] == sigma.shape[0] == sigma.shape[1]
""" """
def gPx(self,xu,sigma,data,EPSILON=0.05): def gPx(self,xu,sigma,data,EPSILON=0.25):
n = len(data[0]) n = len(data[0])
r = [] r = []
@ -84,6 +91,7 @@ class AnomalyDetection:
d = np.matrix(row - xu) d = np.matrix(row - xu)
d.shape = (n,1) d.shape = (n,1)
b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d)) b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d))
px = float(b/a) px = float(b/a)
r.append([px,int(px < EPSILON)]) r.append([px,int(px < EPSILON)])
return r return r
@ -103,8 +111,8 @@ class AnomalyDetection:
fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0 fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0
fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0 fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0
tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0 tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0
precision = tp / (tp + fp) precision = tp / (tp + fp) if tp + fp > 0 else 1
recall = tp / (tp + fn) recall = tp / (tp + fn) if tp + fp > 0 else 1
fscore = (2 * precision * recall)/ (precision + recall) fscore = (2 * precision * recall)/ (precision + recall)
return {"precision":precision,"recall":recall,"fscore":fscore} return {"precision":precision,"recall":recall,"fscore":fscore}
@ -124,4 +132,5 @@ class AnomalyDetection:
# #
m = np.array([ (m[i,:] - u[i])/r[i] for i in range(0,n)]) m = np.array([ (m[i,:] - u[i])/r[i] for i in range(0,n)])
sigma = np.cov(m) sigma = np.cov(m)
return {"cov":sigma,"mean":u} sigma = [ list(row) for row in sigma]
return {"cov":sigma,"mean":list(u)}

@ -1,6 +1,7 @@
#import multiprocessing #import multiprocessing
from threading import Thread, Lock from threading import Thread, Lock
from utils import transport from utils import transport
from utils.ml import AnomalyDetection
import time import time
import monitor import monitor
import sys import sys
@ -78,17 +79,33 @@ class Learner(Thread) :
r = {} r = {}
for key in data : for key in data :
logs = data[key] logs = data[key]
r[key] = {}
for app in self.apps: for app in self.apps:
handler = AnomalyDetection() handler = AnomalyDetection()
r[key][app] = lhandler.learn(data,'label',app,self.features,self.yo) value = handler.learn(logs,'label',app,self.features,self.yo)
if value is not None:
print value
if key not in r:
r[key] = {}
r[key][app] = value
# #
# At this point we've already learnt every thing we need to learn # At this point we've already learnt every thing we need to learn
# #
self.lock.aquire()
writer = sef.factory.instance(type.self.write_class,args=self.rw_args) if r.keys() :
writer.write('learn',r)
self.lock.release() self.lock.acquire()
writer = self.factory.instance(type=self.write_class,args=self.rw_args)
writer.write(label='learn',row=r)
self.lock.release()
if 'MONITOR_CONFIG_PATH' in os.environ:
#
# This suggests we are in development mode
#
break
TIME_ELLAPSED = 60*120 #-- Every 2 hours TIME_ELLAPSED = 60*120 #-- Every 2 hours
time.sleep(TIME_ELLAPSED) time.sleep(TIME_ELLAPSED)

@ -5,7 +5,8 @@ import monitor
import os import os
import json import json
from utils.workers import Top, Learner from utils.workers import Top, Learner
from multiprocessing import Lock #from multiprocessing import Lock
from threading import Lock
path = os.environ['MONITOR_CONFIG_PATH'] path = os.environ['MONITOR_CONFIG_PATH']
f = open(path) f = open(path)
CONFIG = json.loads( f.read()) CONFIG = json.loads( f.read())
@ -46,7 +47,14 @@ class TestMonitorServer(unittest.TestCase):
def test_StartTop(self): def test_StartTop(self):
lock = Lock() lock = Lock()
p = Top(CONFIG,lock) p = Top(CONFIG,lock)
#p.start()
#p.join()
def test_StartLearner(self):
lock = Lock()
p = Learner(CONFIG,lock)
p.start() p.start()
p.join()
if __name__ == '__main__' : if __name__ == '__main__' :
unittest.main() unittest.main()

Loading…
Cancel
Save