From f3d9e03b95634d979bb71660c509cfa8e828064a Mon Sep 17 00:00:00 2001 From: "Steve L. Nyemba" Date: Fri, 13 Jan 2017 07:29:02 -0600 Subject: [PATCH] learner testing --- src/utils/ml.py | 53 +++++++++++++++++++++++++----------------- src/utils/transport.py | 3 ++- test/TestML.py | 19 +++++++++++---- 3 files changed, 49 insertions(+), 26 deletions(-) diff --git a/src/utils/ml.py b/src/utils/ml.py index 15eeea7..8125dbf 100644 --- a/src/utils/ml.py +++ b/src/utils/ml.py @@ -23,30 +23,41 @@ class ML: class AnomalyDetection: def split(self,data,index=-1,threshold=0.7) : N = len(data) - if N < LIMIT: - return None + # if N < LIMIT: + # return None end = int(N*threshold) train = data[:end] test = data[end:] - if index > 0: - return {"train":train,"test":test,"labels":[]} - def learn(self,data,conf): - if 'filter' in conf: - filter = conf['filter'] - data = ML.Filter(filter['key'],filter['value'],data) - attr = conf['features'] - label= conf['label'] - labels= ML.Extract([label],data) - data = ML.Extract(attr,data) - r = self.split(data) - labels = self.split(labels) + return {"train":train,"test":test} + """ - p = self.gParameters(r['train']) - test = self.gPx(p['mean'],p['cov'],r['test']) - return self.gPerformance(test,labels['test']) + @param key field name by which the data will be filtered + @param value field value for the filter + @param features features to be used in the analysis + @param labels used to assess performance + @TODO: Map/Reduce does a good job at filtering + """ + def learn(self,data,key,value,features,label): + xo = ML.Filter(key,value,data) + + # attr = conf['features'] + # label= conf['label'] + yo= ML.Extract([label['name']],xo) + xo = ML.Extract(features,xo) + yo = self.getLabel(yo,label) + + xo = self.split(xo) + yo = self.split(yo) + p = self.gParameters(xo['train']) + + px = self.gPx(p['mean'],p['cov'],xo['test']) + + print self.gPerformance(px,yo['test']) + def getLabel(self,yo,label_conf): + return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ] """ @@ -80,10 +91,10 @@ class AnomalyDetection: fn = 0 # false negative tn = 0 # true negative for i in range(0,N): - tp += 1 if test[i][1]==labels[i] and test[i][1] == 1 - fp += 1 if test[i][1] != labels[i] and test[i][1] == 1 - fn += 1 if test[i][1] != labels[i] and test[i][1] == 0 - tn += 1 if test[i][1] == labels[i] and test[i][1] == 0 + tp += 1 if (test[i][1]==labels[i] and test[i][1] == 1) else 0 + fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0 + fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0 + tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0 precision = tp / (tp + fp) recall = tp / (tp + fn) fscore = (2 * precision * recall)/ (precision + recall) diff --git a/src/utils/transport.py b/src/utils/transport.py index b794d53..6b990e9 100644 --- a/src/utils/transport.py +++ b/src/utils/transport.py @@ -250,7 +250,8 @@ class MessageQueue: resp = self.connection is not None and self.connection.is_open self.close() return resp - def close(self): + def close(self): + self.channel.close() self.connection.close() """ diff --git a/test/TestML.py b/test/TestML.py index e374213..e3ea79b 100644 --- a/test/TestML.py +++ b/test/TestML.py @@ -1,5 +1,5 @@ from utils import transport -from utils.ml import ML +from utils.ml import ML, AnomalyDetection import unittest import json import os @@ -10,7 +10,6 @@ f = open(path) CONFIG = json.loads( f.read()) f.close() factory = transport.DataSourceFactory() -#greader = factory.instance(type=ref,args=p) class TestML(unittest.TestCase): def setUp(self): @@ -34,9 +33,21 @@ class TestML(unittest.TestCase): r = r['apps'] x = ML.Filter('label','Google Chrome',r) x_ = ML.Extract(['cpu_usage','memory_usage'], x) - print x[0] - print x_ + self.assertTrue (len (x) == len(x_)) pass + def test_Learn(self): + ref = CONFIG['store']['class']['read'] + p = CONFIG['store']['args'] + greader = factory.instance(type=ref,args=p) + + data = greader.read() + + data = data['apps'] + lhandler = AnomalyDetection() + features = CONFIG['learner']['anomalies']['features'] + label = CONFIG['learner']['anomalies']['label'] + lhandler.learn(data,'label','Google Chrome',features,label) + if __name__ == '__main__' :