bug fix around filter/extract @TODO: Work on overfitting

master
Steve L. Nyemba 8 years ago
parent 3f8f975528
commit 4184d0bdc7

@ -18,7 +18,7 @@ class ML:
# #
#return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value] #return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
return [[item for item in row if item[attr] == value] for row in data] return [[item for item in row if item[attr] == value][0] for row in data]
@staticmethod @staticmethod
def Extract(lattr,data): def Extract(lattr,data):
if isinstance(lattr,basestring): if isinstance(lattr,basestring):
@ -32,7 +32,7 @@ class ML:
""" """
class AnomalyDetection: class AnomalyDetection:
def split(self,data,index=-1,threshold=0.8) : def split(self,data,index=-1,threshold=0.9) :
N = len(data) N = len(data)
# if N < LIMIT: # if N < LIMIT:
# return None # return None
@ -53,7 +53,6 @@ class AnomalyDetection:
""" """
def learn(self,data,key,value,features,label): def learn(self,data,key,value,features,label):
xo = ML.Filter(key,value,data) xo = ML.Filter(key,value,data)
print key,value, len(xo)
if not xo or len(xo) < 100: if not xo or len(xo) < 100:
return None return None
@ -69,25 +68,47 @@ class AnomalyDetection:
xo = self.split(xo) xo = self.split(xo)
yo = self.split(yo) yo = self.split(yo)
p = self.gParameters(xo['train'])
if xo['train'] : has_cov = np.linalg.det(p['cov']) #-- making sure the matrix is invertible
E = 0.01 if xo['train'] and has_cov :
E = 0.001
fscore = 0 fscore = 0
#
# We need to find an appropriate epsilon for the predictions
# The appropriate epsilon is one that yields an f-score [0.5,1[
#
__operf__ = None
perf = None
for i in range(0,10): for i in range(0,10):
Epsilon = E + (2*E*i) Epsilon = E + (2*E*i)
p = self.gParameters(xo['train'])
if p is None : if p is None :
return None return None
#
# At this point we've got enough data for the parameters
# We should try to fine tune epsilon for better results
#
px = self.gPx(p['mean'],p['cov'],xo['test'],Epsilon) px = self.gPx(p['mean'],p['cov'],xo['test'],Epsilon)
perf = self.gPerformance(px,yo['test'])
if fscore == 0 :
fscore = perf['fscore']
elif perf['fscore'] > fscore and perf['fscore'] > 0.5 :
perf['epsilon'] = Epsilon __operf__ = self.gPerformance(px,yo['test'])
print __operf__
if __operf__['fscore'] == 1 :
break
if perf is None :
perf = __operf__['fscore']
elif perf['fscore'] < __perf__['fscore'] and __operf__['fscore']> 0.5 :
perf = __operf__
return {"label":value,"parameters":p,"performance":perf} perf['epsilon'] = Epsilon
if perf and perf['fscore'] > 0.5 :
return {"label":value,"parameters":p,"performance":perf}
else:
return None
return None return None
def getLabel(self,yo,label_conf): def getLabel(self,yo,label_conf):
return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ] return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ]
@ -109,6 +130,7 @@ class AnomalyDetection:
row = np.array(row) row = np.array(row)
d = np.matrix(row - xu) d = np.matrix(row - xu)
d.shape = (n,1) d.shape = (n,1)
b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d)) b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d))
px = float(b/a) px = float(b/a)

@ -25,14 +25,20 @@ class TestML(unittest.TestCase):
def test_Filter(self): def test_Filter(self):
r = self.greader.read() r = self.greader.read()
r = r['apps'] r = r['apps']
x = ML.Filter('label','Google Chrome',r) #
# To make this test case extensible we need to pull apps from the configuration
#
app = CONFIG['monitor']['processes']['config']['apps'][0]
x = ML.Filter('label',app,r)
for row in x: for row in x:
self.assertTrue(row['label'] == 'Google Chrome') self.assertTrue(row['label'] == app)
def test_Extract(self): def test_Extract(self):
r = self.greader.read() r = self.greader.read()
r = r['apps'] r = r['apps']
x = ML.Filter('label','Google Chrome',r) app = CONFIG['monitor']['processes']['config']['apps'][0]
x = ML.Filter('label',app,r)
x_ = ML.Extract(['cpu_usage','memory_usage'], x) x_ = ML.Extract(['cpu_usage','memory_usage'], x)
self.assertTrue (len (x) == len(x_)) self.assertTrue (len (x) == len(x_))
pass pass
def test_Learn(self): def test_Learn(self):
@ -43,11 +49,12 @@ class TestML(unittest.TestCase):
data = greader.read() data = greader.read()
data = data['apps'] data = data['apps']
app = CONFIG['monitor']['processes']['config']['apps'][1]
lhandler = AnomalyDetection() lhandler = AnomalyDetection()
features = CONFIG['learner']['anomalies']['features'] features = CONFIG['learner']['anomalies']['features']
label = CONFIG['learner']['anomalies']['label'] label = CONFIG['learner']['anomalies']['label']
lhandler.learn(data,'label','Google Chrome',features,label) x = lhandler.learn(data,'label',app,features,label)
print x
if __name__ == '__main__' : if __name__ == '__main__' :

Loading…
Cancel
Save