|
|
|
@ -18,7 +18,7 @@ class ML:
|
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
#return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
|
|
|
|
|
return [[item for item in row if item[attr] == value] for row in data]
|
|
|
|
|
return [[item for item in row if item[attr] == value][0] for row in data]
|
|
|
|
|
@staticmethod
|
|
|
|
|
def Extract(lattr,data):
|
|
|
|
|
if isinstance(lattr,basestring):
|
|
|
|
@ -32,7 +32,7 @@ class ML:
|
|
|
|
|
"""
|
|
|
|
|
class AnomalyDetection:
|
|
|
|
|
|
|
|
|
|
def split(self,data,index=-1,threshold=0.8) :
|
|
|
|
|
def split(self,data,index=-1,threshold=0.9) :
|
|
|
|
|
N = len(data)
|
|
|
|
|
# if N < LIMIT:
|
|
|
|
|
# return None
|
|
|
|
@ -53,7 +53,6 @@ class AnomalyDetection:
|
|
|
|
|
"""
|
|
|
|
|
def learn(self,data,key,value,features,label):
|
|
|
|
|
xo = ML.Filter(key,value,data)
|
|
|
|
|
print key,value, len(xo)
|
|
|
|
|
|
|
|
|
|
if not xo or len(xo) < 100:
|
|
|
|
|
return None
|
|
|
|
@ -69,25 +68,47 @@ class AnomalyDetection:
|
|
|
|
|
|
|
|
|
|
xo = self.split(xo)
|
|
|
|
|
yo = self.split(yo)
|
|
|
|
|
|
|
|
|
|
if xo['train'] :
|
|
|
|
|
E = 0.01
|
|
|
|
|
p = self.gParameters(xo['train'])
|
|
|
|
|
has_cov = np.linalg.det(p['cov']) #-- making sure the matrix is invertible
|
|
|
|
|
if xo['train'] and has_cov :
|
|
|
|
|
E = 0.001
|
|
|
|
|
fscore = 0
|
|
|
|
|
#
|
|
|
|
|
# We need to find an appropriate epsilon for the predictions
|
|
|
|
|
# The appropriate epsilon is one that yields an f-score [0.5,1[
|
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
__operf__ = None
|
|
|
|
|
perf = None
|
|
|
|
|
for i in range(0,10):
|
|
|
|
|
Epsilon = E + (2*E*i)
|
|
|
|
|
p = self.gParameters(xo['train'])
|
|
|
|
|
|
|
|
|
|
if p is None :
|
|
|
|
|
return None
|
|
|
|
|
#
|
|
|
|
|
# At this point we've got enough data for the parameters
|
|
|
|
|
# We should try to fine tune epsilon for better results
|
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
px = self.gPx(p['mean'],p['cov'],xo['test'],Epsilon)
|
|
|
|
|
|
|
|
|
|
perf = self.gPerformance(px,yo['test'])
|
|
|
|
|
if fscore == 0 :
|
|
|
|
|
fscore = perf['fscore']
|
|
|
|
|
elif perf['fscore'] > fscore and perf['fscore'] > 0.5 :
|
|
|
|
|
|
|
|
|
|
perf['epsilon'] = Epsilon
|
|
|
|
|
__operf__ = self.gPerformance(px,yo['test'])
|
|
|
|
|
print __operf__
|
|
|
|
|
if __operf__['fscore'] == 1 :
|
|
|
|
|
break
|
|
|
|
|
if perf is None :
|
|
|
|
|
perf = __operf__['fscore']
|
|
|
|
|
elif perf['fscore'] < __perf__['fscore'] and __operf__['fscore']> 0.5 :
|
|
|
|
|
perf = __operf__
|
|
|
|
|
|
|
|
|
|
return {"label":value,"parameters":p,"performance":perf}
|
|
|
|
|
perf['epsilon'] = Epsilon
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if perf and perf['fscore'] > 0.5 :
|
|
|
|
|
return {"label":value,"parameters":p,"performance":perf}
|
|
|
|
|
else:
|
|
|
|
|
return None
|
|
|
|
|
return None
|
|
|
|
|
def getLabel(self,yo,label_conf):
    """
    Turn each row of raw labels into a binary 0/1 flag.

    A row yields 1 when it shares at least one element with
    label_conf["1"], and 0 otherwise.

    @param yo          iterable of rows; each row is an iterable of hashable values
    @param label_conf  mapping whose "1" entry lists the values that flag a row as 1
    @return            list of ints (0 or 1), one per row of yo, in the same order
    @raise KeyError    when label_conf has no "1" entry
    """
    # The reference set does not depend on the row being examined, so it is
    # built once here rather than once per row inside the comprehension.
    positives = set(label_conf["1"])
    return [int(len(set(item) & positives) > 0) for item in yo]
|
|
|
|
@ -109,6 +130,7 @@ class AnomalyDetection:
|
|
|
|
|
row = np.array(row)
|
|
|
|
|
d = np.matrix(row - xu)
|
|
|
|
|
d.shape = (n,1)
|
|
|
|
|
|
|
|
|
|
b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d))
|
|
|
|
|
|
|
|
|
|
px = float(b/a)
|
|
|
|
|