@@ -23,30 +23,41 @@ class ML:
 class AnomalyDetection:
     def split(self,data,index=-1,threshold=0.7) :
         N = len(data)
-        if N < LIMIT:
-            return None
+        # if N < LIMIT:
+        #     return None
 
         end = int(N*threshold)
         train = data[:end]
         test = data[end:]
-        if index > 0:
-            return {"train":train,"test":test,"labels":[]}
-    def learn(self,data,conf):
-        if 'filter' in conf:
-            filter = conf['filter']
-            data = ML.Filter(filter['key'],filter['value'],data)
-        attr = conf['features']
-        label= conf['label']
-        labels= ML.Extract([label],data)
-        data = ML.Extract(attr,data)
-
-        r = self.split(data)
-        labels = self.split(labels)
-
-        p = self.gParameters(r['train'])
-        test = self.gPx(p['mean'],p['cov'],r['test'])
-        return self.gPerformance(test,labels['test'])
+        return {"train":train,"test":test}
+    """
+        @param key      field name by which the data will be filtered
+        @param value    field value for the filter
+        @param features features to be used in the analysis
+        @param labels   used to assess performance
+        @TODO: Map/Reduce does a good job at filtering
+    """
+    def learn(self,data,key,value,features,label):
+        xo = ML.Filter(key,value,data)
+        # attr = conf['features']
+        # label= conf['label']
+        yo = ML.Extract([label['name']],xo)
+        xo = ML.Extract(features,xo)
+        yo = self.getLabel(yo,label)
+
+        xo = self.split(xo)
+        yo = self.split(yo)
+
+        p = self.gParameters(xo['train'])
+        px = self.gPx(p['mean'],p['cov'],xo['test'])
+        print self.gPerformance(px,yo['test'])
+    def getLabel(self,yo,label_conf):
+        return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ]
 
     """
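The hunk above replaces the conf-dict driven learn() with explicit parameters. A minimal sketch of how the new signature might be called, assuming the module is importable; the import path, field names, and sample records below are illustrative assumptions, not taken from the repository:

# Hypothetical driver for the new learn(data, key, value, features, label) API.
# Import path, field names and record values are assumptions for illustration only.
from ml import AnomalyDetection

# Each record is a dict; 'status' carries the raw label values that
# getLabel() folds into {0,1} via the label config's "1" list.
data = [
    {"node": "db-01", "status": ["running"], "cpu": 0.41, "mem": 0.62},
    {"node": "db-01", "status": ["crash"],   "cpu": 0.97, "mem": 0.91},
    # ... enough observations of node "db-01" for a 70/30 split
]

features = ["cpu", "mem"]               # columns handed to ML.Extract
label = {"name": "status",              # field extracted as the raw label
         "1": ["crash", "error"]}       # values that getLabel maps to class 1

ad = AnomalyDetection()
# Filters on node == "db-01", extracts the features, splits 70/30,
# fits the Gaussian parameters and prints the result of gPerformance.
ad.learn(data, "node", "db-01", features, label)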
@@ -80,10 +91,10 @@ class AnomalyDetection:
         fn = 0 # false negative
         tn = 0 # true negative
         for i in range(0,N):
-            tp += 1 if test[i][1]==labels[i] and test[i][1] == 1
-            fp += 1 if test[i][1] != labels[i] and test[i][1] == 1
-            fn += 1 if test[i][1] != labels[i] and test[i][1] == 0
-            tn += 1 if test[i][1] == labels[i] and test[i][1] == 0
+            tp += 1 if (test[i][1]==labels[i] and test[i][1] == 1) else 0
+            fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0
+            fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0
+            tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0
         precision = tp / (tp + fp)
         recall = tp / (tp + fn)
         fscore = (2 * precision * recall)/ (precision + recall)
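For reference, a worked instance of the precision/recall/F-score lines kept by this hunk, with made-up counts; floats are used so the divisions behave the same under Python 2 and 3 (with integer counters, Python 2's / would truncate):

# Worked example of the metrics above, using hypothetical confusion-matrix counts.
tp, fp, fn, tn = 8.0, 2.0, 4.0, 86.0

precision = tp / (tp + fp)                                # 8 / 10 = 0.8
recall = tp / (tp + fn)                                   # 8 / 12 ≈ 0.667
fscore = (2 * precision * recall) / (precision + recall)  # ≈ 0.727

print(precision, recall, fscore)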