|
|
@ -16,7 +16,7 @@ class ML:
|
|
|
|
# @TODO: Make sure this approach works across all transport classes
|
|
|
|
# @TODO: Make sure this approach works across all transport classes
|
|
|
|
# We may have a potential issue of how the data is stored ... it may not scale
|
|
|
|
# We may have a potential issue of how the data is stored ... it may not scale
|
|
|
|
#
|
|
|
|
#
|
|
|
|
return [item[0] for item in data if item[0][attr] == value]
|
|
|
|
return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
|
|
|
|
@staticmethod
def Extract(lattr, data):
    """
    Project every row of `data` onto the attributes listed in `lattr`.

    @param lattr    ordered list of keys (or indexes) to read from each row
    @param data     iterable of rows; each row must support row[key] for every key in lattr
    @return         list holding one inner list per row, with the row's values in lattr order

    A key that is absent from a row is not masked: the lookup error raised by
    the row (e.g. KeyError for a dict) propagates to the caller.
    """
    # Use `name` instead of `id` so the builtin id() is not shadowed.
    return [[row[name] for name in lattr] for row in data]
|
|
|
@ -27,7 +27,7 @@ class ML:
|
|
|
|
@TODO: computationally determine epsilon
|
|
|
|
@TODO: computationally determine epsilon
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
class AnomalyDetection:
|
|
|
|
class AnomalyDetection:
|
|
|
|
def split(self,data,index=-1,threshold=0.7) :
|
|
|
|
def split(self,data,index=-1,threshold=0.8) :
|
|
|
|
N = len(data)
|
|
|
|
N = len(data)
|
|
|
|
# if N < LIMIT:
|
|
|
|
# if N < LIMIT:
|
|
|
|
# return None
|
|
|
|
# return None
|
|
|
@ -47,10 +47,15 @@ class AnomalyDetection:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
def learn(self,data,key,value,features,label):
|
|
|
|
def learn(self,data,key,value,features,label):
|
|
|
|
xo = ML.Filter(key,value,data)
|
|
|
|
xo = ML.Filter(key,value,data)
|
|
|
|
if len(xo) < 100 :
|
|
|
|
|
|
|
|
|
|
|
|
if not xo :
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if len(xo) < 100 :
|
|
|
|
|
|
|
|
#return None
|
|
|
|
# attr = conf['features']
|
|
|
|
# attr = conf['features']
|
|
|
|
# label= conf['label']
|
|
|
|
# label= conf['label']
|
|
|
|
|
|
|
|
|
|
|
|
yo= ML.Extract([label['name']],xo)
|
|
|
|
yo= ML.Extract([label['name']],xo)
|
|
|
|
xo = ML.Extract(features,xo)
|
|
|
|
xo = ML.Extract(features,xo)
|
|
|
|
yo = self.getLabel(yo,label)
|
|
|
|
yo = self.getLabel(yo,label)
|
|
|
@ -58,12 +63,14 @@ class AnomalyDetection:
|
|
|
|
xo = self.split(xo)
|
|
|
|
xo = self.split(xo)
|
|
|
|
yo = self.split(yo)
|
|
|
|
yo = self.split(yo)
|
|
|
|
|
|
|
|
|
|
|
|
p = self.gParameters(xo['train'])
|
|
|
|
if xo['train'] :
|
|
|
|
|
|
|
|
p = self.gParameters(xo['train'])
|
|
|
|
px = self.gPx(p['mean'],p['cov'],xo['test'])
|
|
|
|
|
|
|
|
|
|
|
|
px = self.gPx(p['mean'],p['cov'],xo['test'])
|
|
|
|
perf = self.gPerformance(px,yo['test'])
|
|
|
|
|
|
|
|
return {"parameters":p,"performance":perf}
|
|
|
|
perf = self.gPerformance(px,yo['test'])
|
|
|
|
|
|
|
|
return {"parameters":p,"performance":perf}
|
|
|
|
|
|
|
|
return None
|
|
|
|
def getLabel(self, yo, label_conf):
    """
    Turn extracted label rows into binary (0/1) labels.

    @param yo           iterable of rows, each an iterable of hashable values
    @param label_conf   mapping whose "1" entry lists the values that mark a row as positive
    @return             list of ints: 1 when the row shares at least one value with
                        label_conf["1"], 0 otherwise
    @raise KeyError     if label_conf has no "1" entry
    """
    # Build the set of positive values once rather than once per row.
    positives = set(label_conf["1"])
    return [int(bool(positives.intersection(item))) for item in yo]
|
|
|
|
|
|
|
|
|
|
|
@ -72,7 +79,7 @@ class AnomalyDetection:
|
|
|
|
This function will compute the probability density function given a particular event/set of events
|
|
|
|
This function will compute the probability density function given a particular event/set of events
|
|
|
|
@pre xu.shape[0] == sigma.shape[0] == sigma.shape[1]
|
|
|
|
@pre xu.shape[0] == sigma.shape[0] == sigma.shape[1]
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
def gPx(self,xu,sigma,data,EPSILON=0.05):
|
|
|
|
def gPx(self,xu,sigma,data,EPSILON=0.25):
|
|
|
|
n = len(data[0])
|
|
|
|
n = len(data[0])
|
|
|
|
|
|
|
|
|
|
|
|
r = []
|
|
|
|
r = []
|
|
|
@ -84,6 +91,7 @@ class AnomalyDetection:
|
|
|
|
d = np.matrix(row - xu)
|
|
|
|
d = np.matrix(row - xu)
|
|
|
|
d.shape = (n,1)
|
|
|
|
d.shape = (n,1)
|
|
|
|
b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d))
|
|
|
|
b = np.exp((-0.5*np.transpose(d)) * (np.linalg.inv(sigma)*d))
|
|
|
|
|
|
|
|
|
|
|
|
px = float(b/a)
|
|
|
|
px = float(b/a)
|
|
|
|
r.append([px,int(px < EPSILON)])
|
|
|
|
r.append([px,int(px < EPSILON)])
|
|
|
|
return r
|
|
|
|
return r
|
|
|
@ -103,8 +111,8 @@ class AnomalyDetection:
|
|
|
|
fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0
|
|
|
|
fp += 1 if (test[i][1] != labels[i] and test[i][1] == 1) else 0
|
|
|
|
fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0
|
|
|
|
fn += 1 if (test[i][1] != labels[i] and test[i][1] == 0) else 0
|
|
|
|
tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0
|
|
|
|
tn += 1 if (test[i][1] == labels[i] and test[i][1] == 0) else 0
|
|
|
|
precision = tp / (tp + fp)
|
|
|
|
precision = tp / (tp + fp) if tp + fp > 0 else 1
|
|
|
|
recall = tp / (tp + fn)
|
|
|
|
recall = tp / (tp + fn) if tp + fp > 0 else 1
|
|
|
|
fscore = (2 * precision * recall)/ (precision + recall)
|
|
|
|
fscore = (2 * precision * recall)/ (precision + recall)
|
|
|
|
return {"precision":precision,"recall":recall,"fscore":fscore}
|
|
|
|
return {"precision":precision,"recall":recall,"fscore":fscore}
|
|
|
|
|
|
|
|
|
|
|
@ -124,4 +132,5 @@ class AnomalyDetection:
|
|
|
|
#
|
|
|
|
#
|
|
|
|
m = np.array([ (m[i,:] - u[i])/r[i] for i in range(0,n)])
|
|
|
|
m = np.array([ (m[i,:] - u[i])/r[i] for i in range(0,n)])
|
|
|
|
sigma = np.cov(m)
|
|
|
|
sigma = np.cov(m)
|
|
|
|
return {"cov":sigma,"mean":u}
|
|
|
|
sigma = [ list(row) for row in sigma]
|
|
|
|
|
|
|
|
return {"cov":sigma,"mean":list(u)}
|
|
|
|