| 
						
						
							
								
							
						
						
					 | 
					 | 
					@ -16,7 +16,7 @@ class ML:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# @TODO: Make sure this approach works across all transport classes
 | 
					 | 
					 | 
					 | 
							# @TODO: Make sure this approach works across all transport classes
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# We may have a potential issue of how the data is stored ... it may not scale
 | 
					 | 
					 | 
					 | 
							# We may have a potential issue of how the data is stored ... it may not scale
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							#
 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							value = ML.CleanupName(value)
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							#return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
 | 
					 | 
					 | 
					 | 
							#return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							return [[item for item in row if item[attr] == value][0] for row in data]
 | 
					 | 
					 | 
					 | 
							return [[item for item in row if item[attr] == value][0] for row in data]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						@staticmethod
 | 
					 | 
					 | 
					 | 
						@staticmethod
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -24,7 +24,9 @@ class ML:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if isinstance(lattr,basestring):
 | 
					 | 
					 | 
					 | 
							if isinstance(lattr,basestring):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								lattr = [lattr]
 | 
					 | 
					 | 
					 | 
								lattr = [lattr]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							return [[row[id] for id in lattr] for row in data]
 | 
					 | 
					 | 
					 | 
							return [[row[id] for id in lattr] for row in data]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
						@staticmethod
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						def CleanupName(value) :
							"""
								Cleans up a name by removing unwanted character sequences:
								every '$' is removed first, then every literal '.+' sequence
								(plain substring replacement, not a regular expression).
								@param value	string to clean up
								@return		the cleaned up string
							"""
							without_dollar = value.replace('$','')
							return without_dollar.replace('.+','')
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						
 | 
					 | 
					 | 
					 | 
						
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					"""
 | 
					 | 
					 | 
					 | 
					"""
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						Implements a multivariate anomaly detection
 | 
					 | 
					 | 
					 | 
						Implements a multivariate anomaly detection
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -32,7 +34,7 @@ class ML:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					"""
 | 
					 | 
					 | 
					 | 
					"""
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					class AnomalyDetection:
 | 
					 | 
					 | 
					 | 
					class AnomalyDetection:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						def split(self,data,index=-1,threshold=0.9) :
 | 
					 | 
					 | 
					 | 
						def split(self,data,index=-1,threshold=0.65) :
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							N	= len(data)
 | 
					 | 
					 | 
					 | 
							N	= len(data)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# if N < LIMIT:
 | 
					 | 
					 | 
					 | 
							# if N < LIMIT:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# 	return None
 | 
					 | 
					 | 
					 | 
							# 	return None
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -52,13 +54,13 @@ class AnomalyDetection:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						@TODO: Map/Reduce does a good job at filtering
 | 
					 | 
					 | 
					 | 
						@TODO: Map/Reduce does a good job at filtering
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						"""
 | 
					 | 
					 | 
					 | 
						"""
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						def learn(self,data,key,value,features,label):
 | 
					 | 
					 | 
					 | 
						def learn(self,data,key,value,features,label):
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							xo = ML.Filter(key,value,data)
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if not xo or len(xo) < 100:
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								return None
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							#if len(xo) < 100 :
 | 
					 | 
					 | 
					 | 
							if len(data) < 10:
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								#return None
 | 
					 | 
					 | 
					 | 
								return None
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							xo = ML.Filter(key,value,data)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							if len(xo) < 10 :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								return None
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# attr = conf['features']
 | 
					 | 
					 | 
					 | 
							# attr = conf['features']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							# label= conf['label']
 | 
					 | 
					 | 
					 | 
							# label= conf['label']
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							
 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -69,9 +71,10 @@ class AnomalyDetection:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							xo = self.split(xo)
 | 
					 | 
					 | 
					 | 
							xo = self.split(xo)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							yo = self.split(yo)
 | 
					 | 
					 | 
					 | 
							yo = self.split(yo)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							p = self.gParameters(xo['train'])
 | 
					 | 
					 | 
					 | 
							p = self.gParameters(xo['train'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							has_cov =  np.linalg.det(p['cov']) #-- making sure the matrix is invertible
 | 
					 | 
					 | 
					 | 
							has_cov =   np.linalg.det(p['cov']) if p else False #-- making sure the matrix is invertible
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							if xo['train'] and has_cov :
 | 
					 | 
					 | 
					 | 
							if xo['train'] and has_cov :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								E = 0.001
 | 
					 | 
					 | 
					 | 
								E = 0.001
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								ACCEPTABLE_FSCORE = 0.6
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								fscore = 0
 | 
					 | 
					 | 
					 | 
								fscore = 0
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								#
 | 
					 | 
					 | 
					 | 
								#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								# We need to find an appropriate epsilon for the predictions
 | 
					 | 
					 | 
					 | 
								# We need to find an appropriate epsilon for the predictions
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					 | 
					@ -94,22 +97,31 @@ class AnomalyDetection:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									
 | 
					 | 
					 | 
					 | 
									
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									
 | 
					 | 
					 | 
					 | 
									
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									__operf__ = self.gPerformance(px,yo['test'])
 | 
					 | 
					 | 
					 | 
									__operf__ = self.gPerformance(px,yo['test'])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									print __operf__
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									if __operf__['fscore'] == 1 :
 | 
					 | 
					 | 
					 | 
									if __operf__['fscore'] == 1 :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										break
 | 
					 | 
					 | 
					 | 
										continue
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									if perf is None :
 | 
					 | 
					 | 
					 | 
									if perf is None :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										perf = __operf__['fscore']
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									elif perf['fscore'] < __perf__['fscore'] and __operf__['fscore']> 0.5 :
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
										perf = __operf__
 | 
					 | 
					 | 
					 | 
										perf = __operf__
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									
 | 
					 | 
					 | 
					 | 
									elif perf['fscore'] < __operf__['fscore'] and __operf__['fscore'] > ACCEPTABLE_FSCORE :
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
										perf = __operf__
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									perf['epsilon'] = Epsilon
 | 
					 | 
					 | 
					 | 
									perf['epsilon'] = Epsilon
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								# At this point we are assuming we came out of the whole thing with an acceptable performance
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								# The understanding is that error drives performance thus we reject fscore==1
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								
 | 
					 | 
					 | 
					 | 
								
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								
 | 
					 | 
					 | 
					 | 
								if perf and perf['fscore'] > ACCEPTABLE_FSCORE :
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								if perf and perf['fscore'] > 0.5 :
 | 
					 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									return {"label":value,"parameters":p,"performance":perf}
 | 
					 | 
					 | 
					 | 
									return {"label":value,"parameters":p,"performance":perf}
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								else:
 | 
					 | 
					 | 
					 | 
								else:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
									return None
 | 
					 | 
					 | 
					 | 
									return None
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							return None
 | 
					 | 
					 | 
					 | 
							return None
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						"""
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							This function determines if the preconditions for learning are met
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							To do so, the parameters are passed to the function
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							p
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						"""
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
						def canLearn(self,p) :
							"""
								Placeholder: no precondition check is implemented yet.
								@param p	parameters to inspect (currently ignored)
								@return		always None
							"""
							return None
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
						def getLabel(self,yo,label_conf):
							"""
								Converts rows of raw label values into binary labels
								@param yo		iterable of rows, each row being an iterable of hashable values
								@param label_conf	mapping whose "1" entry lists the values considered positive
								@return		list holding 1 for every row that shares at least one value with label_conf["1"], 0 otherwise
							"""
							#
							# The set of positive values does not depend on the row,
							# so it is built once instead of once per row
							#
							positives = set(label_conf["1"])
							return [ int(len(positives.intersection(item)) > 0) for item in yo ]
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					 | 
					@ -188,8 +200,15 @@ class AnomalyDetection:
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
								return None
 | 
					 | 
					 | 
					 | 
								return None
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							r = np.array([ np.sqrt(np.var(m[i,:])) for i in range(0,n)])
 | 
					 | 
					 | 
					 | 
							r = np.array([ np.sqrt(np.var(m[i,:])) for i in range(0,n)])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							#
 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# Before we normalize the data we must ensure there is some level of movement in this application
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							# A lack of movement suggests we may not have enough information to do anything
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							if 0 in r :
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
								return None
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							#-- Normalizing the matrix then we will compute covariance matrix
 | 
					 | 
					 | 
					 | 
							#-- Normalizing the matrix then we will compute covariance matrix
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							#
 | 
					 | 
					 | 
					 | 
							#
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
					 | 
							
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							m = np.array([ (m[i,:] - u[i])/r[i] for i in range(0,n)])
 | 
					 | 
					 | 
					 | 
							m = np.array([ (m[i,:] - u[i])/r[i] for i in range(0,n)])
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							sigma = np.cov(m)
 | 
					 | 
					 | 
					 | 
							sigma = np.cov(m)
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					 | 
							sigma = [ list(row) for row in sigma]
 | 
					 | 
					 | 
					 | 
							sigma = [ list(row) for row in sigma]
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
						
					 | 
					 | 
					
 
 |