fixing learning bugs on integration

master
Steve L. Nyemba 8 years ago
parent b787ee0608
commit cbef913877

@@ -23,40 +23,17 @@ import monitor
import Queue
from utils.transport import *
from utils.workers import ThreadManager, Factory
from utils.ml import ML,AnomalyDetection
import utils.params as SYS_ARGS
import atexit
PARAMS = {'context':''}
if len(sys.argv) > 1:
N = len(sys.argv)
for i in range(1,N):
value = None
if sys.argv[i].startswith('--'):
key = sys.argv[i].replace('-','')
if i + 1 < N:
value = sys.argv[i + 1] = sys.argv[i+1].strip()
if key and value:
PARAMS[key] = value
i += 2
app = Flask(__name__)
app.config['SECRET_KEY'] = '!h8-[0v8]247-4-360'
#app.secret_key = 'A0Zr98j/3yX R~XHH!jmN]LWX=?RT'
PARAMS = SYS_ARGS.PARAMS
f = open(PARAMS['path'])
CONFIG = json.loads(f.read())
#HANDLERS= {}
#for key in CONFIG['monitor'] :
#className = CONFIG['monitor'][key]['class']
#ref = "".join(["monitor.",className,"()"])
#ref = eval(ref)
#HANDLERS[key] = {"class":ref,"config":CONFIG['monitor'][key]["config"]}
f.close()
#
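The removed block above parsed --key value pairs straight out of sys.argv; this commit replaces it with the shared utils.params module (imported as SYS_ARGS). A minimal sketch of the parsing that module presumably centralizes (its actual contents are not shown in this diff):

import sys

PARAMS = {'context': ''}
if len(sys.argv) > 1:
    N = len(sys.argv)
    for i in range(1, N):
        # accept arguments of the form --key value
        if sys.argv[i].startswith('--') and i + 1 < N:
            key = sys.argv[i].replace('-', '')
            value = sys.argv[i + 1].strip()
            if key and value:
                PARAMS[key] = value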
@@ -66,12 +43,7 @@ p = CONFIG['store']['args']
class_read = CONFIG['store']['class']['read']
class_write= CONFIG['store']['class']['write']
factory = DataSourceFactory()
#gWriter = factory.instance(type='CouchdbWritera',args=p)
#gReader = factory.instance(type='CouchdbReader',args=p)
#p['qid'] = HANDLERS['processes']['config'].keys()
gReader = factory.instance(type=class_read,args=p)
#gWriter = factory.instance(type=class_write,args=p)
#mthread = monitor.Monitor(HANDLERS,gWriter,'processes',)
atexit.register(ThreadManager.stop)
@app.route('/get/<id>')
@@ -82,8 +54,12 @@ def procs(id):
r = {}
for label in d :
if label not in ['learn'] :
index = len(d[label]) - 1
r[label] = d[label][index]
#for row in r[label] :
#yo = ML.Extract(['status'],row)
#xo = ML.Extract(['cpu_usage','memory_usage'],row)
except Exception, e:
print e
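The handler above returns only the most recent record per label; the index arithmetic is equivalent to Python's negative indexing:

r[label] = d[label][-1]  # same as d[label][len(d[label]) - 1]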
@@ -122,13 +98,14 @@ def trends ():
p = CONFIG['store']['args']
class_read = CONFIG['store']['class']['read']
p['qid'] =[id] #HANDLERS['processes']['config'].keys()
gReader = factory.instance(type=class_read,args=p)
r = gReader.read()
if id in r:
r = r[id] #--matrix
series = []
for row in r:
series += [item for item in row if str(item['label'])== app]
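The loop above flattens the per-row matrix into a single series for the requested app; an equivalent single comprehension, noted here only for readability:

series = [item for row in r for item in row if str(item['label']) == app]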
@@ -156,19 +133,37 @@ def dashboard():
"""
@app.route('/learn')
def learn():
app = request.args.get('app')
id = request.args.get('id')
global CONFIG
p = CONFIG['store']['args']
class_read = CONFIG['store']['class']['read']
p['qid'] =[id] #HANDLERS['processes']['config'].keys()
gReader = factory.instance(type=class_read,args=p)
d = gReader.read()
if 'learn' in d :
logs = d['learn']
del d['learn']
else :
logs = []
r = []
if 'id' in request.args:
id = request.args['id']
d = d[id]
print CONFIG['monitor']['processes']['config'][id]
print (apps)
#apps = list(set(ML.Extract(['label'],d)))
p = AnomalyDetection()
#for row in d :
#xo = ML.Filter('label',app,d)
#info = ML.Filter('label',app,logs)
#value = p.predict(xo,info)
#print app,value
#if value is not None:
# r.append(value)
print r
return json.dumps("[]")
r = gReader.read()
r = r[id]
r = ML.Filter('label',app,r)
label = ML.Extract(['status'],r)
r = ML.Extract(['cpu_usage','memory_usage'],r)
@app.route('/anomalies/status')
def anomalies_status():
@@ -180,7 +175,7 @@ def anomalies_get():
if __name__== '__main__':
ThreadManager.start(CONFIG)
#ThreadManager.start(CONFIG)
app.run(host='0.0.0.0',debug=True,threaded=True)

@@ -16,9 +16,12 @@ class ML:
# @TODO: Make sure this approach works across all transport classes
# There may be an issue with how the data is stored ... it may not scale
#
return [item[0] for item in data if item and attr in item[0] and item[0][attr] == value]
@staticmethod
def Extract(lattr,data):
if isinstance(lattr,basestring):
lattr = [lattr]
return [[row[id] for id in lattr] for row in data]
@@ -27,6 +30,7 @@ class ML:
@TODO: computationally determine epsilon
"""
class AnomalyDetection:
def split(self,data,index=-1,threshold=0.8) :
N = len(data)
# if N < LIMIT:
@@ -64,12 +68,21 @@ class AnomalyDetection:
yo = self.split(yo)
if xo['train'] :
E = 0.01
for i in range(0,10):
Epsilon = E + (2*E*i)
p = self.gParameters(xo['train'])
px = self.gPx(p['mean'],p['cov'],xo['test'])
px = self.gPx(p['mean'],p['cov'],xo['test'],Epsilon)
perf = self.gPerformance(px,yo['test'])
return {"parameters":p,"performance":perf}
if perf['fscore'] > 0 :
perf['epsilon'] = Epsilon
break
return {"label":value,"parameters":p,"performance":perf}
return None
def getLabel(self,yo,label_conf):
return [ int(len(set(item) & set(label_conf["1"]))>0) for item in yo ]
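The learn method above sweeps ten candidate thresholds, Epsilon = E + 2*E*i with E = 0.01 (i.e. 0.01, 0.03, ..., 0.19), and keeps the first one that yields a non-zero f-score. A standalone sketch of that sweep, with score() standing in for the gParameters/gPx/gPerformance pipeline:

def sweep_epsilon(score, E=0.01, steps=10):
    # try increasingly permissive thresholds until one produces a usable f-score
    for i in range(0, steps):
        epsilon = E + (2 * E * i)   # 0.01, 0.03, ..., 0.19
        perf = score(epsilon)       # assumed to return e.g. {'fscore': 0.7, ...}
        if perf['fscore'] > 0:
            perf['epsilon'] = epsilon
            return perf
    return None                     # no threshold produced a positive f-score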
@@ -77,9 +90,10 @@
"""
This function will compute the probability density function given a particular event/set of events
The return value is a list of [px, int(px < EPSILON)] pairs
@pre xu.shape[0] == sigma.shape[0] == sigma.shape[1]
"""
def gPx(self,xu,sigma,data,EPSILON=0.25):
def gPx(self,xu,sigma,data,EPSILON=0.01):
n = len(data[0])
r = []
@@ -95,6 +109,21 @@
px = float(b/a)
r.append([px,int(px < EPSILON)])
return r
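gPx scores each test row under a multivariate Gaussian density and flags it as anomalous when the density falls below EPSILON. The definitions of a and b are cut off in this hunk; assuming they are the usual normalizer and exponential term, a numpy sketch of the per-row density would be:

import numpy as np

def gaussian_px(xu, sigma, row):
    # p(x) = exp(-(x-u)' S^-1 (x-u) / 2) / sqrt((2*pi)^n * det(S))
    xu = np.array(xu, dtype=float)
    sigma = np.array(sigma, dtype=float)
    x = np.array(row, dtype=float) - xu
    n = x.shape[0]
    a = np.sqrt(((2 * np.pi) ** n) * np.linalg.det(sigma))
    b = np.exp(-0.5 * x.dot(np.linalg.inv(sigma)).dot(x))
    return float(b / a)   # anomaly when this falls below epsilon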
"""
This function uses stored learnt information to predict on raw data
In this case it will determine whether we have an anomaly or not
@param xo raw observations (matrix)
@param info stored learning information for this application (parameters and epsilon)
"""
def predict(self,xo,info):
xo = ML.Extract(info['extract'],xo)
if not xo :
return None
sigma = info['parameters']['cov']
xu = info['parameters']['mean']
epsilon = info['performance']['epsilon']
return self.gPx(xu,sigma,xo,epsilon)
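predict consumes the dictionary that learn produces, so a call would look roughly like the following; the field names mirror the code above, the values are purely illustrative:

# illustrative only: 'info' mirrors what learn() returns / the store holds
xo = [{'cpu_usage': 0.92, 'memory_usage': 0.88, 'label': 'apache'}]
info = {
    'extract': ['cpu_usage', 'memory_usage'],         # features to pull out of xo
    'parameters': {'mean': [0.4, 0.3],
                   'cov': [[0.1, 0.0], [0.0, 0.2]]},  # learnt Gaussian parameters
    'performance': {'epsilon': 0.03}                  # threshold found by learn()
}
handler = AnomalyDetection()
result = handler.predict(xo, info)  # list of [px, is_anomaly] pairs, or None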
"""
This function computes performance metrics, i.e. precision, recall and f-score
for details visit https://en.wikipedia.org/wiki/Precision_and_recall
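For reference, the standard definitions used here, in terms of true/false positives and negatives:

precision = TP / (TP + FP)
recall    = TP / (TP + FN)
fscore    = 2 * precision * recall / (precision + recall)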
@@ -134,3 +163,13 @@
sigma = np.cov(m)
sigma = [ list(row) for row in sigma]
return {"cov":sigma,"mean":list(u)}
class Regression:
parameters = {}
@staticmethod
def predict(xo):
pass
def __init__(self,config):
pass

@@ -1,7 +1,7 @@
#import multiprocessing
from threading import Thread, RLock
from utils import transport
from utils.ml import AnomalyDetection
from utils.ml import AnomalyDetection,ML
import time
import monitor
import sys
@@ -76,16 +76,38 @@ class Learner(Thread) :
def run(self):
reader = self.factory.instance(type=self.reader_class,args=self.rw_args)
data = reader.read()
#
# Let's make sure we extract that which has already been learnt
#
if 'learn' in data:
r = data['learn']
del data['learn']
r = ML.Extract('label',r)
logs = [row[0] for row in r]
logs = list(set(logs))
else:
logs = []
#
# This is the motherlode of inefficiency ...
# In order to address the inefficiencies below, we adopt the following policy:
# we don't re-learn that which is already learnt. This consists in filtering out the apps that already have learning data
#
self.apps = list(set(self.apps) - set(logs))
while self.quit == False:
r = {}
lapps = list(self.apps)
for key in data :
logs = data[key]
for app in self.apps:
#
# There is poor design at this point: we need to make sure things already tested don't get tested again
# This creates inefficiencies (a cartesian product)
#
for app in lapps:
handler = AnomalyDetection()
value = handler.learn(logs,'label',app,self.features,self.yo)
@@ -94,17 +116,23 @@ class Learner(Thread) :
if key not in r:
r[key] = {}
r[key][app] = value
i = lapps.index(app)
del lapps[i]
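The policy in this hunk, skip apps that already have stored learning data and drop an app from the working list as soon as a value is learnt, boils down to a few lines; learn_one stands in for the handler.learn(...) call:

already_learnt = set(logs)                  # labels already present under 'learn'
pending = list(set(apps) - already_learnt)  # only apps without stored results

for app in list(pending):
    value = learn_one(app)                  # stands in for handler.learn(logs, 'label', app, ...)
    if value is not None:
        pending.remove(app)                 # never test this app again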
#
# At this point we've already learnt everything we need to learn
# This allows a clean write to the data store once a value has been retrieved
# Removing the application from the list improves efficiency (among other things)
#
if r.keys() :
value = dict(value,**{"features":self.features})
self.lock.acquire()
writer = self.factory.instance(type=self.write_class,args=self.rw_args)
writer.write(label='learn',row=r)
writer.write(label='learn',row=value)
self.lock.release()
#
# Usually this is used for development
# @TODO : Remove this and find a healthy way to stop the server
#
if 'MONITOR_CONFIG_PATH' in os.environ:
#
# This suggests we are in development mode
