developing scalable models

6 years ago · bbca190ba4
parent a975202693
commit bbca190ba4
13 changed files with 168 additions and 74 deletions
--- a/src/models/init.py
+++ b/src/models/init.py
@ -0,0 +1,86 @@
+"""
+    This class defines the basic structure for a model. Models can be either statistical or machine learning
+    and will be tightly coupled with the rendering engines (matplotlib or chartjs)
+"""
+import pandas as pd
+
+class model :
+    """
+    This model provides an overview of the raw data provided a list of variables. If one variable is provided a regression line will be added.
+    The intent of this model is to allow the user to visualize the distribution and trend of the data as is
+    """
+    def __init__(self,**args):
+        """
+            @param data     dataset to be analyzed (required)
+            @param node     name of the node (currently unused)
+            @param y_attr   attributes on the y_axis (currently unused)
+            @param x_attr   attributes on the x_axis (currently unused)
+        """
+        self.data = args['data']
+        #self.node = args['node']
+        self.months = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
+        self.cache = {}
+        if 'type' not in args :
+            self.set("type","scatter")
+        #self.x_attr = args['x_attr']
+        #self.y_attr = args['y_attr']
+        #self.set("x",self.data[x_attr].tolist())
+        #self.set("y",self.data[y_attr].tolist())
+    def can_do(self):
+        """
+        This function will determine if the model can be processed or has met the preconditions for processing
+        """
+        return self.data.shape[0] > 1 and self.data.shape[1] > 2
+    def format_date(self,row):
+        m = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
+        return "-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ])
+    def compute(self):
+        """
+        We compute a simple regression if and only if a single attribute is provided.
+        The framework of choice to compute the regression (for now) is sklearn
+        @TODO: Find ways perhaps to use tensorflow
+        """
+    def set(self,key,value):
+        self.cache[key] = value
+    def get(self,key):
+        return self.cache[key]
+# class simple:
+#     class app_status(model):
+#         """
+#         This model will perform a simple count of application status
+#         The intent is to quickly inform the user if there's a crash     
+#         """
+#         def __init(self,**args):
+#             model.__init__(self,**args)
+#         def compute(self):
+#             """
+#                 This function performs the actual counts associated with the status of an application 
+#             """
+#             df      = self.data[df.name.str.contains('other',na=False)==False]
+#             x_crash = df.status.str.contains('X').sum()
+#             x_idle  = df.status.str.contains('S').sum()
+#             x_run   = df.shape[0] - x_crash - x_idle
+#             odf = pd.DataFrame({"labels":['crash','idle','running'],"counts":[x_crash,x_idle,x_run]})
+#             self.set("type","doughnut")
+#             # self.set("labels",["crash","idle","running"])
+#             # self.set("data",{"data":[x_crash,x_idle,x_run]})
+#             self.set('data',odf)
+#             if x_crash > 0 :
+#                 self.set("analysis"," ".join([x_crash,"applications found out of ",str(df.shape[0]),"monitored" ]))
+#     class app_resource(model):
+#         """
+#             This model will group the applications that are monitored and the rest of the system to gauge resource consumption (CPU,RAM)
+#         """      
+#         def __init__(self,**args):
+#             model.__init__(self,**args)    
+#         def compute(self):
+#             N = self.data.shape[0] - 1
+            
+#             df = pd.DataFrame(self.data[self.data.name == 'other'].sum()[['cpu','mem']] ) .T
+#             df = df.append(pd.DataFrame( self.data[self.data.name != 'other'].sum()[['cpu','mem']] ).T)
+#             df['labels'] = ['other','monitored']
+#             # other_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)])
+#             # watch_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)==False])
+#             # datasets = [[other_df.cpu.sum(),watch_df.cpu.sum()],[other_df.mem.sum(),watch_df.mem.sum()]]
+#             self.set("data",df)
+#             self.set("type","bar")
--- a/src/models/basic.py
+++ b/src/models/basic.py
@ -1,66 +0,0 @@
-""""
-    This class defines the basic structure for a model, models can be either statistical or machine learning
-    and will be tightly coupled with the rendering engines (matplotlib or chartjs)
-""""
-
-class model :
-    """
-    This model provides an overview of the raw data provided a list of variables. If one variable is provided a regression line will be added.
-    The intent of this model is to allow the user to visualize the distribution and trend of the data as is
-    """
-    def __init__(**args):
-        self.data = args['data']
-        self.node = args['node']
-        self.months = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
-        self.cache = {}
-        self.set("type","scatter")
-    def can_do(self):
-        return False
-    def format_date(self,row):
-        m = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
-        return "-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ])
-    
-    def set(self,key,value):
-        self.cache[key] = value
-    def get(self,key):
-        return self.cache[key]
-# class simple:
-#     class app_status(model):
-#         """
-#         This model will perform a simple count of application status
-#         The intent is to quickly inform the user if there's a crash     
-#         """
-#         def __init(self,**args):
-#             model.__init__(self,**args)
-#         def compute(self):
-#             """
-#                 This function performs the actual counts associated with the status of an application 
-#             """
-#             df      = self.data[df.name.str.contains('other',na=False)==False]
-#             x_crash = df.status.str.contains('X').sum()
-#             x_idle  = df.status.str.contains('S').sum()
-#             x_run   = df.shape[0] - x_crash - x_idle
-#             odf = pd.DataFrame({"labels":['crash','idle','running'],"counts":[x_crash,x_idle,x_run]})
-#             self.set("type","doughnut")
-#             # self.set("labels",["crash","idle","running"])
-#             # self.set("data",{"data":[x_crash,x_idle,x_run]})
-#             self.set('data',odf)
-#             if x_crash > 0 :
-#                 self.set("analysis"," ".join([x_crash,"applications found out of ",str(df.shape[0]),"monitored" ]))
-#     class app_resource(model):
-#         """
-#             This model will group the applications that are monitored and the rest of the system to guage resource consumption (CPU,RAM)
-#         """      
-#         def __init__(self,**args):
-#             model.__init__(self,**args)    
-#         def compute(self):
-#             N = self.data.shape[0] - 1
-            
-#             df = pd.DataFrame(self.data[self.data.name == 'other'].sum()[['cpu','mem']] ) .T
-#             df = df.append(pd.DataFrame( self.data[self.data.name != 'other'].sum()[['cpu','mem']] ).T)
-#             df['labels'] = ['other','monitored']
-#             # other_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)])
-#             # watch_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)==False])
-#             # datasets = [[other_df.cpu.sum(),watch_df.cpu.sum()],[other_df.mem.sum(),watch_df.mem.sum()]]
-#             self.set("data",df)
-#             self.set("type","bar")
--- a/src/models/factory.py
+++ b/src/models/factory.py
@ -0,0 +1,26 @@
+import models
+import free
+import paid
+def instance(id,**args):
+	""" 
+	Returns an instance of a model given the following :
+        @param data
+        @param x_attr
+        @param y_attr
+        @param node
+
+        """
+	collection = []
+        data = args['data']
+	for pkg_name in ['apps','folders'] :
+            
+            if pkg_name in data and pkg_name in dir(eval(id)):
+                records = data[pkg_name]
+                module = eval(".".join([id,pkg_name]))
+                collection += [ eval(".".join([id,pkg_name,name]))(data=records) for name in dir(module) if not name.startswith('__')]
+	#   
+	# let's create the instances and run the models and return the caches of each model
+	#   
+	
+	return collection
+
--- a/src/models/free/init.py
+++ b/src/models/free/init.py
@ -1 +1,7 @@
+"""
+This package serves various FREE models in order to provide insight for apps and folders
+The models will show basic general trends and occasionally a regression if applicable.

+"""
+#import folders
+import apps
--- a/src/models/free/apps.py
+++ b/src/models/free/apps.py
@ -8,8 +8,9 @@

    @TODO: Include process counts in the equation so as to add another variable (good for ml)
 """
-from models.basic import model
-
+# from models.basic import *
+# import models.basic.model as model
+from models import model
 class status(model):
        """
        This model will perform a simple count of application status
@ -19,9 +20,9 @@ class status(model):
            model.__init__(self,**args)
        def compute(self):
            """
-                This function performs the actual counts associated with the status of an application 
+            This function performs the actual counts associated with the status of an application 
            """
-            df      = self.data[df.name.str.contains('other',na=False)==False]
+            df      = self.data[self.data.name.str.contains('other',na=False)==False]
            x_crash = df.status.str.contains('X').sum()
            x_idle  = df.status.str.contains('S').sum()
            x_run   = df.shape[0] - x_crash - x_idle
@ -55,8 +56,8 @@ class trend(model):
    """
    def __init__(self,**args):
        model.__init__(self,**args)
-        self.attr_name = args['name']
-        self.attr_values= args['values']
+        #self.attr_name = args['name']
+        #self.attr_values= args['values']
    def compute(self):
        df = self.data[self.data[self.attr_name].isin(self.attr_values)]
        cols = ['cpu','mem']
--- a/src/models/paid/init.py
+++ b/src/models/paid/init.py
@ -8,3 +8,5 @@
        - Clustering
        - And Crash Prediction (regression)
 """
+import anomalies
+import rank
--- a/src/models/paid/init.pyc
+++ b/src/models/paid/init.pyc
--- a/src/models/paid/anomalies.py
+++ b/src/models/paid/anomalies.py
@ -0,0 +1,6 @@
+from models import model
+
+class apps(model):
+    pass
+class user(model):
+    pass
--- a/src/models/paid/anomalies.pyc
+++ b/src/models/paid/anomalies.pyc
--- a/src/models/paid/folders.py
+++ b/src/models/paid/folders.py
@ -0,0 +1 @@
+
--- a/src/models/paid/rank.py
+++ b/src/models/paid/rank.py
@ -0,0 +1,7 @@
+from models import model
+class apps (model):
+	pass
+class folders(model):
+	pass
+class user(model):
+	pass
--- a/src/models/paid/rank.pyc
+++ b/src/models/paid/rank.pyc
--- a/test/TestModel.py
+++ b/test/TestModel.py
@ -0,0 +1,25 @@
+from utils import transport
+import unittest
+import json
+import os
+from models import model, factory
+import pandas as pd
+
+path = os.environ['CONFIG_PATH']
+f = open(path)
+CONFIG = json.loads( f.read())
+CONFIG['store']['args']['uid'] = 'cus_D2x3ItYNfWjSY3'
+f.close()
+dfactory = transport.DataSourceFactory()
+reader = dfactory.instance(type='CouchdbReader',args = CONFIG['store']['args'])
+
+class TestModel(unittest.TestCase):
+	def setUp(self):
+		self.document = reader.read()		
+		self.models = []
+	def test_ScatterPlot(self):
+		r = reader.view('clients/logs',key=CONFIG['store']['args']['uid'])
+		print factory.instance('free',data=r)
+		pass
+if __name__ == '__main__' :
+	unittest.main()