From bbca190ba4dc0f163f051cd80a0b38df1ee3e416 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Wed, 12 Dec 2018 15:39:57 -0600 Subject: [PATCH] developing scalable models --- src/models/__init__.py | 86 ++++++++++++++++++++++++++++++++++ src/models/basic.py | 66 -------------------------- src/models/factory.py | 26 ++++++++++ src/models/free/__init__.py | 6 +++ src/models/free/apps.py | 15 +++--- src/models/paid/__init__.py | 4 +- src/models/paid/__init__.pyc | Bin 0 -> 493 bytes src/models/paid/anomalies.py | 6 +++ src/models/paid/anomalies.pyc | Bin 0 -> 588 bytes src/models/paid/folders.py | 1 + src/models/paid/rank.py | 7 +++ src/models/paid/rank.pyc | Bin 0 -> 756 bytes test/TestModel.py | 25 ++++++++++ 13 files changed, 168 insertions(+), 74 deletions(-) delete mode 100644 src/models/basic.py create mode 100644 src/models/factory.py create mode 100644 src/models/paid/__init__.pyc create mode 100644 src/models/paid/anomalies.py create mode 100644 src/models/paid/anomalies.pyc create mode 100644 src/models/paid/folders.py create mode 100644 src/models/paid/rank.py create mode 100644 src/models/paid/rank.pyc create mode 100644 test/TestModel.py diff --git a/src/models/__init__.py b/src/models/__init__.py index e69de29..dcb57d4 100644 --- a/src/models/__init__.py +++ b/src/models/__init__.py @@ -0,0 +1,86 @@ +"""" + This class defines the basic structure for a model, models can be either statistical or machine learning + and will be tightly coupled with the rendering engines (matplotlib or chartjs) +""" +import pandas as pd + +class model : + """ + This model provides an overview of the raw data provided a list of variables. If one variable is provided a regression line will be added. + The intent of this model is to allow the user to visualize the distribution and trend of the data as is + """ + def __init__(self,**args): + """ + @param data + @param node name of the node + @param y_attr attributes on the y_axis + @param x_attr attributes on the x_axis + """ + self.data = args['data'] + #self.node = args['node'] + self.months = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"} + self.cache = {} + if 'type' not in args : + self.set("type","scatter") + #self.x_attr = args['x_attr'] + #self.y_attr = args['y_attr'] + #self.set("x",self.data[x_attr].tolist()) + #self.set("y",self.data[y_attr].tolist()) + def can_do(self): + """ + This function will determine if the model can be processed or has met the preconditions for processing + """ + return self.data.shape[0] > 1 and self.data.shape[1] > 2 + def format_date(self,row): + m = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"} + return "-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ]) + def compute(self): + """ + We compute a simple regression if and only if a single attribute is provided. + The framework of choice to compute the regression (for now) sklearn + @TODO: Find ways perhaps to use tensorflow + """ + def set(self,key,value): + self.cache[key] = value + def get(self,key): + return self.cache[key] +# class simple: +# class app_status(model): +# """ +# This model will perform a simple count of application status +# The intent is to quickly inform the user if there's a crash +# """ +# def __init(self,**args): +# model.__init__(self,**args) +# def compute(self): +# """ +# This function performs the actual counts associated with the status of an application +# """ +# df = self.data[df.name.str.contains('other',na=False)==False] +# x_crash = df.status.str.contains('X').sum() +# x_idle = df.status.str.contains('S').sum() +# x_run = df.shape[0] - x_crash - x_idle +# odf = pd.DataFrame({"labels":['crash','idle','running'],"counts":[x_crash,x_idle,x_run]}) +# self.set("type","doughnut") +# # self.set("labels",["crash","idle","running"]) +# # self.set("data",{"data":[x_crash,x_idle,x_run]}) +# self.set('data',odf) +# if x_crash > 0 : +# self.set("analysis"," ".join([x_crash,"applications found out of ",str(df.shape[0]),"monitored" ])) +# class app_resource(model): +# """ +# This model will group the applications that are monitored and the rest of the system to guage resource consumption (CPU,RAM) +# """ +# def __init__(self,**args): +# model.__init__(self,**args) +# def compute(self): +# N = self.data.shape[0] - 1 + +# df = pd.DataFrame(self.data[self.data.name == 'other'].sum()[['cpu','mem']] ) .T +# df = df.append(pd.DataFrame( self.data[self.data.name != 'other'].sum()[['cpu','mem']] ).T) +# df['labels'] = ['other','monitored'] +# # other_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)]) +# # watch_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)==False]) +# # datasets = [[other_df.cpu.sum(),watch_df.cpu.sum()],[other_df.mem.sum(),watch_df.mem.sum()]] +# self.set("data",df) +# self.set("type","bar") diff --git a/src/models/basic.py b/src/models/basic.py deleted file mode 100644 index 0f7841e..0000000 --- a/src/models/basic.py +++ /dev/null @@ -1,66 +0,0 @@ -"""" - This class defines the basic structure for a model, models can be either statistical or machine learning - and will be tightly coupled with the rendering engines (matplotlib or chartjs) -"""" - -class model : - """ - This model provides an overview of the raw data provided a list of variables. If one variable is provided a regression line will be added. - The intent of this model is to allow the user to visualize the distribution and trend of the data as is - """ - def __init__(**args): - self.data = args['data'] - self.node = args['node'] - self.months = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"} - self.cache = {} - self.set("type","scatter") - def can_do(self): - return False - def format_date(self,row): - m = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"} - return "-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ]) - - def set(self,key,value): - self.cache[key] = value - def get(self,key): - return self.cache[key] -# class simple: -# class app_status(model): -# """ -# This model will perform a simple count of application status -# The intent is to quickly inform the user if there's a crash -# """ -# def __init(self,**args): -# model.__init__(self,**args) -# def compute(self): -# """ -# This function performs the actual counts associated with the status of an application -# """ -# df = self.data[df.name.str.contains('other',na=False)==False] -# x_crash = df.status.str.contains('X').sum() -# x_idle = df.status.str.contains('S').sum() -# x_run = df.shape[0] - x_crash - x_idle -# odf = pd.DataFrame({"labels":['crash','idle','running'],"counts":[x_crash,x_idle,x_run]}) -# self.set("type","doughnut") -# # self.set("labels",["crash","idle","running"]) -# # self.set("data",{"data":[x_crash,x_idle,x_run]}) -# self.set('data',odf) -# if x_crash > 0 : -# self.set("analysis"," ".join([x_crash,"applications found out of ",str(df.shape[0]),"monitored" ])) -# class app_resource(model): -# """ -# This model will group the applications that are monitored and the rest of the system to guage resource consumption (CPU,RAM) -# """ -# def __init__(self,**args): -# model.__init__(self,**args) -# def compute(self): -# N = self.data.shape[0] - 1 - -# df = pd.DataFrame(self.data[self.data.name == 'other'].sum()[['cpu','mem']] ) .T -# df = df.append(pd.DataFrame( self.data[self.data.name != 'other'].sum()[['cpu','mem']] ).T) -# df['labels'] = ['other','monitored'] -# # other_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)]) -# # watch_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)==False]) -# # datasets = [[other_df.cpu.sum(),watch_df.cpu.sum()],[other_df.mem.sum(),watch_df.mem.sum()]] -# self.set("data",df) -# self.set("type","bar") diff --git a/src/models/factory.py b/src/models/factory.py new file mode 100644 index 0000000..b053c6b --- /dev/null +++ b/src/models/factory.py @@ -0,0 +1,26 @@ +import models +import free +import paid +def instance(id,**args): + """ + Returns an instance of a model given the following : + @param data + @param x_attr + @param y_attr + @param node + + """ + collection = [] + data = args['data'] + for pkg_name in ['apps','folders'] : + + if pkg_name in data and pkg_name in dir(eval(id)): + records = data[pkg_name] + module = eval(".".join([id,pkg_name])) + collection += [ eval(".".join([id,pkg_name,name]))(data=records) for name in dir(module) if not name.startswith('__')] + # + # let's create the instances and run the models and return the caches of each model + # + + return collection + diff --git a/src/models/free/__init__.py b/src/models/free/__init__.py index 8b13789..3331532 100644 --- a/src/models/free/__init__.py +++ b/src/models/free/__init__.py @@ -1 +1,7 @@ +""" +This package serves various FREE models in order to provide insight for apps and folders +The models will show basic general trends and occasionally a regression if applicable. +""" +#import folders +import apps diff --git a/src/models/free/apps.py b/src/models/free/apps.py index 9d16bed..510ecc3 100644 --- a/src/models/free/apps.py +++ b/src/models/free/apps.py @@ -8,8 +8,9 @@ @TODO: Include process counts in the equation so as to add another variable (good for ml) """ -from models.basic import model - +# from models.basic import * +# import models.basic.model as model +from models import model class status(model): """ This model will perform a simple count of application status @@ -19,9 +20,9 @@ class status(model): model.__init__(self,**args) def compute(self): """ - This function performs the actual counts associated with the status of an application + This function performs the actual counts associated with the status of an application """ - df = self.data[df.name.str.contains('other',na=False)==False] + df = self.data[self.data.name.str.contains('other',na=False)==False] x_crash = df.status.str.contains('X').sum() x_idle = df.status.str.contains('S').sum() x_run = df.shape[0] - x_crash - x_idle @@ -55,8 +56,8 @@ class trend(model): """ def __init__(self,**args): model.__init__(self,**args) - self.attr_name = args['name'] - self.attr_values= args['values'] + #self.attr_name = args['name'] + #self.attr_values= args['values'] def compute(self): df = self.data[self.data[self.attr_name].isin(self.attr_values)] cols = ['cpu','mem'] @@ -67,4 +68,4 @@ class trend(model): # optimizer = tf.train.RMSPropOptimizer(0.001) -# model.compile(loss='mse', optimizer=optimizer,metrics=['mae']) \ No newline at end of file +# model.compile(loss='mse', optimizer=optimizer,metrics=['mae']) diff --git a/src/models/paid/__init__.py b/src/models/paid/__init__.py index 69d7c7e..d01feab 100644 --- a/src/models/paid/__init__.py +++ b/src/models/paid/__init__.py @@ -7,4 +7,6 @@ - Anomaly detection - Clustering - And Crash Prediction (regression) -""" \ No newline at end of file +""" +import anomalies +import rank diff --git a/src/models/paid/__init__.pyc b/src/models/paid/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c51fd96d2324344a639bd33ff62e86dbe1e2f11 GIT binary patch literal 493 zcmYjNZA-&25KcSsrRoR&!Ld&rxJCUWGRDTf>KJSw3?XFA(FWV3B-v!Y+aI9UOmGLi zJbCW!x%BCM-)8UqyN`nXMM~$CKHsOL7~4=zm`YilunmDDPT3~y$W`9!dao14GN8zd z0X#iCKEtY!E)3VUy@#dL3f`>3+Q*(%z&*n22o?uywi3p^Bc`FkVO!}@VVg;o#m;qw z$LS_cgLDYBbS2gY*h-Iz7`rnH8d+4@P!Xjw+LR!P{L*y|v{moXfi_;3RqzCYg_kJb zJ$MK+V;f0=3Ii5FTXRyI*E@QgBgGSPM%r^HeFaO0O2-+<9hMHgr};sqe<&8wns7n> z2%&5tM7Sa><2sE#oYBom^O@7h{*xcml~=Yw9*yCp4%|mOgKfD-NB_gUEBMhB-jcsu c2yJu_V$>eOB?*kFxT|rJ5%%4t^W-M^1+i6y?*IS* literal 0 HcmV?d00001 diff --git a/src/models/paid/anomalies.py b/src/models/paid/anomalies.py new file mode 100644 index 0000000..9323a10 --- /dev/null +++ b/src/models/paid/anomalies.py @@ -0,0 +1,6 @@ +from models import model + +class apps(model): + pass +class user(model): + pass diff --git a/src/models/paid/anomalies.pyc b/src/models/paid/anomalies.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3249d86753c2d4918b2c7849f675c923dea82efc GIT binary patch literal 588 zcmb_ZK~BRk5F96MK$Vc-!W(=6e*h5T0UX>D6;fo?1|r3cm31p|h94u&u5#yvBai{J!T&0-hg*9*7Dn1?4LQg%<^1;}iG*o>mq7>`$E^)Ed6Q zQlD^}NehrY7^z>`iH0;HW?YUa=tLl(JhH!g;@&5N?&O52{7 z(^o8^FuG-C-i8Fjd0TI09H+~0H26NBYPp#o}!rGl~%oWe%I#`pw2fahfdoAl~v zP-ED%SI2{zz)rBp&Uoqan_H%sg%LyDSXYIFS<$vMxRLn>E}AenhzYaEm?MiAg2;Kk zEoztNVcHFNrCr6t!&@w&V|2qzzc#hg6x`nFmojLo3m>jR(`s^l@4P0z)}0m7ZBbh7 zi|yNW`)(N$3~`4nM-#wZVz|iPEL