developing scalable models

community
Steve L. Nyemba 6 years ago
parent a975202693
commit bbca190ba4

@ -0,0 +1,86 @@
"""
This module defines the basic structure for a model. Models can be either statistical or machine learning
and will be tightly coupled with the rendering engines (matplotlib or chartjs)
"""
import pandas as pd
class model :
    """
    Base structure shared by the models: it keeps the raw data and a key/value cache of the
    settings and results of the model (e.g. the chart type).

    On its own this model is meant to give an overview of the raw data provided a list of variables,
    so the user can visualize the distribution and trend of the data as is. Per the design notes a
    regression line is to be added when a single variable is provided (see compute, not implemented yet).
    """
    # Month number -> abbreviated month name, defined once and shared by __init__ and format_date
    _MONTHS = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}

    def __init__(self,**args):
        """
        @param data     data the model works on (required, KeyError if missing)
        @param type     optional chart type stored in the cache, defaults to "scatter"

        node, x_attr and y_attr are accepted but currently ignored
        """
        self.data = args['data']
        self.months = dict(self._MONTHS)
        self.cache = {}
        #
        # A chart type provided by the caller used to be dropped, leaving the cache without a "type"
        # entry (get("type") would raise KeyError). It is now stored, with "scatter" as the fallback
        #
        self.set("type",args.get("type","scatter"))

    def can_do(self):
        """
        This function will determine if the model can be processed or has met the preconditions for processing:
        data.shape[0] must exceed 1 and data.shape[1] must exceed 2 (rows and columns for a pandas DataFrame)
        """
        shape = self.data.shape
        return shape[0] > 1 and shape[1] > 2

    def format_date(self,row):
        """
        Formats a row as "<Mon>-<day>-<year> <hour> h : <minute> min"
        @param row  mapping with the keys month (1..12), day, year, hour and minute
        """
        date_part = "-".join([self._MONTHS[row['month']],str(row['day']),str(row['year'])])
        time_part = " ".join([str(row['hour']),'h :',str(row['minute']),'min' ])
        return date_part +" "+ time_part

    def compute(self):
        """
        We compute a simple regression if and only if a single attribute is provided.
        The framework of choice to compute the regression (for now) sklearn
        This base implementation does nothing and returns None.
        @TODO: Find ways perhaps to use tensorflow
        """

    def set(self,key,value):
        """
        Stores a value in the cache of the model under the given key (overwrites any previous value)
        """
        self.cache[key] = value

    def get(self,key):
        """
        Returns the cached value of a key, raises KeyError if the key was never set
        """
        return self.cache[key]
# class simple:
# class app_status(model):
# """
# This model will perform a simple count of application status
# The intent is to quickly inform the user if there's a crash
# """
# def __init(self,**args):
# model.__init__(self,**args)
# def compute(self):
# """
# This function performs the actual counts associated with the status of an application
# """
# df = self.data[df.name.str.contains('other',na=False)==False]
# x_crash = df.status.str.contains('X').sum()
# x_idle = df.status.str.contains('S').sum()
# x_run = df.shape[0] - x_crash - x_idle
# odf = pd.DataFrame({"labels":['crash','idle','running'],"counts":[x_crash,x_idle,x_run]})
# self.set("type","doughnut")
# # self.set("labels",["crash","idle","running"])
# # self.set("data",{"data":[x_crash,x_idle,x_run]})
# self.set('data',odf)
# if x_crash > 0 :
# self.set("analysis"," ".join([x_crash,"applications found out of ",str(df.shape[0]),"monitored" ]))
# class app_resource(model):
# """
# This model will group the applications that are monitored and the rest of the system to gauge resource consumption (CPU,RAM)
# """
# def __init__(self,**args):
# model.__init__(self,**args)
# def compute(self):
# N = self.data.shape[0] - 1
# df = pd.DataFrame(self.data[self.data.name == 'other'].sum()[['cpu','mem']] ) .T
# df = df.append(pd.DataFrame( self.data[self.data.name != 'other'].sum()[['cpu','mem']] ).T)
# df['labels'] = ['other','monitored']
# # other_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)])
# # watch_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)==False])
# # datasets = [[other_df.cpu.sum(),watch_df.cpu.sum()],[other_df.mem.sum(),watch_df.mem.sum()]]
# self.set("data",df)
# self.set("type","bar")

@ -1,66 +0,0 @@
"""
This module defines the basic structure for a model. Models can be either statistical or machine learning
and will be tightly coupled with the rendering engines (matplotlib or chartjs)
"""
class model :
    """
    This model provides an overview of the raw data provided a list of variables. If one variable is provided a regression line will be added.
    The intent of this model is to allow the user to visualize the distribution and trend of the data as is
    """
    def __init__(self,**args):
        """
        @param data     data the model works on (required)
        @param node     name of the node (required)

        The signature used to lack `self`, which made the class impossible to instantiate
        """
        self.data = args['data']
        self.node = args['node']
        self.months = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
        self.cache = {}
        self.set("type","scatter")

    def can_do(self):
        """
        Tells whether the model can be processed, this base implementation always answers False
        """
        return False

    def format_date(self,row):
        """
        Formats a row as "<Mon>-<day>-<year> <hour> h : <minute> min"
        @param row  mapping with the keys month (1..12), day, year, hour and minute
        """
        m = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
        return "-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ])

    def set(self,key,value):
        """
        Stores a value in the cache of the model under the given key
        """
        self.cache[key] = value

    def get(self,key):
        """
        Returns the cached value of a key, raises KeyError if the key was never set
        """
        return self.cache[key]
# class simple:
# class app_status(model):
# """
# This model will perform a simple count of application status
# The intent is to quickly inform the user if there's a crash
# """
# def __init(self,**args):
# model.__init__(self,**args)
# def compute(self):
# """
# This function performs the actual counts associated with the status of an application
# """
# df = self.data[df.name.str.contains('other',na=False)==False]
# x_crash = df.status.str.contains('X').sum()
# x_idle = df.status.str.contains('S').sum()
# x_run = df.shape[0] - x_crash - x_idle
# odf = pd.DataFrame({"labels":['crash','idle','running'],"counts":[x_crash,x_idle,x_run]})
# self.set("type","doughnut")
# # self.set("labels",["crash","idle","running"])
# # self.set("data",{"data":[x_crash,x_idle,x_run]})
# self.set('data',odf)
# if x_crash > 0 :
# self.set("analysis"," ".join([x_crash,"applications found out of ",str(df.shape[0]),"monitored" ]))
# class app_resource(model):
# """
# This model will group the applications that are monitored and the rest of the system to gauge resource consumption (CPU,RAM)
# """
# def __init__(self,**args):
# model.__init__(self,**args)
# def compute(self):
# N = self.data.shape[0] - 1
# df = pd.DataFrame(self.data[self.data.name == 'other'].sum()[['cpu','mem']] ) .T
# df = df.append(pd.DataFrame( self.data[self.data.name != 'other'].sum()[['cpu','mem']] ).T)
# df['labels'] = ['other','monitored']
# # other_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)])
# # watch_df = pd.DataFrame(self.data[self.data.name.str.contains('other',na=False)==False])
# # datasets = [[other_df.cpu.sum(),watch_df.cpu.sum()],[other_df.mem.sum(),watch_df.mem.sum()]]
# self.set("data",df)
# self.set("type","bar")

@ -0,0 +1,26 @@
import models
import free
import paid
def instance(id,**args):
    """
    Returns an instance of every model class found in the package named by `id`, for each of
    the sections 'apps' and 'folders' that is present in the data AND is an attribute of the package.

    @param id       name of the model package as known to this module (e.g. 'free'), dotted names are allowed
    @param data     container of the records per section, data[section] is handed to each model as `data`
    @return         list of model instances, empty if no section of the data matches the package

    x_attr, y_attr and node are accepted but not used here

    @raise KeyError         if data is not provided
    @raise NameError        if `id` does not name anything known to this module (only looked up once a section matches)
    @raise AttributeError   if a dotted `id` can not be followed
    """
    import inspect
    collection = []
    data = args['data']
    package = None
    for pkg_name in ['apps','folders'] :
        if pkg_name not in data :
            continue
        if package is None :
            #
            # The package is looked up by name instead of being handed to eval, so `id`
            # can never be executed as an arbitrary expression
            #
            parts = id.split('.')
            if parts[0] not in globals() :
                raise NameError("name '%s' is not defined" % parts[0])
            package = globals()[parts[0]]
            for part in parts[1:] :
                package = getattr(package,part)
        if pkg_name not in dir(package) :
            continue
        records = data[pkg_name]
        module = getattr(package,pkg_name)
        #
        # let's create the instances of the models
        # Only classes are instantiated: a module also exposes whatever it imports or defines
        # (other modules, helpers, constants) and calling those with the records would fail
        # NOTE(review): a class imported by the module (e.g. a base class) is still instantiated
        #
        for name in dir(module) :
            if name.startswith('__') :
                continue
            handler = getattr(module,name)
            if inspect.isclass(handler) :
                collection.append(handler(data=records))
    return collection

@ -1 +1,7 @@
"""
This package serves various FREE models in order to provide insight for apps and folders
The models will show basic general trends and occasionally a regression if applicable.
"""
#import folders
import apps

@ -8,8 +8,9 @@
@TODO: Include process counts in the equation so as to add another variable (good for ml)
"""
from models.basic import model
# from models.basic import *
# import models.basic.model as model
from models import model
class status(model):
"""
This model will perform a simple count of application status
@ -19,9 +20,9 @@ class status(model):
model.__init__(self,**args)
def compute(self):
"""
This function performs the actual counts associated with the status of an application
This function performs the actual counts associated with the status of an application
"""
df = self.data[df.name.str.contains('other',na=False)==False]
df = self.data[self.data.name.str.contains('other',na=False)==False]
x_crash = df.status.str.contains('X').sum()
x_idle = df.status.str.contains('S').sum()
x_run = df.shape[0] - x_crash - x_idle
@ -55,8 +56,8 @@ class trend(model):
"""
def __init__(self,**args):
model.__init__(self,**args)
self.attr_name = args['name']
self.attr_values= args['values']
#self.attr_name = args['name']
#self.attr_values= args['values']
def compute(self):
df = self.data[self.data[self.attr_name].isin(self.attr_values)]
cols = ['cpu','mem']

@ -8,3 +8,5 @@
- Clustering
- And Crash Prediction (regression)
"""
import anomalies
import rank

Binary file not shown.

@ -0,0 +1,6 @@
from models import model
class apps(model):
    """
    Placeholder model for apps: everything is inherited from model, nothing is added yet
    """
class user(model):
    """
    Placeholder model for user: everything is inherited from model, nothing is added yet
    """

Binary file not shown.

@ -0,0 +1,7 @@
from models import model
class apps(model):
    """
    Placeholder model for apps: everything is inherited from model, nothing is added yet
    """
class folders(model):
    """
    Placeholder model for folders: everything is inherited from model, nothing is added yet
    """
class user(model):
    """
    Placeholder model for user: everything is inherited from model, nothing is added yet
    """

Binary file not shown.

@ -0,0 +1,25 @@
from utils import transport
import unittest
import json
import os
from models import model, factory
import pandas as pd
#
# The configuration is a JSON file whose location is given by the environment variable CONFIG_PATH
# (KeyError at import time if the variable is not set)
#
path = os.environ['CONFIG_PATH']
# The with block closes the file even if reading or parsing the JSON fails
with open(path) as f:
    CONFIG = json.load(f)
# uid used by the test below as the key of the 'clients/logs' view
CONFIG['store']['args']['uid'] = 'cus_D2x3ItYNfWjSY3'
dfactory = transport.DataSourceFactory()
reader = dfactory.instance(type='CouchdbReader',args = CONFIG['store']['args'])
class TestModel(unittest.TestCase):
    """
    Checks that the model factory can build the 'free' models out of the logs read from the data store
    """
    def setUp(self):
        """
        Reads the document from the store and resets the list of models before every test
        """
        self.document = reader.read()
        self.models = []

    def test_ScatterPlot(self):
        """
        Builds the 'free' models from the 'clients/logs' view of the configured uid
        """
        r = reader.view('clients/logs',key=CONFIG['store']['args']['uid'])
        collection = factory.instance('free',data=r)
        # print(...) with a single argument behaves the same on Python 2 and parses on Python 3
        print(collection)
        # The factory always hands back a list, make the test actually check something
        self.assertIsInstance(collection,list)
# Run the test suite only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    unittest.main()
Loading…
Cancel
Save