From b290632391683f778810179399fcf181d8713aa9 Mon Sep 17 00:00:00 2001
From: Steve Nyemba <steve@the-phi.com>
Date: Wed, 9 Jan 2019 12:40:09 -0600
Subject: [PATCH] New dashboard implementation, reusable and more verbose

---
 src/api/index.py            |  59 +++++++---
 src/models/__init__.py      |  95 ++++++++++++---
 src/models/factory.py       | 222 +++++++++++++++++++++++++++++++++---
 src/models/free/__init__.py |   2 +-
 src/models/free/apps.py     |  53 +++++----
 src/models/free/folders.py  | 131 +++++++++++++++++++++
 6 files changed, 487 insertions(+), 75 deletions(-)

diff --git a/src/api/index.py b/src/api/index.py
index 40f5f63..48a0c66 100755
--- a/src/api/index.py
+++ b/src/api/index.py
@@ -39,8 +39,9 @@ from threading import Thread
 import utils.params as SYS_ARGS
 # from utils.agents.actor import *
 # import utils.agents.actor as actor
-import pickle
-# from utils.agents.manager import Manager
+
+from models import model
+import models.factory
 
 app = Flask(__name__)
 app.config['SECRET_KEY'] = '!h8-[0v8]247-4-360'
@@ -103,19 +104,38 @@ def set_userdata(key):
         r = reader.view(id,key=key)
         session[id] = r
     pass
-@app.route("/1/dashboard/<id>/<node>")
+@app.route("/1/dashboard/<id>/<node>",methods=['GET','POST'])
 def plot_dashboard(id,node):
     """
         This function will plot a given app/folder/node
     """
-    from basic import model
+    session['key'] = 'cus_D2x3ItYNfWjSY3'
-    if 'key' in session :
+    if 'key' in session and id in ['paid','free']:
         args = {"type":SYS_STORE['class']['read'],"args":SYS_STORE['args']}
         reader = factory.instance(**args)
-        plan = reader.view('uid_map/active_plan',key=session['key'])
-        m = model.instance(plan,id,node)
-
+        # plan = reader.view('users/active_plan',key=session['key'])
+        logs = reader.view('clients/logs',key = session['key'])
+        # nodes = reader.view('clients/nodes',key=session['key'])
+
+        # filter = request.get_json(silent=True)
+        filter = dict(request.form)
+        m = models.factory.instance(id,data=logs,filter=filter)
+        lexicon = models.factory.lexicon(logs = logs,models=m)
+        # m['apps'] = [m['apps'][2]]
+        # print m['apps']
+        args = {}
+        args['context'] = SYS_ARGS.PARAMS['context']
+        args['version'] = models.factory.VERSION
+        args['models'] = m
+        args['node'] = node
+        args['lexicon'] = lexicon
+        if filter :
+            args['show'] = filter['id']
+
+        return render_template('dashboard/board.html',**args) #,context=SYS_ARGS.PARAMS['context'],version=models.factory.VERSION,models=m)
+    else:
+        return render_template('error.html',context=SYS_ARGS.PARAMS['context'])
     pass
 @app.route('/1/register',methods=['POST'])
 def register() :
@@ -188,15 +208,15 @@ def apps() :
         return render_template('dashboard/apps/data-grid.html',**WEB_ARGS)
     else:
         return render_template('error.html')
-@app.route("/1/plan",methods=['GET'])
-def active_plan():
-    args = {"type":SYS_STORE['class']['read'],"args":SYS_STORE['args']}
-    factory = DataSourceFactory()
-    reader = factory.instance(**args)
-    if 'key' in session :
-        return (json.dumps(plans),200)
-    else:
-        return ('',403)
+# @app.route("/1/plan",methods=['GET'])
+# def active_plan():
+#     args = {"type":SYS_STORE['class']['read'],"args":SYS_STORE['args']}
+#     factory = DataSourceFactory()
+#     reader = factory.instance(**args)
+#     if 'key' in session :
+#         return (json.dumps(plans),200)
+#     else:
+#         return ('',403)
 
 @app.route("/1/board",methods=["GET"])
 def get_board():
@@ -210,6 +230,7 @@ def get_board():
         args['nodes'] = handler.get('nodes')
 
 
+
         for id in handler.cache :
             session[id] = handler.cache[id]
 
@@ -349,7 +370,7 @@ def client_log():
         args = dict({"type":SYS_STORE['class']['write'],"args":SYS_STORE['args']})
         args['args']['uid'] = session['key']
-        row = {"node":request.headers['id'],"date":date,"log":body}
+        # row = {"node":request.headers['id'],"date":date,"log":body}
         #
         # We should make sure that we don't have to archive the data
@@ -367,7 +388,7 @@ def client_log():
             logs = [body]
         LIMIT = 40
         if len(logs) > LIMIT :
-            log = logs[:LIMIT]
+            logs = logs[:LIMIT]
         document[context][node] = {"date":date,"logs":logs}
         writer.set(**document)
         # writer.write (label=context,data=row)
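The reworked route above accepts an optional form-encoded filter; the fields id, name and node mirror what plot_dashboard reads out of request.form. A minimal client-side sketch of that contract — the host, port and lack of authentication handling are illustrative assumptions, not part of the patch:

    import requests

    # hypothetical host/port; the URL shape matches @app.route("/1/dashboard/<id>/<node>") above
    url = "http://localhost:5000/1/dashboard/free/my-laptop"

    # drill into one monitored folder; field names mirror the request.form usage above
    payload = {"id": "folders", "name": "Downloads", "node": "my-laptop"}
    response = requests.post(url, data=payload)
    print(response.status_code)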
diff --git a/src/models/__init__.py b/src/models/__init__.py
index dcb57d4..489b6d5 100644
--- a/src/models/__init__.py
+++ b/src/models/__init__.py
@@ -3,6 +3,7 @@ and will be tightly coupled with the rendering engines (matplotlib or chartjs)
 """
 import pandas as pd
+import json
 
 class model :
     """
@@ -11,39 +12,101 @@ class model :
     """
     def __init__(self,**args):
         """
-        @param data
-        @param node   name of the node
-        @param y_attr attributes on the y_axis
-        @param x_attr attributes on the x_axis
+        @param data
+        @param id   identifier of the data being processed
         """
-        self.data = args['data']
-        #self.node = args['node']
-        self.months = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
+        filter = args['filter'] if 'filter' in args else None
         self.cache = {}
-        if 'type' not in args :
-            self.set("type","scatter")
-        #self.x_attr = args['x_attr']
-        #self.y_attr = args['y_attr']
-        #self.set("x",self.data[x_attr].tolist())
-        #self.set("y",self.data[y_attr].tolist())
+
+        # if not filter :
+        #     filter = '1=1'
+        self.data = pd.DataFrame(args['data']) #.query(filter)
+        #self.node = args['node']
+        if filter :
+            node = filter['node']
+            _name = filter['name']
+            print 'FILTER ',node,_name
+        else:
+            node = self.data.node.tolist()[0]
+            _name = list(set(self.data.name.tolist()) - set(['other']))[0]
+
+        #
+        # let's filter the data we have
+        #
+        if self.name() == 'model' :
+            self.data = self.data[self.data.name == _name]
+            self.data = self.data[self.data.node == node]
+        #
+        # let's register the filters here
+        #
+        self.set('filter',self.data.name.tolist())
+
+        if isinstance(_name,list) is False:
+            _name = [_name]
+        min_ = self.data[self.data.name.isin(_name)].date.min()
+        max_ = self.data[self.data.name.isin(_name)].date.max()
+
+        self.set('title',args['id'].replace('folders','folder').replace('apps','app')+' : '+_name[0])
+        self.set("period", " ".join([self.format_date(min_),self.format_date(max_)]))
+        self.set('id',args['id'])
+    def meta(self):
+        if 'data' not in self.cache :
+            return None
+        df = self.get('data')
+        m = pd.DataFrame(df.dtypes,columns=['type']).apply(lambda row: [value in ['float64','int64'] for value in row])
+        return m
+
+    # if 'type' not in args :
+    #     self.set("type","scatter")
+    def name(self):
+        return self.__class__.__name__.lower()
     def can_do(self):
         """
             This function will determine if the model can be processed or has met the preconditions for processing
         """
         return self.data.shape[0] > 1 and self.data.shape[1] > 2
+
     def format_date(self,row):
-        m = {1:"Jan",2:"Feb",3:"Mar",4:"Apr",5:"May",6:"Jun",7:"Jul",8:"Aug",9:"Sep",10:"Oct",11:"Nov",12:"Dec"}
-        return "-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ])
+        m = {'01':"Jan",'02':"Feb",'03':"Mar",'04':"Apr",'05':"May",'06':"Jun",'07':"Jul",'08':"Aug",'09':"Sep",'10':"Oct",'11':"Nov",'12':"Dec"}
+
+        mon = row[:2] #if not row[:2].startswith('0') else row[:2].replace('0','')
+        mon = m[str(mon)]
+        return "-".join([row[3:5],mon,row[6:] ]) #"-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ])
+    def to_json(self):
+        self.set('name',self.name())
+        df = self.get('data')
+        self.set('data',df.to_dict(orient='records'))
+        return self.cache
     def compute(self):
         """
             We compute a simple regression if and only if a single attribute is provided.
             The framework of choice to compute the regression (for now) sklearn
             @TODO: Find ways perhaps to use tensorflow
         """
+        self.set('data',self.data)
+        self.set('type','line')
+        m = self.meta()
+        cols = list(set(m[m.type > 0 ].index.tolist()) - set(['pid','age_in_days','files']))
+        cols += ['date','time']
+        self.data = self.data.groupby(['node','name','date','time'],as_index=False).mean()
+        if self.get('id') == 'folders' and 'size_in_kb' in self.data.columns :
+            self.data.size_in_kb /= 1000000
+
+        self.set('data',self.data[cols])
+        # self.set('y-axis',{"scales":""})
+        self.set('x-axis',{"gridLines":{"display":False},"ticks":{"autoSkip":True,"maxTicksLimit":10}})
+
     def set(self,key,value):
         self.cache[key] = value
     def get(self,key):
-        return self.cache[key]
+        if key in self.cache :
+            return self.cache[key]
+        elif hasattr(self,key) :
+            return getattr(self,key)
+        else:
+            return None
 # class simple:
 #     class app_status(model):
 #         """
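To illustrate the contract the reworked base class enforces — subclasses populate self.cache via set() inside compute(), and to_json() serializes the cache — here is a toy subclass run against made-up log records (all values are invented, but the column names match what __init__ expects; assumes src/ is on the path):

    import pandas as pd
    from models import model

    class cpu_summary(model):
        # toy model: average cpu per application (illustrative only)
        def compute(self):
            df = self.data.groupby(['name'], as_index=False).mean()[['name', 'cpu']]
            self.set('type', 'line')
            self.set('data', df)

    logs = [
        {"name": "apache", "node": "laptop", "date": "01-09-2019", "time": "12:40:09", "cpu": 1.5, "mem": 0.8},
        {"name": "mongod", "node": "laptop", "date": "01-09-2019", "time": "12:45:09", "cpu": 0.5, "mem": 1.2},
    ]
    m = cpu_summary(id='apps', data=logs, filter=None)
    m.compute()
    print(m.to_json()['title'])   # e.g. "app : apache"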
#"-".join([m[row['month']],str(row['day']),str(row['year'])]) +" "+ " ".join([str(row['hour']),'h :',str(row['minute']),'min' ]) + def to_json(self): + self.set('name',self.name()) + df = self.get('data').to_dict(orient='records') + self.set('data',df.to_dict(orient='records')) + return self.cache def compute(self): """ We compute a simple regression if and only if a single attribute is provided. The framework of choice to compute the regression (for now) sklearn @TODO: Find ways perhaps to use tensorflow """ + self.set('data',self.data) + self.set('type','line') + m = self.meta() + cols = list(set(m[m.type > 0 ].index.tolist()) - set(['pid','age_in_days','files'])) + cols += ['date','time'] + self.data = self.data.groupby(['node','name','date','time'],as_index=False).mean() + if self.get('id') == 'folders' and 'size_in_kb' in self.data.columns : + self.data.size_in_kb /= 1000000 + + + self.set('data',self.data[cols]) + # self.set('y-axis',{"scales":""}) + self.set('x-axis',{"gridLines":{"display":False},"ticks":{"autoSkip":True,"maxTicksLimit":10}}) + def set(self,key,value): self.cache[key] = value def get(self,key): - return self.cache[key] + if key in self.cache : + return self.cache[key] + elif hasattr(self,key) : + return getattr(self,key) + else: + return None # class simple: # class app_status(model): # """ diff --git a/src/models/factory.py b/src/models/factory.py index b053c6b..203e931 100644 --- a/src/models/factory.py +++ b/src/models/factory.py @@ -1,26 +1,212 @@ import models import free import paid +import numpy as np +import pandas as pd +COLORS = ["#a32638", "#00308f","#006400","#efdecd","#e52b50","#e32636","#5d8aa8","#c46210","#ffbf00","#ff7e00","#ff033e","#96c","#a4c639","#f2f3f4","#cd9575","#915c83","#841b2d","#faebd7","#008000","#8db600","#fbceb1","#7fffd4","#4b5320","#3b444b","#e9d66b","#b2beb5","#87a96b","#f96","#a52a2a","#fdee00","#6e7f80","#568203","#007fff","#f0ffff","#89cff0","#a1caf1","#f4c2c2","#21abcd","#fae7b5","#ffe135","#7c0a02","#848482","#98777b","#bcd4e6","#9f8170","#f5f5dc","#9c2542","#ffe4c4","#3d2b1f","#fe6f5e","#bf4f51","#000","#3d0c02","#253529","#3b3c36","#ffebcd","#a57164","#318ce7","#ace5ee","#faf0be","#00f","#a2a2d0","#1f75fe","#69c","#0d98ba","#0093af","#0087bd","#339","#0247fe","#126180","#8a2be2","#de5d83","#79443b","#0095b6","#e3dac9","#c00","#006a4e","#873260","#0070ff","#b5a642","#cb4154","#1dacd6","#6f0","#bf94e4","#c32148","#ff007f","#08e8de","#d19fe8","#f4bbff","#ff55a3","#fb607f","#004225","#cd7f32","#964b00","#a52a2a","#ffc1cc","#e7feff","#f0dc82","#480607","#800020","#deb887","#c50","#e97451","#8a3324","#bd33a4","#702963","#536872","#5f9ea0","#91a3b0","#006b3c","#ed872d","#e30022","#fff600","#a67b5b","#4b3621","#1e4d2b","#a3c1ad","#c19a6b","#efbbcc","#78866b","#ffef00","#ff0800","#e4717a","#00bfff","#592720","#c41e3a","#0c9","#960018","#d70040","#eb4c42","#ff0038","#ffa6c9","#b31b1b","#99badd","#ed9121","#062a78","#92a1cf","#ace1af","#007ba7","#2f847c","#b2ffff","#4997d0","#de3163","#ec3b83","#007ba7","#2a52be","#6d9bc3","#007aa5","#e03c31","#a0785a","#fad6a5","#36454f","#e68fac","#dfff00","#7fff00","#de3163","#ffb7c5","#cd5c5c","#de6fa1","#a8516e","#aa381e","#7b3f00","#d2691e","#ffa700","#98817b","#e34234","#d2691e","#e4d00a","#fbcce7","#0047ab","#d2691e","#6f4e37","#9bddff","#f88379","#002e63","#8c92ac","#b87333","#da8a67","#ad6f69","#cb6d51","#966","#ff3800","#ff7f50","#f88379","#ff4040","#893f45","#fbec5d","#b31b1b","#6495ed","#fff8dc","#fff8e7","#ffbcd9","#fffdd0","#dc143c","#be0032","#0ff","#00b7eb","#ffff31","#f0e130","#00008b"
,"#654321","#5d3954","#a40000","#08457e","#986960","#cd5b45","#008b8b","#536878","#b8860b","#a9a9a9","#013220","#00416a","#1a2421","#bdb76b","#483c32","#734f96","#8b008b","#036","#556b2f","#ff8c00","#9932cc","#779ecb","#03c03c","#966fd6","#c23b22","#e75480","#039","#872657","#8b0000","#e9967a","#560319","#8fbc8f","#3c1414","#483d8b","#2f4f4f","#177245","#918151","#ffa812","#483c32","#cc4e5c","#00ced1","#9400d3","#9b870c","#00703c","#555","#d70a53","#a9203e","#ef3038","#e9692c","#da3287","#fad6a5","#b94e48","#704241","#c154c1","#004b49","#95b","#c0c","#ffcba4","#ff1493","#843f5b","#f93","#00bfff","#66424d","#1560bd","#c19a6b","#edc9af","#696969","#1e90ff","#d71868","#85bb65","#967117","#00009c","#e1a95f","#555d50","#c2b280","#614051","#f0ead6","#1034a6","#7df9ff","#ff003f","#0ff","#0f0","#6f00ff","#f4bbff","#cf0","#bf00ff","#3f00ff","#8f00ff","#ff0","#50c878","#b48395","#96c8a2","#c19a6b","#801818","#b53389","#f400a1","#e5aa70","#4d5d53","#4f7942","#ff2800","#6c541e","#ce2029","#b22222","#e25822","#fc8eac",] +VERSION=0.1 def instance(id,**args): - """ - Returns an instance of a model given the following : - @param data - @param x_attr - @param y_attr - @param node + """ + Returns an instance of a model given the following : + @param data unprocessed records (not in data-frames), the models will be tasked to generating them + @param filter filters the dataset + """ + collection = {} + data = args['data'] + filter = args['filter'] + + for pkg_name in ['apps','folders'] : + + if pkg_name in data and pkg_name in dir(eval(id)): + records = data[pkg_name] + module = eval(".".join([id,pkg_name])) + if args['filter'] and pkg_name in args['filter']['id'] : + filter = args['filter'] + # + # reformatting ... anomaly with form submission with application/json object + # @TODO: Fix data representation + filter['id'] = filter['id'][0] + filter['name'] = filter['name'][0] + filter['node'] = filter['node'][0] + + else: + filter = None + collection[pkg_name] = [ eval(".".join([id,pkg_name,name]))(id=pkg_name,data=records,filter=filter) for name in dir(module) if not name.startswith('__') and name not in ['pd','np','tf','sklearn']] + # + # let's create the instances and run the models and return the caches of each model + # + for id in collection : + for item in collection[id] : + item.compute() + chart = charting.html.instance(model=item) + item.set('chart',chart) + item = item.to_json + return collection +def lexicon(**args) : + """ + This function will create a lexicon (searchable data-structure), that will power the dashboard + The data will be relevant to the latest logs (for now) + @param args.logs logs + @param id identifier/scope of the logs (apps|folders) + """ + + # logs = pd.DataFrame(args['logs']) + + m = {'apps':['status'],'folders':['name','path','size_in_kb']} + lex = [] + for id in args['logs'] : + if id in ['apps','folders']: + logs = pd.DataFrame(args['logs'][id]) + d = logs[logs.name != 'other'].date.max() + logs = logs[logs.date == d] + cols = ['name','node','date'] + if id == 'apps' : + cols += ['status'] + else: + cols += ['path'] + logs = logs.groupby(cols,as_index=False).mean().round(2) + logs = pd.DataFrame(logs) + logs.index = np.arange(logs.shape[0]) + + logs['id'] = np.repeat(id,logs.shape[0]) + lex += logs.to_dict(orient='records') + + + return lex + +class charting : + class image: + """ + This class will render Searborn image charts to the as images. We are dismissing matplotlib (not elegant enough) + The available charts are the following doughnut, pie, bar,line and scatter. 
""" - collection = [] - data = args['data'] - for pkg_name in ['apps','folders'] : + def scatter(**args): + model = args['model'] + data = mode.get('data') + x = model.get('x') + y = model.get('y') + return sns.scatterplot(x=x,y=y,hue='time',data=data).get_figure() + def doughnut(**args): + return None + class html : + """ + This function will render an HTML-friendly chart given a pandas-friendly dataset + The model encapsulates basic computation that is designed to handle a dataset in a particular way + """ + @staticmethod + def instance(** args): + m = args['model'] + + chart_type = m.get('type') + pointer = getattr(charting.html,chart_type) + return pointer(model=m) + @staticmethod + def scatter(**args) : + """ + Basic configuration for scatter plot, unprocessed data for visualization + @param model data:{label,measure_o,measure_i} + """ + config = {"type":"scatter","responsive":True} + model = args['model'] + data = model.get('data') + xy = model.meta().type + # + # @TODO: inspect the value in the model's cache + yaxis_name = xy[xy > 0].index.tolist() + xaxis_name = xy[xy ==0].index.tolist() + data = model.get('data')[yaxis_name].copy() + + data['x'] = np.arange(0,data.shape[0]).tolist() + datasets = [ {"label":field,"showLine":False, "fill":False,"backgroundColor":COLORS[yaxis_name.index(field)], "borderColor":COLORS[yaxis_name.index(field)],"data":data[['x', field ]].dropna().rename(columns={field:'y'}).to_dict(orient='records')} for field in yaxis_name ] + config ['data'] = {"datasets":datasets} + # config['options'] = {"scales": {"xAxes": [{"type": 'linear',"position": 'bottom'}]}} + return config + @staticmethod + def doughnut(**args): + """ + Providing the basic configuration for a doughnut plot, the expected data is formatted as follows + @param data data-frame with 2 columns {label,measure) + """ + + config = {"type":"doughnut","responsive":True,"maintainAspectRatio":False} + m = args['model'] + data = m.get('data') + xy = m.meta().type + # + # @TODO : Inspect xaxis,yaxis names in the model's cache + # labels = xy[xy == 0].index.tolist() + x = xy[xy > 0].index.tolist() + N = data.shape[1]-1 + + data.labels = data.labels.str.upper() + config['data'] = {'datasets':[ {"data":data[x[i] ].tolist(),"backgroundColor":COLORS[:data.shape[0]]} for i in range(0,N)]} + config['data']['labels'] = data.labels.tolist() + config['options'] = charting.html.get_options(m) + return config + + @staticmethod + def bar(**args): + """ + Providing the basic configuration for a bar chart, it's a delegation to a line chart + @param model {labels, measure_o,measure_i} + """ + config = charting.html.line(**args) + config['type'] = 'bar' + return config + @staticmethod + def hbar(**args): + """ + Providing configuration for a horizontal bar chart + """ + config = charting.html.line(**args) + config['type'] = 'horizontalBar' + return config + @staticmethod + def get_options(m) : + options = {'scales':{},'legend':{}} + if m.get('y-axis') or m.get('x-axis'): + keys = {'x-axis':'xAxes','y-axis':'yAxes'} + + for key in keys : + if not m.get(key) : + continue + id = keys[key] + options['scales'][id] = [m.get(key)] + if m.get('legend') : + options['legend'] = m.get('legend') + return options + @staticmethod + def line(**args): + """ + Providing the basic chartjs configuration for a line chart, the expected data is formatted as follows + The data-frame must have labels (as is) otherwise numbers will be replaced on the x-axis + @param model with data {labels,...} + """ + config = 
{"type":"line","responsive":True,"maintainAspectRatio":False} + m = args['model'] + data = pd.DataFrame(m.get('data')) + xy = m.meta().type + yaxis = list(set(data.columns.tolist()) - set(['labels','date','time','pid'])) #xy[xy > 0].index.tolist() - if pkg_name in data and pkg_name in dir(eval(id)): - records = data[pkg_name] - module = eval(".".join([id,pkg_name])) - collection += [ eval(".".join([id,pkg_name,name]))(data=records) for name in dir(module) if not name.startswith('__')] - # - # let's create the instances and run the models and return the caches of each model - # - - return collection + if 'labels' not in data.columns.tolist() : + if 'date' in data.columns and 'time' in data.columns: + labels = data.apply(lambda row: " ".join([row.time[:5] ,'AM' if int(row.time[:2]) > 12 else ' PM']),axis=1).tolist() + else: + labels = range(0,data.shape[0]) + data['labels'] = labels #pd.Series(labels,index=data.index) + datasets = [ {"label":field.upper(),"borderColor":COLORS[yaxis.index(field)],"backgroundColor":COLORS[yaxis.index(field)],"fill":False,"data":data[field].tolist() } for field in yaxis] + config ['data'] = {'datasets':datasets,"labels":data.labels.tolist()} + config['options'] = {'scales':{}} + if m.get('y-axis') or m.get('x-axis'): + keys = {'x-axis':'xAxes','y-axis':'yAxes'} + config['options'] = {'scales':{}} + for key in keys : + if not m.get(key) : + continue + id = keys[key] + config['options']['scales'][id] = [m.get(key)] + if m.get('legend') : + config['options']['legend'] = m.get('legend') + return config diff --git a/src/models/free/__init__.py b/src/models/free/__init__.py index 3331532..f73bba2 100644 --- a/src/models/free/__init__.py +++ b/src/models/free/__init__.py @@ -3,5 +3,5 @@ This package serves various FREE models in order to provide insight for apps and The models will show basic general trends and occasionally a regression if applicable. 
""" -#import folders +import folders import apps diff --git a/src/models/free/apps.py b/src/models/free/apps.py index 510ecc3..c786cd4 100644 --- a/src/models/free/apps.py +++ b/src/models/free/apps.py @@ -9,7 +9,8 @@ @TODO: Include process counts in the equation so as to add another variable (good for ml) """ # from models.basic import * -# import models.basic.model as model +import pandas as pd +import numpy as np from models import model class status(model): """ @@ -18,54 +19,64 @@ class status(model): """ def __init(self,**args): model.__init__(self,**args) + def compute(self): """ This function performs the actual counts associated with the status of an application """ + self.set('title','Crash Analysis') df = self.data[self.data.name.str.contains('other',na=False)==False] + df = df[df.date == df.date.max()] x_crash = df.status.str.contains('X').sum() x_idle = df.status.str.contains('S').sum() x_run = df.shape[0] - x_crash - x_idle + odf = pd.DataFrame({"labels":['crash','idle','running'],"counts":[x_crash,x_idle,x_run]}) self.set("type","doughnut") # self.set("labels",["crash","idle","running"]) # self.set("data",{"data":[x_crash,x_idle,x_run]}) self.set('data',odf) if x_crash > 0 : - self.set("analysis"," ".join([x_crash,"applications found out of ",str(df.shape[0]),"monitored" ])) + self.set("analysis"," ".join([ str(x_crash),"applications found out of ",str(df.shape[0]),"monitored" ])) class resource(model): """ - This model will group the applications that are monitored and the rest of the system to guage resource consumption (CPU,RAM) + This model will group the applications that are monitored and the rest of the system RAM and CPU usage The intent of this model is to see how resource intensive the selected applications are relative to the rest of the system """ def __init__(self,**args): - model.__init__(self,**args) + model.__init__(self,**args) + self.set('title','Resource Usage') def compute(self): - N = self.data.shape[0] - 1 + max_ = self.data[self.data.name != 'other'].date.max() + m = self.data[self.data.date == max_].groupby(['date']).max()[['cpu','mem']] + o = self.data[self.data.name == 'other'][['time','name','date','cpu','mem']] + o.index = np.arange(0,o.shape[0]) + N = o.shape[0] - 1 + df = pd.DataFrame({"cpu":[o.cpu[N]],"mem": [o.mem[N]]}) + df = df.append(pd.DataFrame({"cpu":m.cpu,"mem":m.mem})) - df = pd.DataFrame(self.data[self.data.name == 'other'].sum()[['cpu','mem']] ) .T - df = df.append(pd.DataFrame( self.data[self.data.name != 'other'].sum()[['cpu','mem']] ).T) df['labels'] = ['other','monitored'] + df.index = np.arange(0,df.shape[0]) self.set("data",df) self.set("type","bar") -class trend(model): - """ - This model is designed to display the trends for a given attribute over a period of time - Additionally there will be a regression line associated with it - """ - def __init__(self,**args): - model.__init__(self,**args) - #self.attr_name = args['name'] - #self.attr_values= args['values'] - def compute(self): - df = self.data[self.data[self.attr_name].isin(self.attr_values)] - cols = ['cpu','mem'] - pass +# class trend(model): +# """ +# This model is designed to display the trends for a given attribute over a period of time +# Additionally there will be a regression line associated with it +# """ +# def __init__(self,**args): +# model.__init__(self,**args) +# self.attr_name = args['name'] +# self.attr_values= args['values'] +# def compute(self): +# df = self.data[self.data[self.attr_name].isin(self.attr_values)] +# cols = ['cpu','mem'] +# pass # model = 
diff --git a/src/models/free/folders.py b/src/models/free/folders.py
index e69de29..009d69a 100644
--- a/src/models/free/folders.py
+++ b/src/models/free/folders.py
@@ -0,0 +1,131 @@
+"""
+    (c) 2018 Smart Top - Free Models
+    Steve L. Nyemba, steve@the-phi.com
+
+    This file contains free models for smart-top; these are basic models that serve as data visualization
+    The most we will do here is a basic regression (basis for prediction).
+
+"""
+import pandas as pd
+import numpy as np
+from models import model
+
+
+class rank(model):
+    """
+    This model ranks folders by size and by the number of files they hold, as well as by the age of the files
+    The folder size will be converted from KB to MB or GB
+    """
+    def __init__(self,**args):
+        model.__init__(self,**args)
+        self.SIZE_UNITS = {"MB":1000,"GB":1000000,"TB":1000000000000}
+        self.TIME_UNITS = {"MONTHS":30,"YEAR":365}
+        # print self.data.columns
+        self.set('type','hbar')
+
+    def compute(self):
+        d = self.data.date.max()
+
+        # self.data['size MB'] = (self.data.size_in_kb / 1000)
+
+        self.data = self.data[self.data.date == d][['name','size_in_kb']].sort_values(['size_in_kb'])
+        GB = 1000000000
+        if self.data.size_in_kb.max() < GB :
+            self.data['size_in_kb'] /= 1000000
+            unit = 'MB'
+        else:
+            self.data['size_in_kb'] /= 1000000000
+            unit = 'GB'
+        self.data.size_in_kb = np.round(self.data.size_in_kb,2)
+        self.data.name = self.data.name.str.upper()
+        self.set('labels',self.data.name.tolist())
+        self.data.columns = ['labels','folder size (:unit)'.replace(':unit',unit)]
+        self.set('data',self.data)
+        self.set('y-axis',{"gridLines":{"display":False}})
+        self.set('x-axis',{"ticks":{"beginAtZero":True},"scaleLabel":{"labelString":"Folder Size (:unit)".replace(':unit',unit),"display":True}})
+        # self.set('y-axis',{"gridLines":{"display":False},"ticks":{"beginAtZero":True}})
+        self.set('legend',{"display":False})
+        # self.set('x-axis',{ 'scaleLabel':{'display':True, 'labelString':'Folder Size in MB'}})
+        self.set('title','Folder ranking (:unit)'.replace(':unit',unit))
+
+        pass
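For instance, ranking two folders logged on the same day (values invented; size_in_kb is whatever unit the collector reports, here assumed small enough to land in the MB branch of compute() above):

    from models.free.folders import rank

    logs = [
        {"name": "Downloads", "node": "laptop", "date": "01-09-2019", "time": "12:00:00",
         "path": "/home/u/Downloads", "size_in_kb": 2500000.0, "files": 120, "age_in_days": 40.0},
        {"name": "Music", "node": "laptop", "date": "01-09-2019", "time": "12:00:00",
         "path": "/home/u/Music", "size_in_kb": 900000.0, "files": 300, "age_in_days": 200.0},
    ]
    m = rank(id='folders', data=logs, filter=None)
    m.compute()
    print(m.get('title'))   # Folder ranking (MB)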
+class files(model):
+    """
+    This model provides a status of the latest file counts by folder.
+    This makes it possible to gauge the current state of affairs
+    @TODO: Adding drive stats to this will make it more meaningful
+    """
+    def __init__(self,**args):
+        model.__init__(self,**args)
+        self.set('type','doughnut')
+        self.set('title','file count')
+        self.data = self.filter()
+        self.data = self.data.groupby(['path','node','date','time'],as_index=False).max()
+        self.data = self.data.rename(columns={'name':'labels'})
+    def filter(self):
+        """
+        return the latest entry for analysis as a status model would
+        """
+        value = self.data.date.max()
+        return self.data[ self.data.date == value ]
+
+    def compute(self):
+        self.data = self.data[['labels','files']]
+        self.set('data',self.data)
+        # print self.data.T
+        self.set('labels','name')
+        self.set('legend',{"position":'right'})
+
+class size(files):
+    """
+    This model provides a status of the folder sizes given the latest logs
+    """
+    def __init__(self,**args):
+        files.__init__(self,**args)
+    def compute(self):
+        self.data = self.data[['labels','size_in_kb']]
+
+        self.data.size_in_kb /= 1000000
+        if self.data.size_in_kb.max() > 999999 :
+            self.data.size_in_kb /= 1000
+            self.set("title","Folder Size in GB")
+        else:
+            self.set("title","Folder Size in MB")
+        self.data.size_in_kb = np.round(self.data.size_in_kb,4)
+        self.data = self.data.rename(columns={'size_in_kb':'size'})
+        # self.data = self.data.rename(columns={"size_in_kb":"size (MB)"})
+
+        self.set("data",self.data)
+        self.set('legend',{"position":'right'})
+        # self.set('labels','name')
+class age(files):
+    """
+    Provides insight into where older files are located; file age is reported in days, months or years
+    The data is based on the latest logs recorded by the data collector on {node}
+    """
+    def __init__(self,**args):
+        files.__init__(self,**args)
+    def compute(self):
+        self.data = self.data[['labels','age_in_days']]
+
+        if self.data.age_in_days.max() > 365 :
+            self.data.age_in_days /= 365
+            self.set("title","File Age in Yr")
+            unit = "Yr"
+        elif self.data.age_in_days.max() > 30 :
+            self.data.age_in_days /= 30
+            self.set("title","File Age in Mo")
+            unit = "Mo"
+        else:
+            self.set("title","File Age in Days")
+            unit = "Day"
+
+        self.data.age_in_days = np.round(self.data.age_in_days,4)
+        self.data = self.data.rename(columns={"age_in_days":"age ("+unit+")"})
+
+        self.set("data",self.data)
+        self.set('legend',{"position":'right'})
+        # self.set('labels','name')
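Putting the pieces together, the whole pipeline this patch introduces — factory.instance() discovering the model classes, running compute() and attaching a Chart.js config to each cache — can be smoke-tested with a fabricated payload (again assuming the src/models package, including its paid sibling, is importable; all values are made up):

    import models.factory

    logs = {
        "apps": [
            {"name": "apache", "node": "laptop", "date": "01-09-2019", "time": "12:00:00",
             "status": "S", "cpu": 0.5, "mem": 1.0},
            {"name": "other", "node": "laptop", "date": "01-09-2019", "time": "12:00:00",
             "status": "R", "cpu": 40.0, "mem": 55.0},
        ],
    }
    collection = models.factory.instance('free', data=logs, filter=None)
    for scope in collection:
        for item in collection[scope]:
            # each item's cache now holds title, data records and a 'chart' config
            print(item.get('title'))   # e.g. Crash Analysis, Resource Usage, app : apache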