parent b74c752d53
commit b290632391
@@ -1,26 +1,212 @@
import models
import free
import paid
import numpy as np
import pandas as pd
import seaborn as sns # used by charting.image.scatter below
COLORS = ["#a32638", "#00308f","#006400","#efdecd","#e52b50","#e32636","#5d8aa8","#c46210","#ffbf00","#ff7e00","#ff033e","#96c","#a4c639","#f2f3f4","#cd9575","#915c83","#841b2d","#faebd7","#008000","#8db600","#fbceb1","#7fffd4","#4b5320","#3b444b","#e9d66b","#b2beb5","#87a96b","#f96","#a52a2a","#fdee00","#6e7f80","#568203","#007fff","#f0ffff","#89cff0","#a1caf1","#f4c2c2","#21abcd","#fae7b5","#ffe135","#7c0a02","#848482","#98777b","#bcd4e6","#9f8170","#f5f5dc","#9c2542","#ffe4c4","#3d2b1f","#fe6f5e","#bf4f51","#000","#3d0c02","#253529","#3b3c36","#ffebcd","#a57164","#318ce7","#ace5ee","#faf0be","#00f","#a2a2d0","#1f75fe","#69c","#0d98ba","#0093af","#0087bd","#339","#0247fe","#126180","#8a2be2","#de5d83","#79443b","#0095b6","#e3dac9","#c00","#006a4e","#873260","#0070ff","#b5a642","#cb4154","#1dacd6","#6f0","#bf94e4","#c32148","#ff007f","#08e8de","#d19fe8","#f4bbff","#ff55a3","#fb607f","#004225","#cd7f32","#964b00","#a52a2a","#ffc1cc","#e7feff","#f0dc82","#480607","#800020","#deb887","#c50","#e97451","#8a3324","#bd33a4","#702963","#536872","#5f9ea0","#91a3b0","#006b3c","#ed872d","#e30022","#fff600","#a67b5b","#4b3621","#1e4d2b","#a3c1ad","#c19a6b","#efbbcc","#78866b","#ffef00","#ff0800","#e4717a","#00bfff","#592720","#c41e3a","#0c9","#960018","#d70040","#eb4c42","#ff0038","#ffa6c9","#b31b1b","#99badd","#ed9121","#062a78","#92a1cf","#ace1af","#007ba7","#2f847c","#b2ffff","#4997d0","#de3163","#ec3b83","#007ba7","#2a52be","#6d9bc3","#007aa5","#e03c31","#a0785a","#fad6a5","#36454f","#e68fac","#dfff00","#7fff00","#de3163","#ffb7c5","#cd5c5c","#de6fa1","#a8516e","#aa381e","#7b3f00","#d2691e","#ffa700","#98817b","#e34234","#d2691e","#e4d00a","#fbcce7","#0047ab","#d2691e","#6f4e37","#9bddff","#f88379","#002e63","#8c92ac","#b87333","#da8a67","#ad6f69","#cb6d51","#966","#ff3800","#ff7f50","#f88379","#ff4040","#893f45","#fbec5d","#b31b1b","#6495ed","#fff8dc","#fff8e7","#ffbcd9","#fffdd0","#dc143c","#be0032","#0ff","#00b7eb","#ffff31","#f0e130","#00008b","#654321","#5d3954","#a40000","#08457e","#986960","#cd5b45","#008b8b","#536878","#b8860b","#a9a9a9","#013220","#00416a","#1a2421","#bdb76b","#483c32","#734f96","#8b008b","#036","#556b2f","#ff8c00","#9932cc","#779ecb","#03c03c","#966fd6","#c23b22","#e75480","#039","#872657","#8b0000","#e9967a","#560319","#8fbc8f","#3c1414","#483d8b","#2f4f4f","#177245","#918151","#ffa812","#483c32","#cc4e5c","#00ced1","#9400d3","#9b870c","#00703c","#555","#d70a53","#a9203e","#ef3038","#e9692c","#da3287","#fad6a5","#b94e48","#704241","#c154c1","#004b49","#95b","#c0c","#ffcba4","#ff1493","#843f5b","#f93","#00bfff","#66424d","#1560bd","#c19a6b","#edc9af","#696969","#1e90ff","#d71868","#85bb65","#967117","#00009c","#e1a95f","#555d50","#c2b280","#614051","#f0ead6","#1034a6","#7df9ff","#ff003f","#0ff","#0f0","#6f00ff","#f4bbff","#cf0","#bf00ff","#3f00ff","#8f00ff","#ff0","#50c878","#b48395","#96c8a2","#c19a6b","#801818","#b53389","#f400a1","#e5aa70","#4d5d53","#4f7942","#ff2800","#6c541e","#ce2029","#b22222","#e25822","#fc8eac",]

VERSION = 0.1

def instance(id,**args):
"""
|
"""
|
||||||
Returns an instance of a model given the following :
|
Returns an instance of a model given the following :
|
||||||
@param data
|
@param data unprocessed records (not in data-frames), the models will be tasked to generating them
|
||||||
@param x_attr
|
@param filter filters the dataset
|
||||||
@param y_attr
|
|
||||||
@param node
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
    collection = {}
    data = args['data']
    filter = args['filter']
    for pkg_name in ['apps','folders'] :
        if pkg_name in data and pkg_name in dir(eval(id)):
            records = data[pkg_name]
            module = eval(".".join([id,pkg_name]))
            if args['filter'] and pkg_name in args['filter']['id'] :
                filter = args['filter']
                #
                # reformatting ... anomaly with form submission of an application/json object
                # @TODO: Fix data representation
                #
                filter['id'] = filter['id'][0]
                filter['name'] = filter['name'][0]
                filter['node'] = filter['node'][0]
            else:
                filter = None
            collection[pkg_name] = [ eval(".".join([id,pkg_name,name]))(id=pkg_name,data=records,filter=filter) for name in dir(module) if not name.startswith('__') and name not in ['pd','np','tf','sklearn']]
    #
    # let's create the instances, run the models and return the caches of each model
    #
    for id in collection :
        for item in collection[id] :
            item.compute()
            chart = charting.html.instance(model=item)
            item.set('chart',chart)
        # rebinding the loop variable has no effect; collect the serialized caches explicitly
        collection[id] = [item.to_json for item in collection[id]]
    return collection
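#
# Usage sketch (hypothetical payload; the record fields below are inferred
# from the free models, not a confirmed schema):
#
#   payload = {'folders':[{'name':'logs','node':'host-1','date':'2018-10-01',
#                          'time':'14:30:00','files':120,'size_in_kb':5120,
#                          'age_in_days':12,'path':'/var/logs'}]}
#   charts  = instance('free',data=payload,filter=None)
#   # charts['folders'] holds the serialized model caches, each with a 'chart' config
#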

def lexicon(**args) :
    """
    This function will create a lexicon (searchable data-structure) that will power the dashboard.
    The data will be relevant to the latest logs (for now).

    @param args.logs logs
    @param id        identifier/scope of the logs (apps|folders)
    """
    # logs = pd.DataFrame(args['logs'])
    m = {'apps':['status'],'folders':['name','path','size_in_kb']}
    lex = []
    for id in args['logs'] :
        if id in ['apps','folders']:
            logs = pd.DataFrame(args['logs'][id])
            d = logs[logs.name != 'other'].date.max()
            logs = logs[logs.date == d]
            cols = ['name','node','date']
            if id == 'apps' :
                cols += ['status']
            else:
                cols += ['path']
            logs = logs.groupby(cols,as_index=False).mean().round(2)
            logs = pd.DataFrame(logs)
            logs.index = np.arange(logs.shape[0])
            logs['id'] = np.repeat(id,logs.shape[0])
            lex += logs.to_dict(orient='records')
    return lex
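#
# Output sketch (values illustrative): each lexicon entry is a flat record
# tagged with its scope, e.g.
#
#   lexicon(logs={'folders':[...]})
#   # -> [{'name':'logs','node':'host-1','date':'2018-10-01','path':'/var/logs',
#   #      'size_in_kb':5120.0,'id':'folders'}, ...]
#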

class charting :
    class image:
        """
        This class will render Seaborn charts as images; we are dismissing matplotlib (not elegant enough).
        The available charts are the following: doughnut, pie, bar, line and scatter.
        """
        def scatter(**args):
            model = args['model']
            data = model.get('data')
            x = model.get('x')
            y = model.get('y')
            return sns.scatterplot(x=x,y=y,hue='time',data=data).get_figure()

        def doughnut(**args):
            return None
    class html :
        """
        This class will render an HTML-friendly chart given a pandas-friendly dataset.
        The model encapsulates basic computation that is designed to handle a dataset in a particular way.
        """
        @staticmethod
        def instance(**args):
            m = args['model']
            chart_type = m.get('type')
            pointer = getattr(charting.html,chart_type)
            return pointer(model=m)
        @staticmethod
        def scatter(**args) :
            """
            Basic configuration for a scatter plot, unprocessed data for visualization.

            @param model data:{label,measure_o,measure_i}
            """
            config = {"type":"scatter","responsive":True}
            model = args['model']
            data = model.get('data')
            xy = model.meta().type
            #
            # @TODO: inspect the value in the model's cache
            #
            yaxis_name = xy[xy > 0].index.tolist()
            xaxis_name = xy[xy == 0].index.tolist()
            data = model.get('data')[yaxis_name].copy()
            data['x'] = np.arange(0,data.shape[0]).tolist()
            datasets = [ {"label":field,"showLine":False,"fill":False,"backgroundColor":COLORS[yaxis_name.index(field)],"borderColor":COLORS[yaxis_name.index(field)],"data":data[['x',field]].dropna().rename(columns={field:'y'}).to_dict(orient='records')} for field in yaxis_name ]
            config['data'] = {"datasets":datasets}
            # config['options'] = {"scales": {"xAxes": [{"type": 'linear',"position": 'bottom'}]}}
            return config
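        #
        # Shape sketch of the returned chartjs configuration (field names and
        # values illustrative, not from the source):
        #
        #   {"type":"scatter","responsive":True,
        #    "data":{"datasets":[{"label":"cpu","showLine":False,"fill":False,
        #                         "backgroundColor":"#a32638","borderColor":"#a32638",
        #                         "data":[{"x":0,"y":1.25},{"x":1,"y":0.8}]}]}}
        #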
        @staticmethod
        def doughnut(**args):
            """
            Providing the basic configuration for a doughnut plot; the expected data is formatted as follows:

            @param data data-frame with 2 columns {label,measure}
            """
            config = {"type":"doughnut","responsive":True,"maintainAspectRatio":False}
            m = args['model']
            data = m.get('data')
            xy = m.meta().type
            #
            # @TODO : Inspect xaxis,yaxis names in the model's cache
            # labels = xy[xy == 0].index.tolist()
            #
            x = xy[xy > 0].index.tolist()
            N = data.shape[1]-1
            data.labels = data.labels.str.upper()
            config['data'] = {'datasets':[ {"data":data[x[i]].tolist(),"backgroundColor":COLORS[:data.shape[0]]} for i in range(0,N)]}
            config['data']['labels'] = data.labels.tolist()
            config['options'] = charting.html.get_options(m)
            return config
        @staticmethod
        def bar(**args):
            """
            Providing the basic configuration for a bar chart; it delegates to the line chart.

            @param model {labels,measure_o,measure_i}
            """
            config = charting.html.line(**args)
            config['type'] = 'bar'
            return config
        @staticmethod
        def hbar(**args):
            """
            Providing the configuration for a horizontal bar chart; it also delegates to the line chart.
            """
            config = charting.html.line(**args)
            config['type'] = 'horizontalBar'
            return config
        @staticmethod
        def get_options(m) :
            options = {'scales':{},'legend':{}}
            if m.get('y-axis') or m.get('x-axis'):
                keys = {'x-axis':'xAxes','y-axis':'yAxes'}
                for key in keys :
                    if not m.get(key) :
                        continue
                    id = keys[key]
                    options['scales'][id] = [m.get(key)]
            if m.get('legend') :
                options['legend'] = m.get('legend')
            return options
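        #
        # Sketch of what get_options yields for a model that sets only an
        # x-axis and a legend (keys as used by the free models; values illustrative):
        #
        #   get_options(m) -> {'scales':{'xAxes':[{"ticks":{"beginAtZero":True}}]},
        #                      'legend':{"display":False}}
        #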
        @staticmethod
        def line(**args):
            """
            Providing the basic chartjs configuration for a line chart; the expected data is formatted as follows:
            the data-frame must have a 'labels' column, otherwise row numbers will be used on the x-axis.

            @param model with data {labels,...}
            """
            config = {"type":"line","responsive":True,"maintainAspectRatio":False}
            m = args['model']
            data = pd.DataFrame(m.get('data'))
            xy = m.meta().type
            yaxis = list(set(data.columns.tolist()) - set(['labels','date','time','pid'])) #xy[xy > 0].index.tolist()
            if 'labels' not in data.columns.tolist() :
                if 'date' in data.columns and 'time' in data.columns:
                    labels = data.apply(lambda row: " ".join([row.time[:5],'PM' if int(row.time[:2]) >= 12 else 'AM']),axis=1).tolist()
                else:
                    labels = range(0,data.shape[0])
                data['labels'] = labels #pd.Series(labels,index=data.index)
            datasets = [ {"label":field.upper(),"borderColor":COLORS[yaxis.index(field)],"backgroundColor":COLORS[yaxis.index(field)],"fill":False,"data":data[field].tolist()} for field in yaxis]
            config['data'] = {'datasets':datasets,"labels":data.labels.tolist()}
            config['options'] = {'scales':{}}
            if m.get('y-axis') or m.get('x-axis'):
                keys = {'x-axis':'xAxes','y-axis':'yAxes'}
                for key in keys :
                    if not m.get(key) :
                        continue
                    id = keys[key]
                    config['options']['scales'][id] = [m.get(key)]
            if m.get('legend') :
                config['options']['legend'] = m.get('legend')
            return config
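#
# Label-derivation sketch for charting.html.line (values illustrative): a row
# with time '14:30:00' and no 'labels' column yields '14:30 PM', since
# row.time[:5] -> '14:30' and int(row.time[:2]) -> 14 >= 12 selects 'PM'.
#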
@@ -0,0 +1,131 @@
"""
(c) 2018 Smart Top - Free Models
Steve L. Nyemba, steve@the-phi.com

This file contains the free models for smart-top; these are basic models that will serve as data visualizations.
The most we will do here is a basic regression (basis for prediction).
"""
import pandas as pd
import numpy as np
from models import model

class rank(model):
    """
    This model ranks folders by both size and number of files within, as well as the age of the files.
    The folder size will be converted from KB to MB or GB.
    """
    def __init__(self,**args):
        model.__init__(self,**args)
        self.SIZE_UNITS = {"MB":1000,"GB":1000000,"TB":1000000000} # multiples of a KB
        self.TIME_UNITS = {"MONTHS":30,"YEAR":365} # expressed in days
        # print self.data.columns
        self.set('type','hbar')
    def compute(self):
        d = self.data.date.max()
        # self.data['size MB'] = (self.data.size_in_kb / 1000)
        self.data = self.data[self.data.date == d][['name','size_in_kb']].sort_values(['size_in_kb'])
        GB = 1000000 # 1 GB expressed in KB
        if self.data.size_in_kb.max() < GB :
            self.data['size_in_kb'] /= 1000 # KB -> MB
            unit = 'MB'
        else:
            self.data['size_in_kb'] /= 1000000 # KB -> GB
            unit = 'GB'
        self.data.size_in_kb = np.round(self.data.size_in_kb,2)
        self.data.name = self.data.name.str.upper()
        self.set('labels',self.data.name.tolist())
        self.data.columns = ['labels','folder size (:unit)'.replace(':unit',unit)]
        self.set('data',self.data)
        self.set('y-axis',{"gridLines":{"display":False}})
        self.set('x-axis',{"ticks":{"beginAtZero":True},"scaleLabel":{"labelString":"Folder Size (:unit)".replace(':unit',unit),"display":True}})
        # self.set('y-axis',{"gridLines":{"display":False},"ticks":{"beginAtZero":True}})
        self.set('legend',{"display":False})
        # self.set('x-axis',{ 'scaleLabel':{'display':True, 'labelString':'Folder Size in MB'}})
        self.set('title','Folder ranking (:unit)'.replace(':unit',unit))
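#
# Worked example (values illustrative): with sizes recorded in KB, a folder of
# 5,242,880 KB exceeds the 1e6 KB threshold and is reported as
# 5242880/1000000 ~ 5.24 GB, while 512,000 KB becomes 512 MB.
#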

class files(model):
    """
    This model provides a status of the latest file counts by folder.
    This allows us to gauge the current state of affairs.

    @TODO: Add drive stats; this will make it more meaningful.
    """
    def __init__(self,**args):
        model.__init__(self,**args)
        self.set('type','doughnut')
        self.set('title','file count')
        self.data = self.filter()
        self.data = self.data.groupby(['path','node','date','time'],as_index=False).max()
        self.data = self.data.rename(columns={'name':'labels'})
    def filter(self):
        """
        Return the latest entry for analysis, as a status model would.
        """
        value = self.data.date.max()
        return self.data[ self.data.date == value ]

    def compute(self):
        self.data = self.data[['labels','files']]
        self.set('data',self.data)
        # print self.data.T
        self.set('labels','name')
        self.set('legend',{"position":'right'})

class size(files):
    """
    This model provides a status of the folder sizes given the latest logs.
    """
    def __init__(self,**args):
        files.__init__(self,**args)

    def compute(self):
        self.data = self.data[['labels','size_in_kb']]
        self.data.size_in_kb /= 1000 # KB -> MB
        if self.data.size_in_kb.max() > 1000 :
            self.data.size_in_kb /= 1000 # MB -> GB
            self.set("title","Folder Size in GB")
        else:
            self.set("title","Folder Size in MB")
        self.data.size_in_kb = np.round(self.data.size_in_kb,4)
        self.data = self.data.rename(columns={'size_in_kb':'size'})
        # self.data = self.data.rename(columns={"size_in_kb":"size (MB)"})
        self.set("data",self.data)
        self.set('legend',{"position":'right'})
        # self.set('labels','name')
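#
# Worked example (values illustrative): 5,120 KB -> 5.12 MB and the title is
# 'Folder Size in MB'; 2,500,000 KB -> 2500 MB > 1000, so a second division
# yields 2.5 GB and the title 'Folder Size in GB'.
#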

class age(files):
    """
    Provides insight into where the older files are located; file age is reported in days, months or years.
    The data is based on the latest logs recorded by the data collector on {node}.
    """
    def __init__(self,**args):
        files.__init__(self,**args)

    def compute(self):
        self.data = self.data[['labels','age_in_days']]
        if self.data.age_in_days.max() > 365 :
            self.data.age_in_days /= 365
            self.set("title","File Age in Yr")
            unit = "Yr"
        elif self.data.age_in_days.max() > 30 :
            self.data.age_in_days /= 30
            self.set("title","File Age in Mo")
            unit = "Mo"
        else:
            self.set("title","File Age in Days")
            unit = "Day"
        self.data.age_in_days = np.round(self.data.age_in_days,4)
        self.data = self.data.rename(columns={"age_in_days":"age ("+unit+")"})
        self.set("data",self.data)
        self.set('legend',{"position":'right'})
        # self.set('labels','name')
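#
# Threshold sketch (values illustrative): a maximum age of 400 days exceeds
# 365, so ages are divided by 365 and reported in years (400 -> ~1.0959 Yr);
# a maximum of 90 days falls in the 30..365 range and is reported in months (3.0 Mo).
#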