diff --git a/src/setup.py b/src/setup.py new file mode 100644 index 0000000..c5220ca --- /dev/null +++ b/src/setup.py @@ -0,0 +1,37 @@ +""" +""" +from setuptools import setup, find_packages +import os +import sys +def read(fname): + return open(os.path.join(os.path.dirname(__file__), fname)).read() +args = { + "name":"smart-top", + "version":"1.0.0", + "author":"The Phi Technology LLC","author_email":"info@the-phi.com", + "license":"MIT", + "packages":["smart"]} +args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite'] +args["install_requires"] = ['pandas','numpy','requests'] +args["url"] = "https://dev.the-phi.com/git/steve/smart-top.git" + +if sys.version_info[0] == 2 : + args['use_2to3'] = True + args['use_2to3_exclude_fixers']=['lib2to3.fixes.fix_import'] +setup(**args) +# setup( +# name = "data-transport", +# version = "1.0", +# author = "The Phi Technology LLC", +# author_email = "steve@the-phi.com", +# license = "MIT", +# packages=['transport'], +# keywords=['mongodb','couchdb','rabbitmq','file','read','write','s3'], +# install_requires = ['pymongo','numpy','cloudant','pika','boto','flask-session','smart_open'], +# url="https://dev.the-phi.com/git/steve/data-transport.git", +# use_2to3=True, +# long_description=read('README.md'), +# convert_2to3_doctests=['README.md'], +# #use_2to3_fixers=['your.fixers'], +# use_2to3_exclude_fixers=['lib2to3.fixes.fix_import'], +# ) diff --git a/src/smart/__init__.py b/src/smart/__init__.py new file mode 100644 index 0000000..bb40640 --- /dev/null +++ b/src/smart/__init__.py @@ -0,0 +1,49 @@ +""" +This framework allows data to be logged to a given data store i.e : + - disk, cloud (google, dropbox, box, sugarsync or s3) or a queue server +The intent of the framework is to work as a standalone or embedded in code as a logging framework +usage: + +dependencies : + data-transport pip install git+https://dev.the-phi.com/git/steve/data-transport.git +""" +import smart.top +import smart.folder +import smart.logger +# from transport import factory + +# class logger : +# """ +# This class is a basic logger, it will log data regardless of the types of data, We will have subclasses that will implement various data extraction schemas: +# - processes (top), + +# """ +# def __init__(self,**args): +# """ +# :store data store (disk,mongo,couch,google,dropbox) +# :args arguments to pass for the data-store (read transport documentation) +# :notify function that returns true/false for notification +# """ + +# self.store = factory.instance(type=store,args=args['args']) +# if 'notify' in args : +# self.notify = args +# pass + +# def log(self,row): +# """ +# This function will log data to a data store +# :row row to be stored +# """ +# self.store.write(row=row) +# if(hasattr(self,'notify')): +# if (self.notify(row)) : +# # +# # Let us notify the backend by generating a report and submitting it +# # +# stream = self.get.report() +# pass +# else: +# pass +# def report(self) : + diff --git a/src/smart/alert/__init__.py b/src/smart/alert/__init__.py new file mode 100644 index 0000000..b513d3c --- /dev/null +++ b/src/smart/alert/__init__.py @@ -0,0 +1,15 @@ +""" +This file will submit an alert to either a mailbox given a set of parameters, this will perform as following : + - as-a-service + - embedded +""" + +check = None +def post(**args): + """ + This function will submit a report to a given target provided some input + :key will perform as-a-service + :data data that will be submitted to smtp/queue server + :smtp will send the file to a mailbox + """ + pass \ No newline at end of file diff --git a/src/smart/folder/__init__.py b/src/smart/folder/__init__.py new file mode 100644 index 0000000..45c8f75 --- /dev/null +++ b/src/smart/folder/__init__.py @@ -0,0 +1,63 @@ +""" +This file is designed to retrieve information on a folder +{files,size,hash} +""" +import subprocess +import sys +import re +import os +import pandas as pd +import io +import datetime +class Util : + def size(self,stream): + + + PATTERN = '(^.+)([A-Z]+$)' + value,units = re.match('^(.+)([A-Z]+$)',stream).groups() + value = float(value) + if 'G' == units : + value *= 1000 + elif 'K' == units: + value /= 1000 + units = 'MB' + return {"size":value,"units":units} + def content(self,stream): + return {"content":stream.split(' ')[0].strip()} + + +def read(**args): + """ + The path can also take in regular expressions + """ + cmd = {"size":"du -sh :path","content":"find :path -type f -exec md5sum {} + | sort -z|md5sum"} + r = {} + util = Util() + for key in cmd : + _cmd = cmd[key] + handler = subprocess.Popen(_cmd.replace(':path',args['path']),shell=True,stdout=subprocess.PIPE,encoding='utf-8') + stream = handler.communicate()[0] + + if sys.version_info[0] > 2 : + rows = str(stream).split('\n') + else: + rows = stream.split('\n') + if key == 'size' : + rows = rows[0] + rows = util.size(rows.split('\t')[0]) + elif key == 'content' : + # + # There is a hash key that is generated and should be extracted + rows = rows[0] + rows = util.content(rows) + + r = dict(r, **rows) + r['path'] = args['path'] + r['name'] = args['path'].split(os.sep)[-1:][0] + r['node'] = os.uname()[1] + r['date'] = datetime.datetime.now().strftime('%m-%d-%Y') + r['time'] = datetime.datetime.now().strftime('%H:%M:%S') + + return pd.DataFrame([r]) + + pass diff --git a/src/smart/folder/__main__.py b/src/smart/folder/__main__.py new file mode 100644 index 0000000..1166748 --- /dev/null +++ b/src/smart/folder/__main__.py @@ -0,0 +1,2 @@ +import smart.folder +print (smart.folder.read(path='/home/steve/dev/data/vumc/aou')) \ No newline at end of file diff --git a/src/smart/logger/__init__.py b/src/smart/logger/__init__.py new file mode 100644 index 0000000..05496d5 --- /dev/null +++ b/src/smart/logger/__init__.py @@ -0,0 +1,52 @@ +import pandas as pd +import numpy as np +import transport +import datetime +import io +import json +import requests + +def subscribe (self,**args) : + """ + This function will subscribe an email to a given service (report,notification). If already susbcribed no further action will be performed + :email provide a valid email for the free plan. Upgrades will be done via the website + :id service identifier accepted values are GOOGLE_DRIVE,DROPBOX,BOX,ONE_DRIVE + + """ + url = "https://the-phi.com/store/smart-top/subscribe" + SERVICES=['GOOGLE','DROPBOX','BOX','ONE_DRIVE'] + if args['id'].upper() in SERVICES : + data = {"email":args['email']} + requests.post(url,data=data) + pass + +def log(**args) : + """ + This function will write to a designated location provided a set of inputs + :store mongo,file,couch,api + """ + # + # @TODO: Provide facility to write to a given cloud store (google,one-drive ...) + # This will have to be supported by some sort of subscription service + # + STORE_MAP = {"mongo":"MongoWriter","disk":"DiskWriter","couch":"CouchWriter",'sqlite':'SQLiteWriter'} + if 'store' not in args : + _id = 'console' + else: + _id = 'disk' if args['store'] == 'file' else args['store'] + _id = 'disk' if _id == 'sqlite' else _id + if _id == 'console' : + """ + We are going to print whatever we have to the console ... using the tool in cli mode + """ + print() + print (args['data']) + print () + # stream = args['memory'] + # stream.write(json.dumps(args['row']) if isinstance(args['row'],dict) else args['row']) + # stream.write("\n") + else: + store_type = ".".join([args['store'],STORE_MAP[_id]]) + store_args = args['params'] + store = transport.factory.instance(type=store_type,args=store_args) + store.write( args['row']) \ No newline at end of file diff --git a/src/smart/top/.__init__.py.swp b/src/smart/top/.__init__.py.swp new file mode 100644 index 0000000..0728c9b Binary files /dev/null and b/src/smart/top/.__init__.py.swp differ diff --git a/src/smart/top/__init__.py b/src/smart/top/__init__.py new file mode 100644 index 0000000..e444963 --- /dev/null +++ b/src/smart/top/__init__.py @@ -0,0 +1,124 @@ +""" +This file contains class and functions that extract data from running processes like top and stores them into a data store of the calling codes choice +dependencies: + - top (on the os) +@TODO: + Test this thing on windows to see if it works +""" +import pandas as pd +import numpy as np +import subprocess +import os +import datetime +# from transport import factory +import sys +import hashlib +class Util: + + def app(self,stream): + """ + Formatting application name, sometimes the name has parameters os separators ... + """ + index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream)-1 + cmd = " ".join(stream[:index]) if index > 0 else " ".join(stream) + + if ' ' in cmd.split(os.sep)[len(cmd.split(os.sep))-1] : + p = cmd.split(os.sep)[len(cmd.split(os.sep))-1].split(' ') + name = p[0] + args = " ".join(p[1:]) + else: + name = cmd.split('/')[len(cmd.split(os.sep))-1] + args = " ".join(stream[index:]) if index > 0 else "" + + return [name,cmd,args] + def parse(self,rows,xchar=';'): + """ + This function parses the document returned by the execution of the command returns a document that will have to be parsed and formatted + """ + m = [] + TIME_INDEX = 5 + ARGS_INDEX = 6 + + for item in rows : + if rows.index(item) != 0 : + parts = item.split(xchar) + row = parts[:TIME_INDEX] + row.append(' '.join(parts[TIME_INDEX:ARGS_INDEX])) + row += self.app(parts[ARGS_INDEX:]) + else: + row = item.split(xchar) + row = (xchar.join(row)).strip() + if len(row.replace(";","")) > 0 : + m.append(row) + return m + + +def read(args) : + """ + This function will perform the actual reads of process informations. + @return {user,pid,start,status, name, args, mem,cpu} + """ + cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'" + xchar = ";" + try: + handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) + stream = handler.communicate()[0] + if sys.version_info[0] > 2 : + rows = str(stream).split('\\n') + else: + rows = stream.split('\n') + + formatter = Util() + m = formatter.parse(rows) + + d = datetime.datetime.now().strftime('%m-%d-%Y') + t = datetime.datetime.now().strftime('%H:%M:%S') + m = [item for item in m if len(item) != len (m[0])] + m = "\n".join(m[1:]) + df = pd.read_csv(pd.compat.StringIO(m),sep=xchar) + df['date'] = np.repeat(d,df.shape[0]) + df['time'] = np.repeat(t,df.shape[0]) + df['node'] = np.repeat(os.uname()[1],df.shape[0]) + df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node'] + + + # + # We should filter the name of the apps we are interested in here (returning the full logs ) + # @TODO: Add filter here to handle filter on different columns + # + + if 'name' in args : + names = args['name'].split(',') + r = pd.DataFrame() + for name in names : + tmp = df[df.name == name.strip()] + if tmp.shape[0] : + r = r.append(tmp) + df = r + # + # For security reasons lets has the args columns with an MD5 or sha256 + # + + + df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()] + STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'} + df.status = df.status.apply(lambda value: STATUS.get(value,'UNKNOWN')) + if 'cols' in args : + _cols = list(set(df.columns.tolist()) & set(args['cols'])) + if _cols : + df = df[_cols] + # + # we return a list of objects (no data-frames) + if 'logger' in args and args['logger'] != None : + logger = args['logger'] + logger(data=df) + return df.to_dict(orient='records') + + except Exception as e: + print (e) + pass + +if __name__ == '__main__' : + # + # Being directly called (external use of the ) + print(read()) diff --git a/src/smart/top/__main__.py b/src/smart/top/__main__.py new file mode 100644 index 0000000..4871de6 --- /dev/null +++ b/src/smart/top/__main__.py @@ -0,0 +1,5 @@ +import smart.top +import pandas as pd +df = pd.DataFrame (smart.top.read(name='firefox,code')) + +print (df.groupby(['user'])['cpu','mem'].sum()) \ No newline at end of file diff --git a/src/smart/view.py b/src/smart/view.py new file mode 100644 index 0000000..7cca938 --- /dev/null +++ b/src/smart/view.py @@ -0,0 +1,45 @@ +""" +""" +import smart +import sys +import json +import pandas as pd +import numpy as np +from multiprocessing import Process + + +SYS_ARGS = {} +if len(sys.argv) > 1: + + N = len(sys.argv) + for i in range(1,N): + value = None + if sys.argv[i].startswith('--'): + key = sys.argv[i][2:] #.replace('-','') + SYS_ARGS[key] = 1 + if i + 1 < N: + value = sys.argv[i + 1] = sys.argv[i+1].strip() + if key and value: + SYS_ARGS[key] = value + + + i += 2 + +# +# Let's determine what data to return ... +# --folder and/or --app +def format(prefix,pointer,logger,container): + return [{'args':{prefix:term.strip(),'logger':logger},'pointer':pointer} for term in container] + +folders = format('path',smart.folder.read,smart.logger.log,SYS_ARGS['folders'].split(',')) if 'folders' in SYS_ARGS else [] +apps = format('name',smart.top.read,smart.logger.log,SYS_ARGS['apps'].split(',')) if 'apps' in SYS_ARGS else [] +if 'cols' in SYS_ARGS : + cols = [name.strip() for name in SYS_ARGS['cols'].split(',')] +else: + cols = [] +nodes = folders + apps +for node in nodes : + if cols : + node['args']['cols'] = cols + pthread = Process(target=node['pointer'],args=(node['args'],)) + pthread.start() \ No newline at end of file