From 5834a10099714aa123dadf6bd50c5088ca145cf7 Mon Sep 17 00:00:00 2001 From: Steve Nyemba Date: Mon, 11 Dec 2023 13:09:02 -0600 Subject: [PATCH] bug fix & refactoring --- setup.py | 11 ++-- smart/__init__.py | 115 +++++++++++++++++++++++++++++++++++++++ smart/__main__.py | 1 + smart/alert/__init__.py | 15 ----- smart/files/__init__.py | 65 ++++++++++++++++++++++ smart/folder/__init__.py | 7 ++- smart/folder/__main__.py | 2 +- smart/info.py | 15 +++++ smart/top/__init__.py | 22 ++++---- smart/top/__main__.py | 10 +++- 10 files changed, 225 insertions(+), 38 deletions(-) create mode 120000 smart/__main__.py delete mode 100644 smart/alert/__init__.py create mode 100644 smart/files/__init__.py create mode 100644 smart/info.py diff --git a/setup.py b/setup.py index 1058f67..2553c5b 100644 --- a/setup.py +++ b/setup.py @@ -3,16 +3,17 @@ from setuptools import setup, find_packages import os import sys +import smart.info def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() args = { - "name":"smart-top", - "version":"1.0.6", - "author":"The Phi Technology LLC","author_email":"info@the-phi.com", - "license":"MIT", + "name":smart.info.__app_name__, + "version":smart.info.__version__, + "author":smart.info.__author__,"author_email":"info@the-phi.com", + "license":smart.info.__license__, "packages":["smart","smart.top","smart.folder","smart.logger"]} args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite'] -args["install_requires"] = ['pandas','numpy','requests','data-transport@git+https://healthcareio.the-phi.com/git/code/transport.git'] +args["install_requires"] = ['pandas','numpy','requests','data-transport@git+https://github.com/lnyemba/data-transport.git'] args["url"] = "https://dev.the-phi.com/git/steve/smart-top.git" args['scripts'] = ['bin/smart-top'] # args['entry_point'] = {'console-scripts':['smart-top=smart-top:main']} diff --git a/smart/__init__.py b/smart/__init__.py index bb40640..2ad1b91 100644 --- a/smart/__init__.py +++ b/smart/__init__.py @@ -7,9 +7,124 @@ usage: dependencies : data-transport pip install git+https://dev.the-phi.com/git/steve/data-transport.git """ +import smart import smart.top import smart.folder +import smart.top import smart.logger +import smart.files +import uuid +import typer +import smart.info +import json +import os +import transport +import shutil +from datetime import datetime +_cli = typer.Typer() + +@_cli.command(name='top') +def apply_apps (app:str=None,user:str=None): + """ + This function looks at applications/commands running on the system + """ + + _df = smart.top.read() + _id = 'apps' if not app else app + if app : + _index = _df.name == app + if _index.sum() : + _df = _df[_index] + post(_df,_id) + +@_cli.command(name='archive') +def _archive(): + """ + This function will archive the database, by renaming it into + """ + + _suffix = datetime.now() + _suffix = "-".join([str(_value) for _value in [_suffix.year,_suffix.month,_suffix.day,_suffix.hour,_suffix.minute]]) + _path = os.sep.join([smart.info.__home__,smart.info.__database__]) + _src = _path + '.db3' + if os.path.exists(_src): + _target = _path +'-archived-on-'+ _suffix+'.db3' + shutil.move(_src,_target) + _msg = f"""Archive created successfully at: + {_target}""" + else: + _msg = """ + Archive function is not available at this time, please try after logs have been stored + """ + print(_msg) +@_cli.command(name='folder') +def apply_folder(path:str): + """ + This function will read the content of a folder and generate a + """ + _df = smart.folder.read(path=path) + # print (_df) + post(_df,'folders') + pass +@_cli.command (name='files') +def apply_files(folder:str) : + _df = smart.files.read(folder) + post(_df,'files') +@_cli.command(name='register') +def apply_signup (email:str,key:str=None,provider:str='sqlite') : + _config = {"system":{"email":email,"uid":str(uuid.uuid4()),"version":smart.info.__version__},"store":{"provider":provider,"context":"write"}} + _db = smart.info.__database__ + if provider in ['sqlite','sqlite3'] : + _db = os.sep.join([smart.info.__home__,_db+'.db3']) + _config['store']['database'] = _db + else: + _config['store']['database'] = _db + # + # Let us store this in a folder + _PATH = smart.info.__home__ + _verb = "written" + if not os.path.exists(_PATH) : + os.mkdir(_PATH) + else: + _verb = "updated" + f = open(os.sep.join([_PATH,'config.json']),'w') + f.write(json.dumps(_config)) + f.close() + _msg = f""" + The configuration file was {_verb} successfully at {smart.info.__home__} + data store: + provider {provider} + database {_db} + + If your database has security enabled, consider updating "{smart.info.__home__}{os.sep}config.json" For appropriate security + Visit https://github.com/lnyemba/data-transport for more information + """ + print () + print (_msg) + pass +def post(_df,_table): + """ + Store data in a given location + """ + _path = os.sep.join([smart.info.__home__,'config.json']) + f = open (_path) + _config = json.loads(f.read()) + f.close() + _store = _config['store'] + if _store['provider'] in ['mongodb','mongo','couch','couchdb'] : + _store['collection'] = _table + else: + _store['table'] = _table + + + writer = transport.factory.instance(**_store) + writer.write(_df) + if hasattr(writer,'close') : + writer.close() + +if __name__ == '__main__' : + _cli() + # from transport import factory # class logger : diff --git a/smart/__main__.py b/smart/__main__.py new file mode 120000 index 0000000..93f5256 --- /dev/null +++ b/smart/__main__.py @@ -0,0 +1 @@ +__init__.py \ No newline at end of file diff --git a/smart/alert/__init__.py b/smart/alert/__init__.py deleted file mode 100644 index b513d3c..0000000 --- a/smart/alert/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -This file will submit an alert to either a mailbox given a set of parameters, this will perform as following : - - as-a-service - - embedded -""" - -check = None -def post(**args): - """ - This function will submit a report to a given target provided some input - :key will perform as-a-service - :data data that will be submitted to smtp/queue server - :smtp will send the file to a mailbox - """ - pass \ No newline at end of file diff --git a/smart/files/__init__.py b/smart/files/__init__.py new file mode 100644 index 0000000..c82bba0 --- /dev/null +++ b/smart/files/__init__.py @@ -0,0 +1,65 @@ +""" +This file will submit an alert to either a mailbox given a set of parameters, this will perform as following : + - as-a-service + - embedded +""" +import os +import pandas as pd +import subprocess +import glob +from datetime import datetime + +def post(**args): + """ + This function will submit a report to a given target provided some input + :key will perform as-a-service + :data data that will be submitted to smtp/queue server + :smtp will send the file to a mailbox + """ + pass +def parse(_stream): + """ + :stream single from the output command that has been executed + """ + _blocks = _stream.replace(' ',' ').split(' ') + if len(_blocks) > 6 : + _user = _blocks[1] + _group= _blocks[2] + _size = _blocks[3] # if units are not specified please interpet this as bytes + _date = "-".join(_blocks[4:6]) + _time = _blocks[6] + _name = _blocks[-1] + if ':' not in _time : + _date = _date+' '+_time + _time = '00:00' + else: + _date = _date+'-'+str(datetime.now().year) + _name = _blocks[-1] + return {'user':_user,'date':_date,'time':_time,'size':_size,'content':None,'name':_name} +def apply(_cmd, parser=None): + handler = subprocess.Popen(_cmd,shell=True,stdout=subprocess.PIPE,encoding='utf-8') + stream = handler.communicate()[0].split('\n') + stream = [line.strip() for line in stream] + if not parser : + # print (dict(zip(['hash','names'],stream[0].split()))) + + stream = [ line.strip().replace(' ',' ').split(' ') for line in stream if len(line.strip().split()) == 2] + return pd.DataFrame([dict(zip(['content','name'],line)) for line in stream]) + # return pd.DataFrame([ line.split() for line in stream ]) + + # return pd.DataFrame( dict(zip(['checksum','name'],[line.strip().split(' '))) for line in stream if line.strip() != '']) ) + else: + return pd.DataFrame([ parser(line.strip()) for line in stream if line.strip() != '']) +def read (path): + """ + This function will read files in a folder and provide has expressions of the files + """ + _cmd = ["""find :path -type f -exec md5sum "{}" + """ , """find :path -type f -exec ls -lh "{}" + |grep -E " .*$" -o """] + _df = apply(_cmd[0].replace(":path",path)) + _data= apply(_cmd[1].replace(":path",path),parse) + if _data.shape[0] == _df.shape[0] : + _data['content'] = _df.content + return _data + + +read('/home/steve/tmp/logs') \ No newline at end of file diff --git a/smart/folder/__init__.py b/smart/folder/__init__.py index caa5903..a50df84 100644 --- a/smart/folder/__init__.py +++ b/smart/folder/__init__.py @@ -9,6 +9,8 @@ import os import pandas as pd import io import datetime +import glob + class Util : def size(self,stream): @@ -58,13 +60,14 @@ def read(**args): r = dict(r, **rows) N = 0 if not os.path.exists(args['path']) else len( os.listdir(args['path'])) + path = args['path'] if args['path'].endswith('/')else args['path']+os.sep r['path'] = args['path'] - r['files']= N + r['files']= len([filename for filename in glob.iglob(path+'**/**', recursive=True)]) r['name'] = args['path'].split(os.sep)[-1:][0] r['node'] = os.uname()[1] r['date'] = datetime.datetime.now().strftime('%m-%d-%Y') r['time'] = datetime.datetime.now().strftime('%H:%M:%S') - + return pd.DataFrame([r]) pass diff --git a/smart/folder/__main__.py b/smart/folder/__main__.py index 1166748..4e64e08 100644 --- a/smart/folder/__main__.py +++ b/smart/folder/__main__.py @@ -1,2 +1,2 @@ import smart.folder -print (smart.folder.read(path='/home/steve/dev/data/vumc/aou')) \ No newline at end of file +print (smart.folder.read(path='/home/steve/tmp/logs')) \ No newline at end of file diff --git a/smart/info.py b/smart/info.py new file mode 100644 index 0000000..f27d3ec --- /dev/null +++ b/smart/info.py @@ -0,0 +1,15 @@ +import os +__app_name__= "smart-logger" +__version__ = "1.0-RC" +__author__ = "Steve L. Nyemba, info@the-phi.com" +__home__ = os.sep.join([os.environ['HOME'],'.smart-logger']) +__database__='smart_logs' +__license__= """ +Copyright 2017 - 2023, The Phi Technology + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +""" \ No newline at end of file diff --git a/smart/top/__init__.py b/smart/top/__init__.py index 2fe4a1b..e404a1f 100644 --- a/smart/top/__init__.py +++ b/smart/top/__init__.py @@ -29,10 +29,10 @@ class Util: name = p[0] args = " ".join(p[1:]) else: - name = cmd.split('/')[len(cmd.split(os.sep))-1] + name = cmd.split(os.sep)[len(cmd.split(os.sep))-1] args = " ".join(stream[index:]) if index > 0 else "" - return [name,cmd,args] + return [name,cmd.replace('"',"\\'"),args.replace('"',"\\'")] def parse(self,rows,xchar=';'): """ This function parses the document returned by the execution of the command returns a document that will have to be parsed and formatted @@ -42,7 +42,6 @@ class Util: ARGS_INDEX = 6 for item in rows : - if rows.index(item) != 0 : parts = item.split(xchar) row = parts[:TIME_INDEX] @@ -64,14 +63,13 @@ def read(**args) : cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'" xchar = ";" try: - handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE) stream = handler.communicate()[0] if sys.version_info[0] > 2 : rows = str(stream).split('\\n') else: rows = stream.split('\n') - + formatter = Util() m = formatter.parse(rows) @@ -86,22 +84,22 @@ def read(**args) : df['node'] = np.repeat(os.uname()[1],df.shape[0]) df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node'] - # # We should filter the name of the apps we are interested in here (returning the full logs ) # @TODO: Add filter here to handle filter on different columns # - if 'name' in args : + if 'name' in args and args['name']: names = args['name'].split(',') r = pd.DataFrame() for name in names : # tmp = df[df.name == name.strip() ] - ii = df.apply(lambda row: row['name'] == name.strip() or (name.strip() in str(row['name'])),axis=1).tolist() + # ii = df.apply(lambda row: row['name'] == name.strip() or (name.strip() in str(row['name'])),axis=1).tolist() + ii = df.apply(lambda row: type(row['cmd']) ==str and name.strip() in row['cmd'],axis=1).tolist() tmp= df[ii] # tmp.index = np.arange(tmp.shape[0]) if tmp.empty: - tmp = {"pid":None,"user":None,"mem":0,"cpu":0,"status":"-100","started":None,"name":_name,"cmd":None,"args":None,"date":d,"time":t,"node":n} + tmp = {"pid":None,"user":None,"mem":0,"cpu":0,"status":"-100","started":None,"name":name,"cmd":None,"args":None,"date":d,"time":t,"node":n} else: r = r.append(tmp,ignore_index=False) @@ -112,8 +110,8 @@ def read(**args) : # For security reasons lets has the args columns with an MD5 or sha256 # - if not df.empty and 'args' in df : - df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()] + # if not df.empty and 'args' in df : + # df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()] STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'} df.status = df.status.apply(lambda value: STATUS.get(value,'UNKNOWN')) if 'cols' in args : @@ -126,7 +124,7 @@ def read(**args) : logger = args['logger'] logger(data=df) df.index = np.arange(df.shape[0]) - + return df #.to_dict(orient='records') except Exception as e: diff --git a/smart/top/__main__.py b/smart/top/__main__.py index 403ddc1..fd8bdd2 100644 --- a/smart/top/__main__.py +++ b/smart/top/__main__.py @@ -32,6 +32,7 @@ if len(sys.argv) > 1: i += 2 + if __name__ == '__main__' : try: if 'help' in SYS_ARGS : @@ -50,7 +51,9 @@ if __name__ == '__main__' : df = pd.DataFrame() if 'folder' in SYS_ARGS : for path in SYS_ARGS['folder'].split(',') : - df = df.append(smart.folder.read(path=path)) + #df = df.concat(smart.folder.read(path=path)) + _df = smart.folder.read(path=path) + df = _df if df.shape[0] == 0 else pd.concat(df,_df) cols = df.columns.tolist() else: df = smart.top.read() @@ -73,7 +76,8 @@ if __name__ == '__main__' : # df = pd.DataFrame(smart.top.read(name='fire')) - log = log.append(df) + #log = log.append(df) + log = df if log.shape[0] ==0 else pd.concat(log,df) if not df.empty : print (df[cols]) if 'watch' in SYS_ARGS : @@ -89,4 +93,4 @@ if __name__ == '__main__' : print ("... Exiting, Thanks for using smart-top") # pass -# print (df.groupby(['user'])['cpu','mem'].sum()) \ No newline at end of file +# print (df.groupby(['user'])['cpu','mem'].sum())