bug fix & refactoring

2 years ago · 5834a10099
parent b063852f6d
commit 5834a10099
10 changed files with 225 additions and 38 deletions
--- a/setup.py
+++ b/setup.py
@ -3,16 +3,17 @@
 from setuptools import setup, find_packages
 import os
 import sys
 import smart.info
 def read(fname):
    """
    Return the text of a file located next to this setup script

    :fname  name of the file, relative to the folder that contains this script
    """
    # the context manager closes the handle, even when the read fails
    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
        return f.read()
 args    = {
-    "name":"smart-top",
+    "name":smart.info.__app_name__,
-    "version":"1.0.6",
+    "version":smart.info.__version__,
-    "author":"The Phi Technology LLC","author_email":"info@the-phi.com",
+    "author":smart.info.__author__,"author_email":"info@the-phi.com",
-    "license":"MIT",
+    "license":smart.info.__license__,
    "packages":["smart","smart.top","smart.folder","smart.logger"]}
 args["keywords"]=['mongodb','couchdb','rabbitmq','file','read','write','s3','sqlite']
-args["install_requires"] = ['pandas','numpy','requests','data-transport@git+https://healthcareio.the-phi.com/git/code/transport.git']
+args["install_requires"] = ['pandas','numpy','requests','data-transport@git+https://github.com/lnyemba/data-transport.git']
 args["url"] =   "https://dev.the-phi.com/git/steve/smart-top.git"
 args['scripts'] = ['bin/smart-top']
 # args['entry_point'] = {'console-scripts':['smart-top=smart-top:main']}
--- a/smart/init.py
+++ b/smart/init.py
@ -7,9 +7,124 @@ usage:
 dependencies :
    data-transport  pip install git+https://dev.the-phi.com/git/steve/data-transport.git
 """
 import smart
 import smart.top
 import smart.folder
 import smart.top
 import smart.logger
 import smart.files
 import uuid
 import typer
 import smart.info
 import json
 import os
 import transport
 import shutil
 from datetime import datetime
 _cli = typer.Typer()
@_cli.command(name='top')
def apply_apps (app:str=None,user:str=None):
    """
    Capture the applications/commands running on the system (smart.top.read) and store them

    :app    optional command name, the rows are stored under this name instead of 'apps'
            when at least one running process carries this name, only the matching rows are kept
    :user   accepted on the command line, not used by this function at this time
    """
    _df = smart.top.read()
    if not app :
        post(_df,'apps')
        return
    _matches = _df.name == app
    if _matches.sum() :
        # without a match the full listing is stored (under the name of the app)
        _df = _df[_matches]
    post(_df,app)
@_cli.command(name='archive')
def _archive():
    """
    Archive the database by renaming <home>/<database>.db3 into
    <home>/<database>-archived-on-<year>-<month>-<day>-<hour>-<minute>.db3
    When the database file does not exist, a message is printed and nothing is moved
    """
    _now = datetime.now()
    _stamp = "-".join(map(str,(_now.year,_now.month,_now.day,_now.hour,_now.minute)))
    _base = os.sep.join([smart.info.__home__,smart.info.__database__])
    _src = f"{_base}.db3"
    if not os.path.exists(_src):
        print("""
        Archive function is not available at this time, please try after logs have been stored
        """)
        return
    _target = f"{_base}-archived-on-{_stamp}.db3"
    shutil.move(_src,_target)
    print(f"""Archive created successfully at:
        {_target}""")
@_cli.command(name='folder')
def apply_folder(path:str):
    """
    Read the content of a folder (smart.folder.read) and store the result under 'folders'

    :path   location of the folder to be read
    """
    post(smart.folder.read(path=path),'folders')
@_cli.command (name='files')
def apply_files(folder:str) :
    """
    Read the files found in a folder (smart.files.read) and store the result under 'files'

    :folder     location of the folder to be read
    """
    _files = smart.files.read(folder)
    post(_files,'files')
@_cli.command(name='register')
def apply_signup (email:str,key:str=None,provider:str='sqlite') :
    """
    Write the configuration file (config.json in the home folder) that the post function reads to reach the data store

    :email      email stored in the "system" section of the configuration
    :key        accepted on the command line, not used by this function at this time
    :provider   name of the data store provider, with sqlite/sqlite3 the database is a .db3 file in the home folder
    """
    _PATH = smart.info.__home__
    _db = smart.info.__database__
    if provider in ['sqlite','sqlite3'] :
        # sqlite is given the path of a file, any other provider is given the name of the database as is
        _db = os.sep.join([_PATH,_db+'.db3'])
    _config = {
        "system":{"email":email,"uid":str(uuid.uuid4()),"version":smart.info.__version__},
        "store":{"provider":provider,"context":"write","database":_db}}
    #
    # Let us store this in a folder
    _verb = "updated" if os.path.exists(_PATH) else "written"
    # makedirs also creates missing parent folders and accepts a folder that already exists
    os.makedirs(_PATH,exist_ok=True)
    # the context manager closes the file, even when the write fails
    with open(os.sep.join([_PATH,'config.json']),'w') as f :
        f.write(json.dumps(_config))
    _msg = f"""
        The configuration file was {_verb} successfully at {smart.info.__home__}
        data store: 
            provider {provider}
            database {_db}
        If your database has security enabled, consider updating "{smart.info.__home__}{os.sep}config.json" For appropriate security
        Visit https://github.com/lnyemba/data-transport for more information
    """
    print ()
    print (_msg)
 def post(_df,_table):
    """
    Store data in the data store described by config.json (found in the home folder)

    :_df        data to be written, handed as is to the writer
    :_table     name of the table, or of the collection for mongodb/couchdb providers
    """
    _path = os.sep.join([smart.info.__home__,'config.json'])
    # the context manager closes the file, even when the content is not valid JSON
    with open (_path) as f :
        _config = json.loads(f.read())
    _store = _config['store']
    if _store['provider'] in ['mongodb','mongo','couch','couchdb'] :
        _store['collection'] = _table
    else:
        _store['table'] = _table
    writer = transport.factory.instance(**_store)
    try:
        writer.write(_df)
    finally:
        # not every writer has a close function, those that do are closed even when the write fails
        if hasattr(writer,'close') :
            writer.close()
 # run the command line interface (typer) when this file is executed as a script
 if __name__ == '__main__' :
    _cli()
 # from transport import factory
 # class logger :
--- a/smart/main.py
+++ b/smart/main.py
@ -0,0 +1 @@
 __init__.py
--- a/smart/alert/init.py
+++ b/smart/alert/init.py
@ -1,15 +0,0 @@
 """
 This file will submit an alert to either a mailbox given a set of parameters, this will perform as following :
    - as-a-service
    - embedded
 """
 check = None
 def post(**args):
    """
    Placeholder for the submission of a report to a given target, nothing is submitted at this time
    The keyword arguments that are expected :
    :key    will perform as-a-service
    :data   data that will be submitted to smtp/queue server
    :smtp   will send the file to a mailbox
    """
    return None
--- a/smart/files/init.py
+++ b/smart/files/init.py
@ -0,0 +1,65 @@
 """
 This file will submit an alert to either a mailbox given a set of parameters, this will perform as following :
    - as-a-service
    - embedded
 """
 import os
 import pandas as pd
 import subprocess
 import glob
 from datetime import datetime
 def post(**args):
    """
    Placeholder for the submission of a report to a given target, nothing is submitted at this time
    The keyword arguments that are expected :
    :key    will perform as-a-service
    :data   data that will be submitted to smtp/queue server
    :smtp   will send the file to a mailbox
    """
    return None
 def parse(_stream):
    """
    Parse a single line of a long file listing (without the permission column) into a record

    :_stream    single line from the output of the command that has been executed
                the fields expected are : links, user, group, size, month, day, time (or year), name
    :return     dictionary with user, date, time, size, content (always None here) and name
                None when the line has fewer than 7 fields
    """
    # The columns are padded with a variable number of spaces, so the split is on any run of white space.
    # The split stops after the 7th field, a file name that contains spaces stays in one piece
    _blocks = _stream.strip().split(None,7)
    if len(_blocks) < 7 :
        return None
    _user = _blocks[1]
    _size = _blocks[3]  # if units are not specified please interpet this as bytes
    _date = "-".join(_blocks[4:6])
    _time = _blocks[6]
    if ':' not in _time :
        # no clock in this field (presumably a year), it is appended to the date and the time is set to midnight
        _date = _date+' '+_time
        _time = '00:00'
    else:
        # the listing carries no year with a clock, the current year is assumed
        _date = _date+'-'+str(datetime.now().year)
    _name = _blocks[-1]
    return {'user':_user,'date':_date,'time':_time,'size':_size,'content':None,'name':_name}
 def apply(_cmd, parser=None):
    """
    Run a shell command and turn the lines of its standard output into a data-frame

    :_cmd       command line, executed through the shell as is (callers must quote whatever they insert in it)
    :parser     optional function called with every non-empty line, it returns a dictionary (a row) or None to leave the line out
                without a parser every line is read as "<content> <name>" i.e a checksum followed by a file name
    :return     data-frame with one row per line that was retained
    """
    handler = subprocess.run(_cmd,shell=True,stdout=subprocess.PIPE,encoding='utf-8')
    lines = [line.strip() for line in handler.stdout.split('\n')]
    lines = [line for line in lines if line != '']
    if not parser :
        # split once: the name is everything after the first run of white space, names with spaces are kept
        pairs = [line.split(None,1) for line in lines]
        return pd.DataFrame([dict(zip(['content','name'],pair)) for pair in pairs if len(pair) == 2])
    rows = [parser(line) for line in lines]
    # a parser returns None for a line it can not read, such lines are left out of the data-frame
    return pd.DataFrame([row for row in rows if row is not None])
 def read (path):
    """
    This function will read the files in a folder and provide hash expressions (md5) of the files

    :path   folder to be read, it is quoted before it is inserted in the shell commands
    :return data-frame with one row per file (see parse), content holds the md5 checksum found for the name of the file
    """
    import shlex    # only needed here, to quote the path for the shell
    _path = shlex.quote(path)
    _cmd = ["""find :path -type f -exec md5sum "{}" + """ , """find :path -type f -exec ls -lh "{}" + |grep -E " .*$" -o """]
    _df = apply(_cmd[0].replace(":path",_path))
    _data= apply(_cmd[1].replace(":path",_path),parse)
    if 'name' in _data and 'name' in _df :
        # The checksums are attached by file name, not by position: the two commands run one after the other
        # and nothing guarantees they list the same files in the same order
        _lookup = dict(zip(_df['name'],_df['content']))
        _data['content'] = [_lookup.get(_name) for _name in _data['name']]
    return _data
 if __name__ == '__main__' :
    # manual check against a developer folder, guarded so that importing this module does not scan the disk
    read('/home/steve/tmp/logs')
--- a/smart/folder/init.py
+++ b/smart/folder/init.py
@ -9,6 +9,8 @@ import os
 import pandas as pd
 import io
 import datetime
 import glob
 class Util :
    def size(self,stream):
@ -58,13 +60,14 @@ def read(**args):
        r = dict(r, **rows)
    N = 0 if not os.path.exists(args['path']) else len( os.listdir(args['path']))
    path = args['path'] if args['path'].endswith('/')else args['path']+os.sep
    r['path'] = args['path']
-    r['files']= N
+    r['files']= len([filename for filename in glob.iglob(path+'**/**', recursive=True)])
    r['name'] = args['path'].split(os.sep)[-1:][0]
    r['node'] = os.uname()[1]
    r['date'] = datetime.datetime.now().strftime('%m-%d-%Y')
    r['time'] = datetime.datetime.now().strftime('%H:%M:%S')
-
+    
    return pd.DataFrame([r])
    pass
--- a/smart/folder/main.py
+++ b/smart/folder/main.py
@ -1,2 +1,2 @@
 import smart.folder
-print (smart.folder.read(path='/home/steve/dev/data/vumc/aou'))
+print (smart.folder.read(path='/home/steve/tmp/logs'))
--- a/smart/info.py
+++ b/smart/info.py
@ -0,0 +1,15 @@
 import os
 # name, version and author of the package (also read by setup.py)
 __app_name__= "smart-logger"
 __version__ = "1.0-RC"
 __author__ = "Steve L. Nyemba, info@the-phi.com"
 # folder that holds config.json and the sqlite database
 # expanduser is used because os.environ['HOME'] raises a KeyError when the HOME variable is not set
 __home__ = os.sep.join([os.path.expanduser('~'),'.smart-logger'])
 # name of the database (without extension)
 __database__='smart_logs'
 __license__= """
 Copyright 2017 - 2023, The Phi Technology
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 """
--- a/smart/top/init.py
+++ b/smart/top/init.py
@ -29,10 +29,10 @@ class Util:
            name = p[0]
            args = " ".join(p[1:])
        else:
-            name = cmd.split('/')[len(cmd.split(os.sep))-1]
+            name = cmd.split(os.sep)[len(cmd.split(os.sep))-1]
            args = " ".join(stream[index:]) if index > 0 else ""
-        return [name,cmd,args]        
+        return [name,cmd.replace('"',"\\'"),args.replace('"',"\\'")]        
    def parse(self,rows,xchar=';'):
        """
        This function parses the document returned by the execution of the command returns a document that will have to be parsed and formatted
@ -42,7 +42,6 @@ class Util:
        ARGS_INDEX = 6
        for item in rows :
            if rows.index(item) != 0 :
                parts = item.split(xchar)
                row = parts[:TIME_INDEX]
@ -64,14 +63,13 @@ def read(**args) :
    cmd     = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'"
    xchar   = ";"
    try:
        handler = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE)			
        stream = handler.communicate()[0]        
        if sys.version_info[0] > 2 :
            rows = str(stream).split('\\n')
        else:
            rows = stream.split('\n')
-        
+
        formatter = Util()
        m = formatter.parse(rows)
@ -86,22 +84,22 @@ def read(**args) :
        df['node'] = np.repeat(os.uname()[1],df.shape[0])
        df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node']
        #
        # We should filter the name of the apps we are interested in here (returning the full logs )
        # @TODO: Add filter here to handle filter on different columns
        #
-        if 'name' in args :
+        if 'name' in args and args['name']:
            names = args['name'].split(',')
            r = pd.DataFrame()
            for name in names :
                # tmp = df[df.name == name.strip() ]
-                ii = df.apply(lambda row: row['name'] == name.strip() or (name.strip() in str(row['name'])),axis=1).tolist()
+                # ii = df.apply(lambda row: row['name'] == name.strip() or (name.strip() in str(row['name'])),axis=1).tolist()
                ii = df.apply(lambda row: type(row['cmd']) ==str and name.strip() in row['cmd'],axis=1).tolist()
                tmp= df[ii]
                # tmp.index = np.arange(tmp.shape[0])
                if tmp.empty:
-                    tmp = {"pid":None,"user":None,"mem":0,"cpu":0,"status":"-100","started":None,"name":_name,"cmd":None,"args":None,"date":d,"time":t,"node":n}
+                    tmp = {"pid":None,"user":None,"mem":0,"cpu":0,"status":"-100","started":None,"name":name,"cmd":None,"args":None,"date":d,"time":t,"node":n}
                else:
                    r = r.append(tmp,ignore_index=False)
@ -112,8 +110,8 @@ def read(**args) :
        # For security reasons lets has the args columns with an MD5 or sha256
        #
-        if not df.empty and 'args' in df : 
+        # if not df.empty and 'args' in df : 
-            df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()]
+        #     df.args = [hashlib.md5(str(value).encode('utf-8')).hexdigest() for value in df.args.tolist()]
        STATUS = {'R':'RUNNING','Z':'DEAD','D':'STASIS','S':'SLEEP','Sl':'SLEEP','Ss':'SLEEP','W':'PAGING','T':'DEAD'}
        df.status = df.status.apply(lambda value: STATUS.get(value,'UNKNOWN'))
        if 'cols' in args :
@ -126,7 +124,7 @@ def read(**args) :
            logger = args['logger']
            logger(data=df)
        df.index = np.arange(df.shape[0])
-       
+        
        return df #.to_dict(orient='records')
    except Exception as e:
--- a/smart/top/main.py
+++ b/smart/top/main.py
@ -32,6 +32,7 @@ if len(sys.argv) > 1:
        i += 2
 if __name__ == '__main__' :
    try:
        if 'help' in SYS_ARGS :
@ -50,7 +51,9 @@ if __name__ == '__main__' :
            df = pd.DataFrame()
            if 'folder' in SYS_ARGS :
                for path in SYS_ARGS['folder'].split(',') :
-                    df = df.append(smart.folder.read(path=path))
+                    #df = df.concat(smart.folder.read(path=path))
                    _df = smart.folder.read(path=path)
                    # pd.concat expects the frames in a single list, separate arguments raise a TypeError
                    df = _df if df.shape[0] == 0 else pd.concat([df,_df])
                cols = df.columns.tolist()
            else:
                df =  smart.top.read()
@ -73,7 +76,8 @@ if __name__ == '__main__' :
            # df = pd.DataFrame(smart.top.read(name='fire'))    
-            log = log.append(df)
+            #log = log.append(df)
            # pd.concat expects the frames in a single list, separate arguments raise a TypeError
            log = df if log.shape[0] ==0 else pd.concat([log,df])
            if not df.empty :
                print (df[cols])
            if 'watch' in SYS_ARGS :                
@ -89,4 +93,4 @@ if __name__ == '__main__' :
        print ("... Exiting, Thanks for using smart-top")
    # pass
-# print (df.groupby(['user'])['cpu','mem'].sum())
+# print (df.groupby(['user'])['cpu','mem'].sum())
`@ -1,2 +1,2 @@`
	`import smart.folder`	`import smart.folder`
	`print (smart.folder.read(path='/home/steve/dev/data/vumc/aou'))`	`print (smart.folder.read(path='/home/steve/tmp/logs'))`