diff --git a/src/data-collector.py b/src/data-collector.py index e126c11..1df9474 100755 --- a/src/data-collector.py +++ b/src/data-collector.py @@ -41,8 +41,8 @@ class Collector : # -- let's get the list of features we are interested . url = SYS_ARGS['api']+'/1/client/login' key = SYS_ARGS['key'] - id = SYS_ARGS['id'] if 'id' in SYS_ARGS else os.environ['HOSTNAME'] - headers = {"key":key,"id":id} + self.id = SYS_ARGS['id'] if 'id' in SYS_ARGS else os.environ['HOSTNAME'] + headers = {"key":key,"id":self.id} # #-- what features are allowed @@ -75,7 +75,9 @@ class Collector : key = SYS_ARGS['key'] id = SYS_ARGS['id'] if 'id' in SYS_ARGS else os.environ['HOSTNAME'] headers = {"key":key,"id":id,"context":args['context'],"content-type":"application/json"} - body = args['data'].to_json(orient='records') + + body = args['data'].fillna('').to_json(orient='records') + if args['data'].shape[0] > 0 : r = self.httpclient.post(url,headers=headers,data=body) Logger.log(action="post."+args['context'],value=r.status_code) @@ -93,9 +95,9 @@ class Collector : if self.config and self.features : ELAPSED_TIME = 60* int(self.features['schedule'].replace("min","").strip()) if 'apps' in self.config : - self.post( data=(Apps()).get(filter=self.config['apps']),context="apps") + self.post( data=(Apps(node=self.id)).get(filter=self.config['apps']),context="apps") if 'folders' in self.config and self.config['folders'] : - folder = Folders() + folder = Folders(node=self.id) f = folder.get(path=self.config['folders']) self.post(data = f ,context="folders") @@ -105,6 +107,7 @@ class Collector : # @TODO: Evaluate whether to wake up the system or not (security concerns)! # time.sleep(ELAPSED_TIME) + except Exception,e: Logger.log(action='error',value=e.message) diff --git a/src/monitor.py b/src/monitor.py index b6b6287..f81ee96 100755 --- a/src/monitor.py +++ b/src/monitor.py @@ -16,17 +16,21 @@ import sys import pandas as pd import datetime class SmartTop: + def __init__(self,**args): + self.node = args['node'] def get(self,**args): return None class Apps(SmartTop) : - def __init__(self): + def __init__(self,**args): """ This class will process a system command and parse the outpout accordingly given a parser @param parse is a parser pointer """ + SmartTop.__init__(self,**args) self.cmd = "ps -eo pid,user,pmem,pcpu,stat,etime,args|awk 'OFS=\";\" {$1=$1; if($5 > 9) print }'" self.xchar = ';' + def get_app(self,stream): index = 1 if os.path.exists(" ".join(stream[:1])) else len(stream)-1 @@ -46,13 +50,18 @@ class Apps(SmartTop) : @param m raw matrix i.e list of values like a csv """ + d = datetime.datetime.now().strftime('%m-%d-%Y') + t = datetime.datetime.now().strftime('%H:%M:%S') m = [item for item in m if len(item) != len (m[0])] m = "\n".join(m[1:]) df = pd.read_csv(pd.compat.StringIO(m),sep=self.xchar) - df.columns =['pid','user','mem','cpu','status','started','name','cmd','args'] + df['date'] = np.repeat(d,df.shape[0]) + df['time'] = np.repeat(t,df.shape[0]) + df['node'] = np.repeat(self.node,df.shape[0]) + df.columns =['pid','user','mem','cpu','status','started','name','cmd','args','date','time','node'] return df def empty(self,name): - return pd.DataFrame([{"pid":None,"user":None,"mem":0,"cpu":0,"status":"X","started":None,"name":name,"cmd":None,"args":None}]) + return pd.DataFrame([{"pid":None,"user":None,"mem":0,"cpu":0,"status":"X","started":None,"name":name,"cmd":None,"args":None,"date":None,"time":None,"node":self.node}]) def parse(self,rows): m = [] TIME_INDEX = 5 @@ -79,7 +88,7 @@ class Apps(SmartTop) : """ try: - handler = subprocess.Popen(self.cmd,shell=True,stdout=subprocess.PIPE) + handler = subprocess.Popen(self.cmd,shell=True,stdout=subprocess.PIPE) stream = handler.communicate()[0] rows = stream.split('\n') df = self.to_pandas(self.parse(rows)) @@ -87,20 +96,28 @@ class Apps(SmartTop) : if 'filter' in args : pattern = "|".join(args['filter']) i = df.cmd.str.contains(pattern) - r = df[i] + r = df[i].copy() + r.index = np.arange(0,r.shape[0]) ii= (1 + np.array(i)*-1) == 1 - other = pd.DataFrame(df[ii].sum()).T + other = pd.DataFrame(df[ii].sum()).T.copy() + other.index = np.arange(0,other.shape[0]) other.user = other.name = other.status = other.cmd = other.args = 'other' other.started = other.pid = -1 other = other[other.columns[1:]] for name in args['filter'] : - i = r.cmd.str.contains(str(name),case=False,na=False) + i = r.cmd.str.contains(str(name.strip()),case=False,na=False) if i.sum() == 0: r = r.append(self.empty(name),sort=False) else : - r.loc[i,'name'] = name - r = r.append(other,sort=False) + pass + # r[i].update (pd.DataFrame({"name":np.repeat(name,r.shape[0])})) + r.loc[i, 'name'] = np.repeat(name,i.sum()) + # r.loc[i].name = name + + + r = r.append(other,sort=False) r.index = np.arange(r.shape[0]) + return r except Exception,e: print (e) @@ -111,8 +128,9 @@ class Folders(SmartTop): """ This class will assess a folder and produce a report in a data-frame that can be later on used for summary statistics """ - def __init__(self): - pass + def __init__(self,**args): + SmartTop.__init__(self,**args) + def _get(self,dir_path,r=[]): for child in os.listdir(dir_path): path = os.path.join(dir_path, child) @@ -125,9 +143,8 @@ class Folders(SmartTop): file_date = datetime.datetime.fromtimestamp(file_date) now = datetime.datetime.now() age = (now - file_date ).days - name = os.path.basename(path) - r.append({"name":name,"path":path,"size":size,"age":age}) + r.append({"name":name,"path":path,"size":size,"age":age,"date":now.strftime('%m-%d-%Y'),"time":now.strftime('%H:%M:%S'),"node":self.node }) return r def get(self,**args): @@ -140,6 +157,7 @@ class Folders(SmartTop): paths = paths _out = pd.DataFrame() for path in paths : + name = os.path.basename(path) if os.path.exists(path) : # # If the folder does NOT exists it should not be treated. @@ -147,9 +165,10 @@ class Folders(SmartTop): rows = self._get(path) if len(rows) > 0 : r = pd.DataFrame(rows) - r = pd.DataFrame([{"name":path,"files":r.shape[0],"age_in_days":r.age.mean(),"size_in_kb":r['size'].sum()}]) + r = pd.DataFrame([{"name":name,"path":path,"files":r.shape[0],"age_in_days":r.age.mean(),"size_in_kb":r['size'].sum(),"date":r.date.max(),"time":r.time.max(),"node":r.node.max()}]) _out = _out.append(r,sort=False) # # @TODO: The state of the hard-drive would be a good plus - + # os.system('df -h /') + _out.index = np.arange(0,_out.shape[0]) return _out