""" This file is designed to retrieve information on a folder {files,size,hash} """ import subprocess import sys import re import os import pandas as pd import io import datetime class Util : def size(self,stream): PATTERN = '(^.+)([A-Z]+$)' value,units = re.match('^(.+)([A-Z]+$)',stream).groups() value = float(value) if 'G' == units : value *= 1000 elif 'K' == units: value /= 1000 units = 'MB' return {"size":value,"units":units} def content(self,stream): return {"content":stream.split(' ')[0].strip()} def read(**args): """ The path can also take in regular expressions """ cmd = {"size":"du -sh :path","content":"find :path -type f -exec md5sum {} + | sort -z|md5sum"} r = {} util = Util() for key in cmd : _cmd = cmd[key] handler = subprocess.Popen(_cmd.replace(':path',args['path']),shell=True,stdout=subprocess.PIPE,encoding='utf-8') stream = handler.communicate()[0] if sys.version_info[0] > 2 : rows = str(stream).split('\n') else: rows = stream.split('\n') if key == 'size' : rows = rows[0] rows = util.size(rows.split('\t')[0]) elif key == 'content' : # # There is a hash key that is generated and should be extracted rows = rows[0] rows = util.content(rows) r = dict(r, **rows) r['path'] = args['path'] r['name'] = args['path'].split(os.sep)[-1:][0] r['node'] = os.uname()[1] r['date'] = datetime.datetime.now().strftime('%m-%d-%Y') r['time'] = datetime.datetime.now().strftime('%H:%M:%S') return pd.DataFrame([r]) pass