You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
smart-top/smart/folder/__init__.py

74 lines
2.1 KiB
Python

"""
This file is designed to retrieve information on a folder
{files,size,hash}
"""
import subprocess
import sys
import re
import os
import pandas as pd
import io
import datetime
import glob
class Util:
    """
    Parsers for the raw text printed by the shell commands this module runs.
    """

    # Suffix letter printed by ``du -h`` -> unit label reported to callers.
    _UNITS = {'B': 'B', 'K': 'KB', 'M': 'MB', 'G': 'GB', 'T': 'TB', 'P': 'PB'}

    def size(self, stream):
        """
        Convert a human-readable size such as ``4.0K`` or ``1.5G`` into a
        number and a unit label.

        :param stream: size column of a ``du -sh`` line, i.e. a number that
                       may be followed by an upper-case unit suffix
        :return: ``{"size": <float>, "units": <str>}``. A value without a
                 suffix is reported in ``B``, an unrecognized suffix falls
                 back to ``MB`` and empty input yields a size of ``0.0``
        :raises ValueError: when the numeric part can not be parsed
        """
        text = (stream or '').strip()
        if not text:
            # Nothing was printed (e.g. the path does not exist): report an
            # empty size instead of failing on a missing regex match.
            return {"size": 0.0, "units": "B"}
        found = re.match(r'^(.+?)([A-Z]*)$', text)
        if found is None:
            raise ValueError("unrecognized size: {!r}".format(stream))
        number, suffix = found.groups()
        # Some locales print a decimal comma (``4,0K``) that float() rejects.
        value = float(number.replace(',', '.'))
        if suffix:
            # Only the first letter carries the magnitude (``K`` and ``KB``).
            units = self._UNITS.get(suffix[0], 'MB')
        else:
            # Small sizes are printed without any suffix; label them as bytes.
            units = 'B'
        return {"size": value, "units": units}

    def content(self, stream):
        """
        Extract the digest from a checksum line.

        :param stream: line whose first space-separated token is the digest
                       (e.g. ``<digest>  -``)
        :return: ``{"content": <digest>}``
        """
        return {"content": stream.split(' ')[0].strip()}
def read(**args):
    """
    Collect the size, content hash and file count of a folder.

    The path is handed unquoted to the shell and to ``glob``, so it may
    contain wildcards (``*``, ``?``); these are wildcard patterns, not
    regular expressions.

    :param path: (keyword, required) folder to inspect
    :return: single-row ``pandas.DataFrame`` with the columns
             size, units, content, path, files, name, node, date, time
    :raises KeyError: when ``path`` is not provided
    """
    target = args['path']
    # NOTE(security): the path is interpolated into a shell command line on
    # purpose so wildcards keep expanding; never pass an untrusted value here.
    cmd = {"size":"du -sh :path","content":"find :path -type f -exec md5sum {} + | sort -z|md5sum"}
    util = Util()
    r = {}
    for key, template in cmd.items():
        done = subprocess.run(template.replace(':path', target), shell=True, stdout=subprocess.PIPE, encoding='utf-8')
        # Only the first output line is meaningful for both commands.
        first = done.stdout.split('\n')[0]
        if key == 'size':
            # ``du`` prints "<size>\t<path>"; keep the size column only.
            r.update(util.size(first.split('\t')[0]))
        elif key == 'content':
            # The checksum line starts with the hash that must be extracted.
            r.update(util.content(first))

    folder = target if target.endswith('/') else target + os.sep
    # A single timestamp keeps date and time consistent around midnight.
    now = datetime.datetime.now()
    r['path'] = target
    r['files'] = _count_files(folder)
    # Ignore a trailing separator so "/a/b/" is still named "b".
    r['name'] = target.rstrip(os.sep).split(os.sep)[-1]
    r['node'] = os.uname()[1]
    r['date'] = now.strftime('%m-%d-%Y')
    r['time'] = now.strftime('%H:%M:%S')
    return pd.DataFrame([r])


def _count_files(folder):
    """
    Count the distinct regular files found at any depth below ``folder``.

    ``folder`` must end with a path separator. Directories are not counted
    and every file is counted once. Names starting with a dot are not
    matched by ``glob`` and are therefore left out.
    """
    matches = glob.iglob(folder + '**', recursive=True)
    return len({entry for entry in matches if os.path.isfile(entry)})