You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
74 lines
2.1 KiB
Python
74 lines
2.1 KiB
Python
"""
|
|
This file is designed to retrieve information on a folder
|
|
{files,size,hash}
|
|
"""
|
|
import subprocess
|
|
import sys
|
|
import re
|
|
import os
|
|
import pandas as pd
|
|
import io
|
|
import datetime
|
|
import glob
|
|
|
|
class Util :
    """
    Helpers that convert the raw text printed by shell commands into dictionaries
    """
    # <number><optional unit letter>, optionally followed by "B" or "iB"
    # e.g. 4.0K, 12M, 1.5G, 2.1T, 512B, 1.5GiB or a bare 0
    _SIZE = re.compile(r'^\s*(.*?)\s*([BKMGTPEZY]?)(?:i?B)?\s*$')

    def size(self,stream):
        """
        Parse a human readable size (the first column printed by du -sh)

        :param stream   text such as '4.0K', '12M', '1.5G', '2.1T', '512B' or '0'
        :return         {"size":<float>,"units":<label>} where the label is 'B' when there is
                        no unit (or a plain B) and otherwise the unit letter followed by 'B'
                        (KB, MB, GB, TB ...). The value itself is NOT rescaled.
        :raises ValueError  when no number can be read from the text
        """
        found = self._SIZE.match(stream)
        try:
            value = float(found.group(1)) if found else None
        except ValueError:
            value = None
        if value is None :
            #
            # Report the offending text instead of failing on a missing regex match
            raise ValueError('unable to parse a size from %r' % (stream,))
        letter = found.group(2)
        units = 'B' if letter in ('','B') else letter + 'B'
        return {"size":value,"units":units}

    def content(self,stream):
        """
        Extract the first space separated token of a line (the digest when given md5sum output)

        :param stream   one line of text
        :return         {"content":<first token, stripped>}
        """
        return {"content":stream.split(' ')[0].strip()}
|
|
|
|
|
|
def read(**args):
    """
    Collect summary information about a folder: size, content hash and number of files

    The path is inserted as-is (unquoted) into the shell commands below, so the shell
    expands any pattern it contains. Only the first line printed by each command is used.
    NOTE(review): because of this the path must come from a trusted source, and paths
    containing spaces or shell metacharacters will not behave as expected.

    :param path     folder to inspect (required keyword argument)
    :return         a pandas DataFrame with a single row and the columns
                    size, units, content, path, files, name, node, date, time
    """
    path = args['path']
    cmd = {"size":"du -sh :path","content":"find :path -type f -exec md5sum {} + | sort -z|md5sum"}
    r = {}
    util = Util()
    for key,template in cmd.items() :
        handler = subprocess.Popen(template.replace(':path',path),shell=True,stdout=subprocess.PIPE,encoding='utf-8')
        stream = handler.communicate()[0]
        #
        # encoding='utf-8' already makes the output a str, no python 2 fallback is needed
        first_row = stream.split('\n')[0]
        if key == 'size' :
            #
            # du prints "<size>\t<path>", only the size column is of interest
            r.update(util.size(first_row.split('\t')[0]))
        elif key == 'content' :
            #
            # There is a hash key that is generated and should be extracted
            r.update(util.content(first_row))

    root = path if path.endswith('/') else path+os.sep
    r['path'] = path
    #
    # Count regular files only, each one once: the previous '**/**' pattern also
    # yielded directories (the root included) and repeated nested entries.
    # glob skips entries whose name starts with a dot.
    r['files'] = sum(1 for entry in glob.iglob(root+'**', recursive=True) if os.path.isfile(entry))
    #
    # Strip trailing separators first so that 'a/b/' is named 'b' and not ''
    r['name'] = path.rstrip(os.sep).split(os.sep)[-1]
    r['node'] = os.uname()[1]
    #
    # A single timestamp keeps date and time consistent around midnight
    now = datetime.datetime.now()
    r['date'] = now.strftime('%m-%d-%Y')
    r['time'] = now.strftime('%H:%M:%S')

    return pd.DataFrame([r])
|