"""
Data Transport - 1.0
Steve L. Nyemba, The Phi Technology LLC

This file is a wrapper around AWS S3 buckets, for reading and writing their content
"""
from datetime import datetime
import boto
from boto.s3.connection import S3Connection, OrdinaryCallingFormat
import numpy as np
import botocore
from smart_open import smart_open
import sys
import json
from io import StringIO

class s3 :
    """
    Wrapper around an S3 connection, optionally bound to a single bucket and file
    @TODO: Implement a search function for a file given a bucket??
    """
    def __init__(self,**args) :
        """
        This function initializes the connection to S3, from which a file or set of files can be extracted
        @param access_key   AWS access key
        @param secret_key   AWS secret key
        @param bucket       name of the bucket to bind to (optional)
        @param file         name of the file to read or write (optional)
        @param filter       filename or filtering elements (optional)
        """
        try:
            self.s3 = S3Connection(args['access_key'],args['secret_key'],calling_format=OrdinaryCallingFormat())
            self.bucket = self.s3.get_bucket(args['bucket'].strip(),validate=False) if 'bucket' in args else None
            # self.path = args['path']
            self.filter = args['filter'] if 'filter' in args else None
            self.filename = args['file'] if 'file' in args else None
            self.bucket_name = args['bucket'] if 'bucket' in args else None

        except Exception as e :
            self.s3 = None
            self.bucket = None
            print (e)
    def meta(self,**args):
        """
        This function returns the name and size of every key in a bucket
        :name name of the bucket
        """
        info = self.list(**args)
        [item.open() for item in info]  # open each key so its attributes are populated
        return [{"name":item.name,"size":item.size} for item in info]
    def list(self,**args):
        """
        This function lists the content of a bucket; the bucket must be identified by name
        :name name of the bucket
        """
        return list(self.s3.get_bucket(args['name']).list())

    def buckets(self):
        #
        # This function returns the names of all buckets on the account.
        # It should be used cautiously, depending on how the s3 infrastructure is used
        #
        return [item.name for item in self.s3.get_all_buckets()]

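# A minimal usage sketch of the base class (an assumed example, not part of the
# original file); the credentials and bucket name below are placeholders.
#
#   store = s3(access_key='<ACCESS_KEY>', secret_key='<SECRET_KEY>', bucket='my-bucket')
#   print (store.buckets())                 # names of every bucket on the account
#   print (store.list(name='my-bucket'))    # keys contained in the given bucket
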
class Reader(s3) :
    """
    Because s3 contains buckets and files, reading becomes a tricky proposition :
        - list files if file is None
        - stream content if file is not None
    @TODO: support read from all buckets, think about it
    """
    def __init__(self,**args) :
        s3.__init__(self,**args)
    def files(self):
        """
        This function returns the names of the non-empty files in the bound bucket
        """
        r = []
        try:
            return [item.name for item in self.bucket if item.size > 0]
        except Exception as e:
            pass
        return r
    def stream(self,limit=-1):
        """
        This function streams the bound file from its bucket, line by line, up to an optional limit
        """
        key = self.bucket.get_key(self.filename.strip())
        if key is None :
            yield None
        else:
            count = 0
            with smart_open(key) as remote_file:
                for line in remote_file:
                    if count == limit and limit > 0 :
                        break
                    yield line
                    count += 1
    def read(self,**args) :
        if self.filename is None :
            #
            # returning the list of files because no single file was specified
            return self.files()
        else:
            limit = args['size'] if 'size' in args else -1
            return self.stream(limit)

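# A minimal reading sketch (an assumed example; credentials, bucket and file
# names are placeholders):
#
#   reader = Reader(access_key='<ACCESS_KEY>', secret_key='<SECRET_KEY>',
#                   bucket='my-bucket', file='logs/app.log')
#   for line in reader.read(size=10):   # first 10 lines of the file
#       print (line)
#
# Without a 'file' argument, reader.read() instead returns the names of the
# non-empty files in the bucket.
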
class Writer(s3) :

    def __init__(self,**args) :
        s3.__init__(self,**args)
    def mkdir(self,name):
        """
        This function creates a folder in the bound bucket
        :name name of the folder
        """
        #
        # S3 has no real folders; the convention is an empty key whose name ends in '/'.
        # The original call (self.s3.put_object) belongs to the boto3 client API, which
        # S3Connection does not provide, so the legacy boto key API is used here instead
        key = self.bucket.new_key(name.strip('/')+'/')
        key.set_contents_from_string('')
    def write(self,content):
        """
        This function writes content to the bound file in the bound bucket
        """
        if isinstance(content,bytes) :
            content = content.decode("utf8")
        #
        # As in mkdir, upload_fileobj is a boto3 call; the boto equivalent is used here
        key = self.bucket.new_key(self.filename)
        key.set_contents_from_string(content)
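
# A minimal writing sketch (an assumed example; names are placeholders):
#
#   writer = Writer(access_key='<ACCESS_KEY>', secret_key='<SECRET_KEY>',
#                   bucket='my-bucket', file='out/result.json')
#   writer.mkdir('out')
#   writer.write(json.dumps({"status":"ok"}))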