You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
210 lines
5.1 KiB
Python
210 lines
5.1 KiB
Python
5 years ago
|
"""
|
||
|
Data Transport - 1.0
|
||
|
Steve L. Nyemba, The Phi Technology LLC
|
||
|
|
||
|
This module is designed to serve as a wrapper to a set of supported data stores :
|
||
|
- couchdb
|
||
|
- mongodb
|
||
|
- Files (character delimited)
|
||
|
- Queues (RabbitMQ)
|
||
|
- Session (Flask)
|
||
|
- s3
|
||
|
The supported operations are read/write and providing meta data to the calling code
|
||
|
Requirements :
|
||
|
pymongo
|
||
|
boto
|
||
|
cloudant
|
||
|
The configuration for the data-store is as follows :
|
||
|
couchdb:
|
||
|
{
|
||
|
args:{
|
||
|
url:<url>,
|
||
|
username:<username>,
|
||
|
password:<password>,
|
||
|
dbname:<database>,
|
||
5 years ago
|
# m[xchar].append(0)
|
||
|
|
||
|
|
||
|
|
||
|
# #
|
||
|
# # The delimiter with the smallest variance, provided the mean is greater than 1
|
||
|
# # This would be troublesome if there are many broken records sampled
|
||
|
# #
|
||
|
# m = {id: np.var(m[id]) for id in m.keys() if m[id] != [] and int(np.mean(m[id]))>1}
|
||
|
# index = m.values().index( min(m.values()))
|
||
|
# xchar = m.keys()[index]
|
||
|
|
||
|
# return xchar
|
||
|
# def col_count(self,sample):
|
||
|
# """
|
||
|
# This function returns the number of columns of a given sample
|
||
|
# @pre self.xchar is not None
|
||
|
# """
|
||
|
|
||
|
# m = {}
|
||
|
# i = 0
|
||
|
|
||
|
# for row in sample:
|
||
|
# row = self.format(row)
|
||
|
# id = str(len(row))
|
||
|
# #id = str(len(row.split(self.xchar)))
|
||
|
|
||
|
# if id not in m:
|
||
|
# m[id] = 0
|
||
|
# m[id] = m[id] + 1
|
||
|
|
||
|
# index = m.values().index( max(m.values()) )
|
||
|
# ncols = int(m.keys()[index])
|
||
|
|
||
|
|
||
|
# return ncols;
|
||
|
# def format (self,row):
|
||
|
# """
|
||
|
# This function will clean records of a given row by removing non-ascii characters
|
||
|
# @pre self.xchar is not None
|
||
|
# """
|
||
|
|
||
|
# if isinstance(row,list) == False:
|
||
|
# #
|
||
|
# # We've observed that fields sometimes contain the delimiter as a legitimate character; we need to be able to account for this and not tamper with the field values (unless necessary)
|
||
|
# cols = self.split(row)
|
||
|
# #cols = row.split(self.xchar)
|
||
|
# else:
|
||
|
# cols = row ;
|
||
|
# return [ re.sub('[^\x00-\x7F,\n,\r,\v,\b,]',' ',col.strip()).strip().replace('"','') for col in cols]
|
||
|
|
||
|
# def split (self,row):
|
||
|
# """
|
||
|
# This function performs a split of a record and attempts to preserve the integrity of the data within, i.e. accounting for the double quotes.
|
||
|
# @pre : self.xchar is not None
|
||
|
# """
|
||
|
|
||
|
# pattern = "".join(["(?:^|",self.xchar,")(\"(?:[^\"]+|\"\")*\"|[^",self.xchar,"]*)"])
|
||
|
# return re.findall(pattern,row.replace('\n',''))
|
||
|
|
||
|
|
||
|
# class Writer:
|
||
|
|
||
|
# def format(self,row,xchar):
|
||
|
# if xchar is not None and isinstance(row,list):
|
||
|
# return xchar.join(row)+'\n'
|
||
|
# elif xchar is None and isinstance(row,dict):
|
||
|
# row = json.dumps(row)
|
||
|
# return row
|
||
|
# """
|
||
|
# It is important to be able to archive data so as to ensure that growth is controlled
|
||
|
# Nothing in nature grows indefinitely neither should data being handled.
|
||
|
# """
|
||
|
# def archive(self):
|
||
|
# pass
|
||
|
# def flush(self):
|
||
|
# pass
|
||
|
|
||
|
# class factory :
|
||
|
# @staticmethod
|
||
|
# def instance(**args):
|
||
|
|
||
|
# source = args['type']
|
||
|
# params = args['args']
|
||
|
# anObject = None
|
||
|
|
||
|
# if source in ['HttpRequestReader','HttpSessionWriter']:
|
||
|
# #
|
||
|
# # @TODO: Make sure objects are serializable, be smart about them !!
|
||
|
# #
|
||
|
# aClassName = ''.join([source,'(**params)'])
|
||
|
|
||
|
|
||
|
# else:
|
||
|
|
||
|
# stream = json.dumps(params)
|
||
|
# aClassName = ''.join([source,'(**',stream,')'])
|
||
|
# try:
|
||
|
# anObject = eval( aClassName)
|
||
|
# #setattr(anObject,'name',source)
|
||
|
# except Exception,e:
|
||
|
# print ['Error ',e]
|
||
|
# return anObject
|