data transport framework for rabbitmq, mongodb, couchdb, ...

6 years ago · 0b15351b8e
parent 3443ec8da8
commit 0b15351b8e
17 changed files with 1078 additions and 0 deletions
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,19 @@
 """
 This is a build file for the 
 """
 from setuptools import setup, find_packages
 setup(
    name = "data-transport",
    version = "1.0",
    author = "The Phi Technology LLC",
    author_email = "steve@the-phi.com",
    license = "MIT",
    packages=['transport'],
    install_requires = ['pymongo','numpy','cloudant','pika','boto','flask-session','smart_open'],
    use_2to3=True,
    convert_2to3_doctests=['src/your/module/README.txt'],
    use_2to3_fixers=['your.fixers'],
    use_2to3_exclude_fixers=['lib2to3.fixes.fix_import'],
    )
--- a/transport/init.py
+++ b/transport/init.py
@ -0,0 +1,209 @@
 """
 Data Transport - 1.0
 Steve L. Nyemba, The Phi Technology LLC
 This module is designed to serve as a wrapper to a set of supported data stores :
    - couchdb
    - mongodb
    - Files (character delimited)
    - Queues (RabbmitMq)
    - Session (Flask)
    - s3
 The supported operations are read/write and providing meta data to the calling code
 Requirements :
 	pymongo
 	boto
 	couldant
 The configuration for the data-store is as follows :
 	couchdb:
 		{
 			args:{
 				url:<url>,
 				username:<username>,
 				password:<password>,
 				dbname:<database>,
 				uid:<document id>
 			}
 		}
 	RabbitMQ:
 		{
 		}
 	Mongodb:
 	{
 		args:{
 			host:<url>, #localhost:27017
 			username:<username>,
 			password:<password>,
 			dbname:<database>,
 			uid:<document id>s
 		}
 	}
 """
 __author__ = 'The Phi Technology'
 import numpy as np
 import json
 import importlib 
 from common import Reader, Writer #, factory
 # import disk
 # import queue
 # import couch
 # import mongo
 # import s3
 class factory :
 	@staticmethod
 	def instance(**args):
 		"""
 		This class will create an instance of a transport when providing 
 		:type	name of the type we are trying to create
 		:args	The arguments needed to create the instance
 		"""
 		source = args['type']		
 		params = args['args']
 		anObject = None
 		if source in ['HttpRequestReader','HttpSessionWriter']:
 			#
 			# @TODO: Make sure objects are serializable, be smart about them !!
 			#
 			aClassName = ''.join([source,'(**params)'])
 		else:
 			stream = json.dumps(params)
 			aClassName = ''.join([source,'(**',stream,')'])
 		try:
 			anObject = eval( aClassName)
 			#setattr(anObject,'name',source)
 		except Exception,e:
 			print ['Error ',e]
 		return anObject
 # class Reader:
 # 	def __init__(self):
 # 		self.nrows = 0
 # 		self.xchar = None
 # 	def row_count(self):		
 # 		content = self.read()
 # 		return np.sum([1 for row in content])
 # 	def delimiter(self,sample):
 # 		"""
 # 			This function determines the most common delimiter from a subset of possible delimiters. 
 # 			It uses a statistical approach (distribution) to guage the distribution of columns for a given delimiter
 # 			:sample sample  string/content expecting matrix i.e list of rows
 # 		"""
 # 		m = {',':[],'\t':[],'|':[],'\x3A':[]} 
 # 		delim = m.keys()
 # 		for row in sample:
 # 			for xchar in delim:
 # 				if row.split(xchar) > 1:	
 # 					m[xchar].append(len(row.split(xchar)))
 # 				else:
 # 					m[xchar].append(0)
 # 		#
 # 		# The delimiter with the smallest variance, provided the mean is greater than 1
 # 		# This would be troublesome if there many broken records sampled
 # 		#
 # 		m = {id: np.var(m[id]) for id in m.keys() if m[id] != [] and int(np.mean(m[id]))>1}
 # 		index = m.values().index( min(m.values()))
 # 		xchar = m.keys()[index]
 # 		return xchar
 # 	def col_count(self,sample):
 # 		"""
 # 		This function retirms the number of columns of a given sample
 # 		@pre self.xchar is not None
 # 		"""
 # 		m = {}
 # 		i = 0
 # 		for row in sample:
 # 			row = self.format(row)
 # 			id = str(len(row))
 # 			#id = str(len(row.split(self.xchar))) 
 # 			if id not in m:
 # 				m[id] = 0
 # 			m[id] = m[id] + 1
 # 		index = m.values().index( max(m.values()) )
 # 		ncols = int(m.keys()[index])
 # 		return ncols;
 # 	def format (self,row):
 # 		"""
 # 			This function will clean records of a given row by removing non-ascii characters
 # 			@pre self.xchar is not None
 # 		"""
 # 		if isinstance(row,list) == False:
 # 			#
 # 			# We've observed sometimes fields contain delimiter as a legitimate character, we need to be able to account for this and not tamper with the field values (unless necessary)
 # 			cols = self.split(row)
 # 			#cols = row.split(self.xchar)
 # 		else:
 # 			cols = row ;
 # 		return [ re.sub('[^\x00-\x7F,\n,\r,\v,\b,]',' ',col.strip()).strip().replace('"','') for col in cols]
 # 	def split (self,row):
 # 		"""
 # 			This function performs a split of a record and tries to attempt to preserve the integrity of the data within i.e accounting for the double quotes.
 # 			@pre : self.xchar is not None
 # 		""" 
 # 		pattern = "".join(["(?:^|",self.xchar,")(\"(?:[^\"]+|\"\")*\"|[^",self.xchar,"]*)"])
 # 		return re.findall(pattern,row.replace('\n',''))
 # class Writer:
 # 	def format(self,row,xchar):
 # 		if xchar is not None and isinstance(row,list):
 # 			return xchar.join(row)+'\n'
 # 		elif xchar is None and isinstance(row,dict):
 # 			row = json.dumps(row)
 # 		return row
 # 	"""
 # 		It is important to be able to archive data so as to insure that growth is controlled
 # 		Nothing in nature grows indefinitely neither should data being handled.
 # 	"""
 # 	def archive(self):
 # 		pass
 # 	def flush(self):
 # 		pass
 # class factory :
 # 	@staticmethod
 # 	def instance(**args):
 # 		source = args['type']		
 # 		params = args['args']
 # 		anObject = None
 # 		if source in ['HttpRequestReader','HttpSessionWriter']:
 # 			#
 # 			# @TODO: Make sure objects are serializable, be smart about them !!
 # 			#
 # 			aClassName = ''.join([source,'(**params)'])
 # 		else:
 # 			stream = json.dumps(params)
 # 			aClassName = ''.join([source,'(**',stream,')'])
 # 		try:
 # 			anObject = eval( aClassName)
 # 			#setattr(anObject,'name',source)
 # 		except Exception,e:
 # 			print ['Error ',e]
 # 		return anObject
--- a/transport/init.pyc
+++ b/transport/init.pyc
--- a/transport/common.py
+++ b/transport/common.py
@ -0,0 +1,154 @@
 """
 Data Transport - 1.0
 Steve L. Nyemba, The Phi Technology LLC
 This module is designed to serve as a wrapper to a set of supported data stores :
    - couchdb
    - mongodb
    - Files (character delimited)
    - Queues (RabbmitMq)
    - Session (Flask)
    - s3
 The supported operations are read/write and providing meta data to the calling code
 Requirements :
 	pymongo
 	boto
 	couldant
 """
 __author__ = 'The Phi Technology'
 import numpy as np
 import json
 import importlib 
 # import couch
 # import mongo
 class Reader:
 	def __init__(self):
 		self.nrows = 0
 		self.xchar = None
 	def row_count(self):		
 		content = self.read()
 		return np.sum([1 for row in content])
 	def delimiter(self,sample):
 		"""
 			This function determines the most common delimiter from a subset of possible delimiters. 
 			It uses a statistical approach (distribution) to guage the distribution of columns for a given delimiter
 			:sample sample  string/content expecting matrix i.e list of rows
 		"""
 		m = {',':[],'\t':[],'|':[],'\x3A':[]} 
 		delim = m.keys()
 		for row in sample:
 			for xchar in delim:
 				if row.split(xchar) > 1:	
 					m[xchar].append(len(row.split(xchar)))
 				else:
 					m[xchar].append(0)
 		#
 		# The delimiter with the smallest variance, provided the mean is greater than 1
 		# This would be troublesome if there many broken records sampled
 		#
 		m = {id: np.var(m[id]) for id in m.keys() if m[id] != [] and int(np.mean(m[id]))>1}
 		index = m.values().index( min(m.values()))
 		xchar = m.keys()[index]
 		return xchar
 	def col_count(self,sample):
 		"""
 		This function retirms the number of columns of a given sample
 		@pre self.xchar is not None
 		"""
 		m = {}
 		i = 0
 		for row in sample:
 			row = self.format(row)
 			id = str(len(row))
 			#id = str(len(row.split(self.xchar))) 
 			if id not in m:
 				m[id] = 0
 			m[id] = m[id] + 1
 		index = m.values().index( max(m.values()) )
 		ncols = int(m.keys()[index])
 		return ncols;
 	def format (self,row):
 		"""
 			This function will clean records of a given row by removing non-ascii characters
 			@pre self.xchar is not None
 		"""
 		if isinstance(row,list) == False:
 			#
 			# We've observed sometimes fields contain delimiter as a legitimate character, we need to be able to account for this and not tamper with the field values (unless necessary)
 			cols = self.split(row)
 			#cols = row.split(self.xchar)
 		else:
 			cols = row ;
 		return [ re.sub('[^\x00-\x7F,\n,\r,\v,\b,]',' ',col.strip()).strip().replace('"','') for col in cols]
 	def split (self,row):
 		"""
 			This function performs a split of a record and tries to attempt to preserve the integrity of the data within i.e accounting for the double quotes.
 			@pre : self.xchar is not None
 		""" 
 		pattern = "".join(["(?:^|",self.xchar,")(\"(?:[^\"]+|\"\")*\"|[^",self.xchar,"]*)"])
 		return re.findall(pattern,row.replace('\n',''))
 class Writer:
 	def format(self,row,xchar):
 		if xchar is not None and isinstance(row,list):
 			return xchar.join(row)+'\n'
 		elif xchar is None and isinstance(row,dict):
 			row = json.dumps(row)
 		return row
 	"""
 		It is important to be able to archive data so as to insure that growth is controlled
 		Nothing in nature grows indefinitely neither should data being handled.
 	"""
 	def archive(self):
 		pass
 	def flush(self):
 		pass
 # class factory :
 # 	@staticmethod
 # 	def instance(**args):
 # 		"""
 # 		This class will create an instance of a transport when providing 
 # 		:type	name of the type we are trying to create
 # 		:args	The arguments needed to create the instance
 # 		"""
 # 		source = args['type']		
 # 		params = args['args']
 # 		anObject = None
 # 		if source in ['HttpRequestReader','HttpSessionWriter']:
 # 			#
 # 			# @TODO: Make sure objects are serializable, be smart about them !!
 # 			#
 # 			aClassName = ''.join([source,'(**params)'])
 # 		else:
 # 			stream = json.dumps(params)
 # 			aClassName = ''.join([source,'(**',stream,')'])
 # 		try:
 # 			anObject = eval( aClassName)
 # 			#setattr(anObject,'name',source)
 # 		except Exception,e:
 # 			print ['Error ',e]
 # 		return anObject
--- a/transport/common.pyc
+++ b/transport/common.pyc
--- a/transport/couch.py
+++ b/transport/couch.py
@ -0,0 +1,199 @@
 """
 Data-Transport
 Steve L. Nyemba, The Phi Technology
 This file is a wrapper around couchdb using IBM Cloudant SDK that has an interface to couchdb
 """
 import cloudant
 import json
 from common import Reader,Writer
 class Couch:
 	"""
 		@param	url		host & port reference
 		@param	uid		user id involved
 		@param	dbname		database name (target)
 	"""
 	def __init__(self,**args):
 		url 		= args['url']
 		self.uid 	= args['uid']
 		dbname		= args['dbname']
 		if 'username' not in args and 'password' not in args :
 			self.server 	= cloudant.CouchDB(url=url)
 		else:
 			self.server = cloudant.CouchDB(args['username'],args['password'],url=url)
 		self.server.connect()
 		if dbname in self.server.all_dbs() :
 			self.dbase	= self.server.get(dbname,dbname,True)
 			#
 			# @TODO Check if the database exists ...
 			#
 			doc = cloudant.document.Document(self.dbase,self.uid) #self.dbase.get(self.uid)
 			if not doc.exists():
 				doc = self.dbase.create_document({"_id":self.uid})
 				doc.save()
 		else:
 			self.dbase = None
 	"""
 		Insuring the preconditions are met for processing
 	"""
 	def isready(self):
 		p = self.server.metadata() != {}
 		if p == False or not self.dbase:
 			return False
 		#
 		# At this point we are sure that the server is connected
 		# We are also sure that the database actually exists
 		#
 		doc = cloudant.document.Document(self.dbase,self.uid)
 		# q = self.dbase.all_docs(key=self.uid)['rows'] 
 		# if not q :
 		if not doc.exists():
 			return False
 		return True
 	def view(self,**args):
 		"""
 			We are executing a view
 			:id	design document _design/xxxx (provide full name with _design prefix)
 			:view_name	name of the view i.e 
 			:key	key to be used to filter the content
 		"""
 		document = cloudant.design_document.DesignDocument(self.dbase,args['id'])
 		document.fetch()
 		params = {'group_level':1,'group':True}
 		if 'key' in  args :
 			params ['key'] = args['key']
 		elif 'keys' in args :
 			params['keys'] = args['keys']
 		return document.get_view(args['view_name'])(**params)['rows']
 class CouchReader(Couch,Reader):
 	"""
 		This function will read an attachment from couchdb and return it to calling code. The attachment must have been placed before hand (otherwise oops)
 		@T: Account for security & access control
 	"""
 	def __init__(self,**args):
 		"""
 			@param	filename	filename (attachment)
 		"""
 		#
 		# setting the basic parameters for 
 		Couch.__init__(self,**args)
 		if 'filename' in args :
 			self.filename 	= args['filename']
 		else:
 			self.filename = None
 	# def isready(self):
 	# 	#
 	# 	# Is the basic information about the database valid
 	# 	#
 	# 	p = Couchdb.isready(self)
 	# 	if p == False:
 	# 		return False
 	# 	#
 	# 	# The database name is set and correct at this point
 	# 	# We insure the document of the given user has the requested attachment.
 	# 	# 
 	# 	doc = self.dbase.get(self.uid)
 	# 	if '_attachments' in doc:
 	# 		r = self.filename in doc['_attachments'].keys()
 	# 	else:
 	# 		r = False
 	# 	return r	
 	def stream(self):
 		#
 		# @TODO Need to get this working ...
 		#
 		document = cloudant.document.Document(self.dbase,self.uid)
 		# content = self.dbase.fetch_attachment(self.uid,self.filename).split('\n') ;
 		content = self.get_attachment(self.filename)
 		for row in content:
 			yield row
 	def read(self,size=-1):
 		if self.filename is not None:
 			self.stream()
 		else:
 			return self.basic_read()
 	def basic_read(self):
 		document = cloudant.document.Document(self.dbase,self.uid)
 		# document = self.dbase.get(self.uid)
 		if document.exists() :			
 			document.fetch()
 			document = dict(document)
 			del document['_rev']
 		else:
 			document = {}
 		return document
 class CouchWriter(Couch,Writer):		
 	"""
 		This class will write on a couchdb document provided a scope
 		The scope is the attribute that will be on the couchdb document
 	"""
 	def __init__(self,**args):
 		"""
 			@param	uri		host & port reference
 			@param	uid		user id involved
 			@param	filename	filename (attachment)
 			@param	dbname		database name (target)
 		"""
 		Couch.__init__(self,**args)
 	def write(self,**params):
 		"""
 			write a given attribute to a document database
 			@param	label	scope of the row repair|broken|fixed|stats
 			@param	row	row to be written
 		"""
 		# document = self.dbase.get(self.uid)
 		document = cloudant.document.Document(self.dbase,self.uid) #.get(self.uid)
 		if document.exists() is False :
 			document = self.dbase.create_document({"_id":self.uid})
 		label = params['label']
 		row	= params['row']
 		if label not in document :
 			document[label] = []
 		document[label].append(row)
 		document.save()
 		# self.dbase.bulk_docs([document])
 		# self.dbase.save_doc(document)
 	def archive(self,params=None):
 		"""
 		This function will archive the document onto itself. 		
 		"""
 		# document = self.dbase.all_docs(self.uid,include_docs=True)
 		document = cloudant.document.Document(self.dbase,self.filename)
 		document.fetch()
 		content = {}
 		# _doc = {}
 		for id in document:
 			if  id not in ['_id','_rev','_attachments'] :
 				content[id] = document[id]
 				del document[id]
 		content = json.dumps(content)	
 		# document= _doc
 		now = str(datetime.today())
 		name = '-'.join([document['_id'] , now,'.json'])			
 		# self.dbase.bulk_docs([document])
 		# self.dbase.put_attachment(document,content,name,'application/json')
 		document.put_attachment(self.dbase,name,'application/json',content)
 		document.save()
--- a/transport/couch.pyc
+++ b/transport/couch.pyc
--- a/transport/couchdb.pyc
+++ b/transport/couchdb.pyc
--- a/transport/disk.py
+++ b/transport/disk.py
@ -0,0 +1,82 @@
 import os
 from .__init__ import Reader,Writer
 import json
 class DiskReader(Reader) :
 	"""
 	This class is designed to read data from disk (location on hard drive)
 	@pre : isready() == True
 	"""
 	def __init__(self,**params):
 		"""
 			@param	path	absolute path of the file to be read
 		"""
 		Reader.__init__(self)
 		self.path = params['path'] ;
 	def isready(self):
 		return os.path.exists(self.path) 
 	def read(self,size=-1):
 		"""
 			This function reads the rows from a designated location on disk
 			@param	size	number of rows to be read, -1 suggests all rows
 		"""
 		f = open(self.path,'rU') 
 		i = 1
 		for row in f:
 			i += 1
 			if size == i:
 				break
 			yield row
 		f.close()
 class DiskWriter(Writer):
 	"""
 		This function writes output to disk in a designated location
 	"""
 	def __init__(self,**params):
 		if 'path' in params:
 			self.path = params['path']
 		else:
 			self.path = None
 		if 'name' in params:
 			self.name = params['name'];
 		else:
 			self.name = None
 		if os.path.exists(self.path) == False:
 			os.mkdir(self.path)
 	def isready(self):
 		"""
 			This function determines if the class is ready for execution or not
 			i.e it determines if the preconditions of met prior execution
 		"""
 		p =  self.path is not None and os.path.exists(self.path)
 		q = self.name is not None 
 		return p and q
 	def write(self,**params):
 		"""
 			This function writes a record to a designated file
 			@param	label	<passed|broken|fixed|stats>
 			@param	row	row to be written
 		"""
 		label 	= params['label']
 		row 	= params['row']
 		xchar = None
 		if 'xchar' is not None:
 			xchar 	= params['xchar']
 		path = ''.join([self.path,os.sep,label])
 		if os.path.exists(path) == False:
 			os.mkdir(path) ;
 		path = ''.join([path,os.sep,self.name]) 
 		f = open(path,'a')
 		row = self.format(row,xchar);
 		f.write(row)
 		f.close()
--- a/transport/disk.pyc
+++ b/transport/disk.pyc
--- a/transport/mongo.py
+++ b/transport/mongo.py
@ -0,0 +1,66 @@
 from pymongo import MongoClient
 # from transport import Reader,Writer
 from common import Reader, Writer
 import json
 class Mongo :
    """
    Basic mongodb functions are captured here
    """
    def __init__(self,**args):
        """
            :dbname     database name/identifier
            :host   host and port of the database
            :username   username for authentication
            :password   password for current user
        """
        host = args['host']
        if 'user' in args and 'password' in args:        
            self.client = MongoClient(host,
                      username=args['username'] ,
                      password=args['password'] ,
                      authMechanism='SCRAM-SHA-256')
        else:
            self.client = MongoClient()                    
        self.uid    = args['uid']  #-- document identifier
        self.dbname = args['dbname']
        self.db = self.client[self.dbname]
    def isready(self):
        p = self.dbname in self.client.list_database_names() 
        q = self.uid in self.client[self.dbname].list_collection_names()
        return p and q
 class MongoReader(Mongo,Reader):
    """
    This class will read from a mongodb data store and return the content of a document (not a collection)
    """
    def __init__(self,**args):
        Mongo.__init__(self,**args)
    def read(self,size=-1):
        collection = self.db[self.uid]
        return collection.find({})
    def view(self,**args):
        """
        This function is designed to execute a view (map/reduce) operation
        """
        pass
 class MongoWriter(Mongo,Writer):
    """
    This class is designed to write to a mongodb collection within a database
    """
    def __init__(self,**args):
        Mongo.__init__(self,**args)
    def write(self,**args):
        # document  = self.db[self.uid].find()
        collection = self.db[self.uid]
        collection.update_one()
        self.db[self.uid].insert_one(args['row'])
    def set(self,document):
        collection = self.db[self.uid]
        if collection.count_document() > 0 :
            collection.delete({_id:self.uid})
        collecton.update_one({"_id":self.uid},document,True)
--- a/transport/mongo.pyc
+++ b/transport/mongo.pyc
--- a/transport/queue.py
+++ b/transport/queue.py
@ -0,0 +1,200 @@
 import pika
 from datetime import datetime
 import re
 import json
 import os
 from common import Reader, Writer
 import json
 class MessageQueue:
 	"""
 		This class hierarchy is designed to handle interactions with a queue server using pika framework (our tests are based on rabbitmq)
 		:host	
 		:uid	identifier of the exchange
 		:qid	identifier of the queue
 	"""
 	def __init__(self,**params):
 		self.host= params['host']
 		self.uid = params['uid']
 		self.qid = params['qid']
 	def isready(self):
 		#self.init()
 		resp =  self.connection is not None and self.connection.is_open
 		self.close()
 		return resp
 	def close(self):
            if self.connection.is_closed == False :
 		self.channel.close()
 		self.connection.close()
 class QueueWriter(MessageQueue,Writer):
 	"""
 		This class is designed to publish content to an AMQP (Rabbitmq)
 		The class will rely on pika to implement this functionality
 		We will publish information to a given queue for a given exchange
 	"""
 	def __init__(self,**params):
 		#self.host= params['host']
 		#self.uid = params['uid']
 		#self.qid = params['queue']
 		MessageQueue.__init__(self,**params);
 	def init(self,label=None):
 		properties = pika.ConnectionParameters(host=self.host)
 		self.connection = pika.BlockingConnection(properties)
 		self.channel	= self.connection.channel()
 		self.info = self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True)
 		if label is None:
 			self.qhandler = self.channel.queue_declare(queue=self.qid,durable=True)	
 		else:
 			self.qhandler = self.channel.queue_declare(queue=label,durable=True)
 		self.channel.queue_bind(exchange=self.uid,queue=self.qhandler.method.queue) 
 	"""
 		This function writes a stream of data to the a given queue
 		@param object	object to be written (will be converted to JSON)
 		@TODO: make this less chatty
 	"""
 	def write(self,**params):
 		xchar = None
 		if  'xchar' in params:
 			xchar = params['xchar']
 		object = self.format(params['row'],xchar)
 		label	= params['label']
 		self.init(label)
 		_mode = 2
 		if isinstance(object,str):
 			stream = object
 			_type = 'text/plain'
 		else:
 			stream = json.dumps(object)
 			if 'type' in params :
 				_type = params['type']
 			else:
 				_type = 'application/json'
 		self.channel.basic_publish(
 			exchange=self.uid,
 			routing_key=label,
 			body=stream,
 			properties=pika.BasicProperties(content_type=_type,delivery_mode=_mode)
 		);
 		self.close()
 	def flush(self,label):
 		self.init(label)
 		_mode = 1  #-- Non persistent
 		self.channel.queue_delete( queue=label);
 		self.close()
 class QueueReader(MessageQueue,Reader):
 	"""
 	This class will read from a queue provided an exchange, queue and host
 	@TODO: Account for security and virtualhosts
 	"""
 	def __init__(self,**params):
 		"""
 			@param	host	host
 			@param	uid	exchange identifier
 			@param	qid	queue identifier
 		"""
 		#self.host= params['host']
 		#self.uid = params['uid']
 		#self.qid = params['qid']
 		MessageQueue.__init__(self,**params);
 		if 'durable' in params :
 			self.durable = True
 		else:
 			self.durable = False
 		self.size = -1
 		self.data = {}
 	def init(self,qid):
 		properties = pika.ConnectionParameters(host=self.host)
 		self.connection = pika.BlockingConnection(properties)
 		self.channel	= self.connection.channel()
 		self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True)
 		self.info = self.channel.queue_declare(queue=qid,durable=True)
 	def callback(self,channel,method,header,stream):
 		"""
 			This is the callback function designed to process the data stream from the queue
 		"""
 		r = []
 		if re.match("^\{|\[",stream) is not None:
 			r = json.loads(stream)
 		else:
 			r = stream
 		qid = self.info.method.queue
 		if qid not in self.data :
 			self.data[qid] = []
 		self.data[qid].append(r)
 		#
 		# We stop reading when the all the messages of the queue are staked
 		#
 		if self.size == len(self.data[qid]) or len(self.data[qid]) == self.info.method.message_count:		
 			self.close()
 	def read(self,size=-1):
 		"""
 		This function will read, the first message from a queue
 		@TODO: 
 			Implement channel.basic_get in order to retrieve a single message at a time
 			Have the number of messages retrieved be specified by size (parameter)
 		"""
 		r = {}
 		self.size = size
 		#
 		# We enabled the reader to be able to read from several queues (sequentially for now)
 		# The qid parameter will be an array of queues the reader will be reading from
 		#
 		if isinstance(self.qid,basestring) :
 					self.qid = [self.qid]
 		for qid in self.qid:
 			self.init(qid)
 			# r[qid] = []
 			if self.info.method.message_count > 0:
 				self.channel.basic_consume(self.callback,queue=qid,no_ack=False);
 				self.channel.start_consuming()
 			else:
 				pass
 				#self.close()
 			# r[qid].append( self.data)
 		return self.data
 class QueueListener(QueueReader):
 	def init(self,qid):
 		properties = pika.ConnectionParameters(host=self.host)
 		self.connection = pika.BlockingConnection(properties)
 		self.channel	= self.connection.channel()
 		self.channel.exchange_declare(exchange=self.uid,type='direct',durable=True )
 		self.info = self.channel.queue_declare(passive=True,exclusive=True,queue=qid)
 		self.channel.queue_bind(exchange=self.uid,queue=self.info.method.queue,routing_key=qid)
 		#self.callback = callback
 	def read(self):
 		self.init(self.qid)
 		self.channel.basic_consume(self.callback,queue=self.qid,no_ack=True);
 		self.channel.start_consuming()
--- a/transport/queue.pyc
+++ b/transport/queue.pyc
--- a/transport/s3.py
+++ b/transport/s3.py
@ -0,0 +1,83 @@
 from datetime import datetime
 import boto
 import botocore
 from smart_open import smart_open
 from common import Reader, Writer
 import json
 from common import Reader, Writer
 class s3 :
 	"""
 		@TODO: Implement a search function for a file given a bucket??
 	"""
 	def __init__(self,args) :
 		"""
 			This function will extract a file or set of files from s3 bucket provided
 			@param access_key
 			@param secret_key
 			@param path		location of the file
 			@param filter		filename or filtering elements
 		"""
 		try:
 			self.s3 = boto.connect_s3(args['access_key'],args['secret_key'])
 			self.bucket = self.s3.get_bucket(args['bucket'].strip(),validate=False) if 'bucket' in args else None
 			# self.path = args['path']
 			self.filter = args['filter'] if 'filter' in args else None
 			self.filename = args['file'] if 'file' in args else None
 		except Exception as e :
 			self.s3 = None
 			self.bucket = None
 			print (e)
 	def buckets(self):
 		# def buckets(self):
 		pass
 		# """
 		# This function is a wrapper around the bucket list of buckets for s3
 		# """
 		# return self.s3.get_all_buckets()
 class s3Reader(s3,Reader) :
 	"""	
 		Because s3 contains buckets and files, reading becomes a tricky proposition :
 		- list files		if file is None
 		- stream content	if file is Not None
 		@TODO: support read from all buckets, think about it
 	"""
 	def __init__(self,args) :
 			s3.__init__(self,args)
 	def files(self):
 		r = []
 		try:
 			return [item.name for item in self.bucket if item.size > 0]
 		except Exception as e:
 			pass
 		return r
 	def stream(self,limit=-1):
 		"""
 			At this point we should stream a file from a given bucket
 		"""
 		key = self.bucket.get_key(self.filename.strip())
 		if key is None :
 			yield None
 		else:
 			count = 0
 			with smart_open(key) as remote_file:
 				for line in remote_file:
 					if count == limit and limit > 0 :
 						break
 				yield line
 				count += 1
 	def read(self,limit=-1) :
 		if self.filename is None :
 			# 
 		# returning the list of files because no one file was specified.
 			return self.files()
 		else:
 			return self.stream(10)
 class s3Writer(s3,Writer) :
        def __init__(self,args) :
        	s3.__init__(self,args)
--- a/transport/s3.pyc
+++ b/transport/s3.pyc
--- a/transport/session.py
+++ b/transport/session.py
@ -0,0 +1,66 @@
 from flask import request, session
 from datetime import datetime
 import re
 from common import Reader, Writer
 import json
 class HttpRequestReader(Reader):
    """
    This class is designed to read data from an Http request file handler provided to us by flask
    The file will be heald in memory and processed accordingly
    NOTE: This is inefficient and can crash a micro-instance (becareful)
    """
 	def __init__(self,**params):
 		self.file_length = 0
 		try:
 			#self.file = params['file']	
 			#self.file.seek(0, os.SEEK_END)
 			#self.file_length = self.file.tell()
 			#print 'size of file ',self.file_length
 			self.content = params['file'].readlines()
 			self.file_length = len(self.content)
 		except Exception, e:
 			print "Error ... ",e
 			pass
 	def isready(self):
 		return self.file_length > 0
 	def read(self,size =-1):
 		i = 1
 		for row in self.content:
 			i += 1
 			if size == i:
 				break
 			yield row
 class HttpSessionWriter(Writer):
    """
        This class is designed to write data to a session/cookie
    """
 	def __init__(self,**params):
        """
            @param key	required session key
        """
 		self.session = params['queue']
 		self.session['sql'] = []
 		self.session['csv'] = []
 		self.tablename = re.sub('..+$','',params['filename'])
 		self.session['uid'] = params['uid']
 		#self.xchar = params['xchar']
 	def format_sql(self,row):
 		values = "','".join([col.replace('"','').replace("'",'') for col in row])
 		return "".join(["INSERT INTO :table VALUES('",values,"');\n"]).replace(':table',self.tablename)		
 	def isready(self):
 		return True
 	def write(self,**params):
 		label = params['label']
 		row = params ['row']
 		if label == 'usable':
 			self.session['csv'].append(self.format(row,','))
 			self.session['sql'].append(self.format_sql(row))