Bug fix: SQLite writer, cursors, and transport CLI

pull/6/head
Steve Nyemba 1 year ago
parent 3f7f3d7306
commit 2bb07aedec

@ -46,6 +46,7 @@ import time
from multiprocessing import Process
import typer
import os
import transport
from transport import etl
from transport import providers
@ -88,7 +89,7 @@ def move (path,index=None):
_config = _config[ int(index)]
etl.instance(**_config)
else:
etl.instance(_config)
etl.instance(config=_config)
#
# if type(_config) == dict :
@ -109,19 +110,30 @@ def move (path,index=None):
# jobs.append(thread())
# if _config.index(_args) == 0 :
# thread.join()
wait(jobs)
# wait(jobs)
@app.command()
def version():
    """Print the installed data-transport package version."""
    print (transport.version.__version__)
@app.command()
def generate (path:str):
    """
    This function will generate a configuration template to give a sense of how to create one
    :path   path of the file the sample configuration will be written to
    """
    #
    # One sample ETL job: a single HTTP/CSV source fanned out to two targets
    # (a delimited file and a sqlite table), matching what etl.instance expects.
    _config = [
        {
            "source":{"provider":"http","url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"},
            "target":[
                {"provider":"file","path":"addresses.csv","delimiter":"csv"},
                {"provider":"sqlite","database":"sample.db3","table":"addresses"}
            ]
        }
    ]
    # context manager guarantees the handle is closed even if the write fails
    with open(path,'w') as file:
        file.write(json.dumps(_config))
# if __name__ == '__main__' :
@app.command()
def usage():
    """Print the module-level docstring (the CLI usage notes)."""
    print (__doc__)
if __name__ == '__main__' :
app()
# #
# # Load information from the file ...
# if 'help' in SYS_ARGS :

@ -62,34 +62,25 @@ class DiskWriter(Writer):
"""
THREAD_LOCK = Lock()
def __init__(self,**params):
    """
    Initialize the disk writer.
    :path       target file path (default: data-transport.log, preserved from the
                previous implementation)
    :delimiter  optional field delimiter used when writing delimited output
    """
    super().__init__()
    # .get() restores the previous default behavior instead of raising a
    # KeyError when the caller omits 'path' or 'delimiter'
    self._path = params.get('path','data-transport.log')
    self._delimiter = params.get('delimiter',None)
    # NOTE(review): write() still reads self.path (and previously
    # self.delimiter) — keep legacy aliases so the rename to
    # _path/_delimiter does not break callers of write(); confirm
    # against the rest of the class before removing.
    self.path = self._path
    self.delimiter = self._delimiter
def write(self,info,**_args):
"""
This function writes a record to a designated file
@ -97,21 +88,30 @@ class DiskWriter(Writer):
@param row row to be written
"""
try:
_mode = 'a' if 'overwrite' not in _args else 'w'
DiskWriter.THREAD_LOCK.acquire()
f = open(self.path,_mode)
if self.delimiter :
if type(info) == list :
for row in info :
f.write(self.format(row))
else:
f.write(self.format(info))
else:
if not type(info) == str :
f.write(json.dumps(info)+"\n")
else:
f.write(info)
f.close()
# # _path = _args['path'] if 'path' in _args else self.path
# # _delim= _args['delimiter'] if 'delimiter' in _args else self._delimiter
# # info.to_csv(_path,sep=_delim)
# info.to_csv(self.path)
# f = open(self.path,_mode)
# if self.delimiter :
# if type(info) == list :
# for row in info :
# f.write(self.format(row))
# else:
# f.write(self.format(info))
# else:
# if not type(info) == str :
# f.write(json.dumps(info)+"\n")
# else:
# f.write(info)
# f.close()
_delim = self._delimiter if 'delimiter' not in _args else _args['delimiter']
_path = self.path if 'path' not in _args else _args['path']
info.to_csv(_path,index=False,sep=_delim)
pass
except Exception as e:
#
# Not sure what should be done here ...
@ -220,16 +220,19 @@ class SQLiteWriter(SQLite,DiskWriter) :
#
# If the table doesn't exist we should create it
#
def write(self,info):
def write(self,info,**_args):
"""
"""
if not self.fields :
if type(info) == pd.DataFrame :
_columns = list(info.columns)
self.init(list(info.keys()))
if type(info) == dict :
info = [info]
elif type(info) == pd.DataFrame :
info = info.fillna('')
info = info.to_dict(orient='records')
SQLiteWriter.LOCK.acquire()

@ -90,14 +90,16 @@ class Transporter(Process):
This function will write a data-frame to a designated data-store, The function is built around a delegation design pattern
:data data-frame or object to be written
"""
if _data.shape[0] > 0 :
for _target in self._target :
if 'write' not in _target :
_target['context'] = 'write'
_target['lock'] = True
# _target['lock'] = True
else:
_target['write']['lock'] = True
# _target['write']['lock'] = True
pass
_writer = transport.factory.instance(**_target)
_writer.write(_data,**_args)
_writer.write(_data.copy(),**_args)
if hasattr(_writer,'close') :
_writer.close()
@ -114,7 +116,7 @@ class Transporter(Process):
for _indexes in _segments :
_fwd_args = {} if not _args else _args
self._delegate_write(_df.iloc[_indexes],**_fwd_args)
self._delegate_write(_df.iloc[_indexes])
#
# @TODO: Perhaps consider writing up each segment in a thread/process (speeds things up?)
pass

Loading…
Cancel
Save