bug fix: sqlite and cursors and transport

pull/6/head
Steve Nyemba 1 year ago
parent 3f7f3d7306
commit 2bb07aedec

@ -46,6 +46,7 @@ import time
from multiprocessing import Process from multiprocessing import Process
import typer import typer
import os import os
import transport
from transport import etl from transport import etl
from transport import providers from transport import providers
@ -88,7 +89,7 @@ def move (path,index=None):
_config = _config[ int(index)] _config = _config[ int(index)]
etl.instance(**_config) etl.instance(**_config)
else: else:
etl.instance(_config) etl.instance(config=_config)
# #
# if type(_config) == dict : # if type(_config) == dict :
@ -109,19 +110,30 @@ def move (path,index=None):
# jobs.append(thread()) # jobs.append(thread())
# if _config.index(_args) == 0 : # if _config.index(_args) == 0 :
# thread.join() # thread.join()
wait(jobs) # wait(jobs)
@app.command()
def version():
print (transport.version.__version__)
@app.command() @app.command()
def generate (path:str): def generate (path:str):
__doc__="""
""" """
_config = [{"source":{"provider":"http","url":"https://cdn.wsform.com/wp-content/uploads/2020/06/agreement.csv"},"target":{"provider":"file","path":"addresses.csv","delimiter":"csv"}}] This function will generate a configuration template to give a sense of how to create one
"""
_config = [
{
"source":{"provider":"http","url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"},
"target":
[{"provider":"file","path":"addresses.csv","delimiter":"csv"},{"provider":"sqlite","database":"sample.db3","table":"addresses"}]
}
]
file = open(path,'w') file = open(path,'w')
file.write(json.dumps(_config)) file.write(json.dumps(_config))
file.close() file.close()
@app.command()
# if __name__ == '__main__' : def usage():
print (__doc__)
if __name__ == '__main__' :
app()
# # # #
# # Load information from the file ... # # Load information from the file ...
# if 'help' in SYS_ARGS : # if 'help' in SYS_ARGS :

@ -62,34 +62,25 @@ class DiskWriter(Writer):
""" """
THREAD_LOCK = Lock() THREAD_LOCK = Lock()
def __init__(self,**params): def __init__(self,**params):
Writer.__init__(self) super().__init__()
self.cache['meta'] = {'cols':0,'rows':0,'delimiter':None} self._path = params['path']
if 'path' in params: self._delimiter = params['delimiter']
self.path = params['path']
else: # def meta(self):
self.path = 'data-transport.log' # return self.cache['meta']
self.delimiter = params['delimiter'] if 'delimiter' in params else None # def isready(self):
# if 'name' in params: # """
# self.name = params['name']; # This function determines if the class is ready for execution or not
# else: # i.e it determines if the preconditions of met prior execution
# self.name = 'data-transport.log' # """
# if os.path.exists(self.path) == False: # return True
# os.mkdir(self.path) # # p = self.path is not None and os.path.exists(self.path)
def meta(self): # # q = self.name is not None
return self.cache['meta'] # # return p and q
def isready(self): # def format (self,row):
""" # self.cache['meta']['cols'] += len(row) if isinstance(row,list) else len(row.keys())
This function determines if the class is ready for execution or not # self.cache['meta']['rows'] += 1
i.e it determines if the preconditions of met prior execution # return (self.delimiter.join(row) if self.delimiter else json.dumps(row))+"\n"
"""
return True
# p = self.path is not None and os.path.exists(self.path)
# q = self.name is not None
# return p and q
def format (self,row):
self.cache['meta']['cols'] += len(row) if isinstance(row,list) else len(row.keys())
self.cache['meta']['rows'] += 1
return (self.delimiter.join(row) if self.delimiter else json.dumps(row))+"\n"
def write(self,info,**_args): def write(self,info,**_args):
""" """
This function writes a record to a designated file This function writes a record to a designated file
@ -97,21 +88,30 @@ class DiskWriter(Writer):
@param row row to be written @param row row to be written
""" """
try: try:
_mode = 'a' if 'overwrite' not in _args else 'w' _mode = 'a' if 'overwrite' not in _args else 'w'
DiskWriter.THREAD_LOCK.acquire() DiskWriter.THREAD_LOCK.acquire()
f = open(self.path,_mode) # # _path = _args['path'] if 'path' in _args else self.path
if self.delimiter : # # _delim= _args['delimiter'] if 'delimiter' in _args else self._delimiter
if type(info) == list : # # info.to_csv(_path,sep=_delim)
for row in info : # info.to_csv(self.path)
f.write(self.format(row)) # f = open(self.path,_mode)
else: # if self.delimiter :
f.write(self.format(info)) # if type(info) == list :
else: # for row in info :
if not type(info) == str : # f.write(self.format(row))
f.write(json.dumps(info)+"\n") # else:
else: # f.write(self.format(info))
f.write(info) # else:
f.close() # if not type(info) == str :
# f.write(json.dumps(info)+"\n")
# else:
# f.write(info)
# f.close()
_delim = self._delimiter if 'delimiter' not in _args else _args['delimiter']
_path = self.path if 'path' not in _args else _args['path']
info.to_csv(_path,index=False,sep=_delim)
pass
except Exception as e: except Exception as e:
# #
# Not sure what should be done here ... # Not sure what should be done here ...
@ -220,16 +220,19 @@ class SQLiteWriter(SQLite,DiskWriter) :
# #
# If the table doesn't exist we should create it # If the table doesn't exist we should create it
# #
def write(self,info): def write(self,info,**_args):
""" """
""" """
if not self.fields : if not self.fields :
if type(info) == pd.DataFrame :
_columns = list(info.columns)
self.init(list(info.keys())) self.init(list(info.keys()))
if type(info) == dict : if type(info) == dict :
info = [info] info = [info]
elif type(info) == pd.DataFrame : elif type(info) == pd.DataFrame :
info = info.fillna('')
info = info.to_dict(orient='records') info = info.to_dict(orient='records')
SQLiteWriter.LOCK.acquire() SQLiteWriter.LOCK.acquire()

@ -70,7 +70,7 @@ class Transporter(Process):
# self._onerror = _args['onError'] # self._onerror = _args['onError']
self._source = _args['source'] self._source = _args['source']
self._target = _args['target'] self._target = _args['target']
# #
# Let's insure we can support multiple targets # Let's insure we can support multiple targets
self._target = [self._target] if type(self._target) != list else self._target self._target = [self._target] if type(self._target) != list else self._target
@ -90,16 +90,18 @@ class Transporter(Process):
This function will write a data-frame to a designated data-store, The function is built around a delegation design pattern This function will write a data-frame to a designated data-store, The function is built around a delegation design pattern
:data data-frame or object to be written :data data-frame or object to be written
""" """
for _target in self._target : if _data.shape[0] > 0 :
if 'write' not in _target : for _target in self._target :
_target['context'] = 'write' if 'write' not in _target :
_target['lock'] = True _target['context'] = 'write'
else: # _target['lock'] = True
_target['write']['lock'] = True else:
_writer = transport.factory.instance(**_target) # _target['write']['lock'] = True
_writer.write(_data,**_args) pass
if hasattr(_writer,'close') : _writer = transport.factory.instance(**_target)
_writer.close() _writer.write(_data.copy(),**_args)
if hasattr(_writer,'close') :
_writer.close()
def write(self,_df,**_args): def write(self,_df,**_args):
""" """
@ -109,12 +111,12 @@ class Transporter(Process):
# _df = self.read() # _df = self.read()
_segments = np.array_split(np.range(_df.shape[0]),SEGMENT_COUNT) if _df.shape[0] > MAX_ROWS else np.array( [np.arange(_df.shape[0])]) _segments = np.array_split(np.range(_df.shape[0]),SEGMENT_COUNT) if _df.shape[0] > MAX_ROWS else np.array( [np.arange(_df.shape[0])])
# _index = 0 # _index = 0
for _indexes in _segments : for _indexes in _segments :
_fwd_args = {} if not _args else _args _fwd_args = {} if not _args else _args
self._delegate_write(_df.iloc[_indexes],**_fwd_args) self._delegate_write(_df.iloc[_indexes])
# #
# @TODO: Perhaps consider writing up each segment in a thread/process (speeds things up?) # @TODO: Perhaps consider writing up each segment in a thread/process (speeds things up?)
pass pass

Loading…
Cancel
Save