Adding templates to the class hierarchies; this helps with the wizard

v2.4
Steve Nyemba 1 week ago
parent ea1cb7b1bb
commit 5dbe541025

@ -104,7 +104,7 @@ def generate (path:Annotated[str,typer.Argument(help="path of the ETL configurat
{ {
"source":{"provider":"http","url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"}, "source":{"provider":"http","url":"https://raw.githubusercontent.com/codeforamerica/ohana-api/master/data/sample-csv/addresses.csv"},
"target": "target":
[{"provider":"files","path":"addresses.csv","delimiter":","},{"provider":"sqlite","database":"sample.db3","table":"addresses"}] [{"provider":"files","path":"addresses.csv","delimiter":","},{"provider":"sqlite3","database":"sample.db3","table":"addresses"}]
} }
] ]
file = open(path,'w') file = open(path,'w')

@ -1,6 +1,6 @@
__app_name__ = 'data-transport' __app_name__ = 'data-transport'
__author__ = 'The Phi Technology' __author__ = 'The Phi Technology'
__version__= '2.4.3' __version__= '2.4.4'
__email__ = "info@the-phi.com" __email__ = "info@the-phi.com"
__license__=f""" __license__=f"""
Copyright 2010 - 2024, Steve L. Nyemba Copyright 2010 - 2024, Steve L. Nyemba

@ -15,7 +15,7 @@ import time
MAX_CHUNK = 2000000 MAX_CHUNK = 2000000
class BigQuery: class BigQuery:
__template__= {"private_key":None,"dataset":None,"table":None,} __template__= {"private_key":None,"dataset":None,"table":None}
def __init__(self,**_args): def __init__(self,**_args):
path = _args['service_key'] if 'service_key' in _args else _args['private_key'] path = _args['service_key'] if 'service_key' in _args else _args['private_key']
self.credentials = service_account.Credentials.from_service_account_file(path) self.credentials = service_account.Credentials.from_service_account_file(path)

@ -16,7 +16,8 @@ from datetime import datetime
import pandas as pd import pandas as pd
import os import os
import sys import sys
import itertools
import json
class IO: class IO:
""" """
@ -29,9 +30,9 @@ class IO:
# #
# registry.init() # registry.init()
self._logger = _logger #transport.get.writer(label='logger') #if registry.has('logger') else None self._logger = _logger if not type(_agent) in [IReader,IWriter] else _agent._logger #transport.get.writer(label='logger') #if registry.has('logger') else None
if not _logger and hasattr(_agent,'_logger') : # if not _logger and hasattr(_agent,'_logger') :
self._logger = getattr(_agent,'_logger') # self._logger = getattr(_agent,'_logger')
self._agent = _agent self._agent = _agent
_date = _date = str(datetime.now()) _date = _date = str(datetime.now())
self._logTable = 'logs' #'_'.join(['logs',_date[:10]+_date[11:19]]).replace(':','').replace('-','_') self._logTable = 'logs' #'_'.join(['logs',_date[:10]+_date[11:19]]).replace(':','').replace('-','_')
@ -47,8 +48,11 @@ class IO:
_date = str(datetime.now()) _date = str(datetime.now())
_data = dict({'pid':os.getpid(),'date':_date[:10],'time':_date[11:19]},**_args) _data = dict({'pid':os.getpid(),'date':_date[:10],'time':_date[11:19]},**_args)
for key in _data : for key in _data :
_data[key] = str(_data[key]) _data[key] = str(_data[key]) if type(_data[key]) not in [list,dict] else json.dumps(_data[key])
self._logger.write(pd.DataFrame([_data])) #,table=self._logTable) self._logger.write(pd.DataFrame([_data])) #,table=self._logTable)
else:
print ([' ********** '])
print (_args)
def _init_plugins(self,_items): def _init_plugins(self,_items):
""" """
This function will load pipelined functions as a plugin loader This function will load pipelined functions as a plugin loader
@ -117,12 +121,19 @@ class IReader(IO):
if self._plugins : if self._plugins :
return self._stream(_data) return self._stream(_data)
else: else:
self.log(action='streaming',object=_objectName, input= {'memory_size':sys.getsizeof(_data)}) _count = 0
return _data for _segment in _data :
_count += 1
yield _segment
self.log(action='streaming',object=_objectName, input= {'segments':_count})
# return _data
else: else:
self.log(action='read',object=_objectName, input=_data.shape) self.log(action='read',object=_objectName, input=_data.shape)
if self._plugins : if self._plugins :
_data = self._plugins.apply(_data) _logs = []
_data = self._plugins.apply(_data,self.log)
return _data return _data
# if self._plugins and self._plugins.ratio() > 0 : # if self._plugins and self._plugins.ratio() > 0 :
@ -144,7 +155,10 @@ class IWriter(IO):
if 'plugins' in _args : if 'plugins' in _args :
self._init_plugins(_args['plugins']) self._init_plugins(_args['plugins'])
if self._plugins and self._plugins.ratio() > 0 : if self._plugins and self._plugins.ratio() > 0 :
_data = self._plugins.apply(_data,self._logger) _logs = []
_data = self._plugins.apply(_data,_logs,self.log)
# [self.log(**_item) for _item in _logs]
try: try:
# IWriter.lock.acquire() # IWriter.lock.acquire()
self._agent.write(_data,**_args) self._agent.write(_data,**_args)
@ -161,7 +175,13 @@ class IETL(IReader) :
This class performs an ETL operation by inheriting a read and adding writes as pipeline functions This class performs an ETL operation by inheriting a read and adding writes as pipeline functions
""" """
def __init__(self,**_args): def __init__(self,**_args):
super().__init__(transport.get.reader(**_args['source'])) _source = _args['source']
_plugins = _source['plugins'] if 'plugins' in _source else None
# super().__init__(transport.get.reader(**_args['source']))
super().__init__(transport.get.reader(**_source),_plugins)
# _logger =
if 'target' in _args: if 'target' in _args:
self._targets = _args['target'] if type(_args['target']) == list else [_args['target']] self._targets = _args['target'] if type(_args['target']) == list else [_args['target']]
else: else:

@ -114,8 +114,7 @@ class PluginLoader :
_n = len(self._names) _n = len(self._names)
return len(set(self._modules.keys()) & set (self._names)) / _n return len(set(self._modules.keys()) & set (self._names)) / _n
def apply(self,_data,_logger=None): def apply(self,_data,_logger=[]):
_rows = []
_input= {} _input= {}
for _name in self._modules : for _name in self._modules :
@ -136,8 +135,7 @@ class PluginLoader :
print (e) print (e)
if _logger: if _logger:
_logger(**_input) _logger(_input)
pass
return _data return _data
# def apply(self,_data,_name): # def apply(self,_data,_name):
# """ # """

@ -9,6 +9,7 @@ import pandas as pd
class Base: class Base:
__template__={"host":None,"port":1,"database":None,"table":None,"username":None,"password":None}
def __init__(self,**_args): def __init__(self,**_args):
# print ([' ## ',_args]) # print ([' ## ',_args])
self._host = _args['host'] if 'host' in _args else 'localhost' self._host = _args['host'] if 'host' in _args else 'localhost'

@ -7,6 +7,7 @@ from transport.sql.common import Base, BaseReader, BaseWriter
class MsSQLServer: class MsSQLServer:
def __init__(self,**_args) : def __init__(self,**_args) :
super().__init__(**_args) super().__init__(**_args)
pass pass

Loading…
Cancel
Save