Compare commits

...

15 Commits

Author | SHA1 | Message | Date
Steve L. Nyemba | 80005acd52 | Merge pull request 'v2.2.0' (#5) from v2.2.0 into master | 4 weeks ago
Steve Nyemba | 01ead6e387 | version update: etl bug fix | 4 weeks ago
Steve Nyemba | aeff2a214b | bug fix, schema should be a string for SQLAlchemy ~ 2.0.0 | 4 weeks ago
Steve Nyemba | a6a1fd9f2e | generating template for auth-file | 1 month ago
Steve Nyemba | 3376347edc | generating template for auth-file | 1 month ago
Steve L. Nyemba | 34a00361fd | Merge branch 'master' into v2.2.0 | 1 month ago
Steve L. Nyemba | 5672911bed | Merge pull request #33 from lnyemba/v2.2.0 | 6 months ago
Steve L. Nyemba | dd9fd27e71 | Merge pull request #32 from lnyemba/v2.2.0 | 6 months ago
Steve L. Nyemba | 5ec747850f | Merge pull request #31 from lnyemba/v2.2.0 | 6 months ago
Steve L. Nyemba | 46a2cda128 | Merge pull request #30 from lnyemba/v2.2.0 | 6 months ago
Steve L. Nyemba | 537ab56fd6 | Merge pull request #29 from lnyemba/v2.2.0 | 6 months ago
Steve L. Nyemba | 3c1d9f826e | Merge branch 'master' into v2.2.0 | 6 months ago
Steve Nyemba | de87b33a68 | update registry and installer | 6 months ago
Steve Nyemba | f2e7a0cf0c | ... | 6 months ago
Steve L. Nyemba | f86ef2cde4 | Merge pull request #28 from lnyemba/v2.2.0 | 6 months ago

@@ -177,7 +177,25 @@ def register (label:Annotated[str,typer.Argument(help="unique label that will be
     except Exception as e:
         _msg = f"""{TIMES_MARK} {e}"""
         print (_msg)
+@app_r.command(name="template")
+def template(name:Annotated[str,typer.Argument(help="database technology provider" ) ]):
+    """
+    This function will generate a template entry for the registry (content of an auth file)
+    """
+    #
+    # retrieve the provider and display the template if it has one
+    for _module in ['sql','cloud','warehouse','nosql','other'] :
+        ref = getattr(transport,_module) if hasattr(transport,_module) else None
+        _entry = {}
+        if ref :
+            if hasattr(ref,name) :
+                _pointer = getattr(ref,name)
+                _entry = dict({'provider':name},**_pointer.template()) if hasattr(_pointer,'template') else {}
+                break
+    #
+    #
+    print ( json.dumps(_entry))
+    pass
 @app_r.command(name="list")
 def register_list ():
     """

@@ -46,9 +46,4 @@ include = [ "transport", "transport.*"]
 [tool.setuptools.dynamic]
 version = {attr = "transport.info.__version__"}
-#authors = {attr = "transport.__author__"}
-# If you have a info.py file, you might also want to include the author dynamically:
-# [tool.setuptools.dynamic]
-# version = {attr = "info.__version__"}
 #authors = {attr = "info.__author__"}

@@ -14,6 +14,8 @@ import numpy as np
 import time
 MAX_CHUNK = 2000000
+def template ():
+    return {'provider':'bigquery','private_key':'path-to-key','dataset':'name-of-dataset','table':'table'}
 class BigQuery:
     def __init__(self,**_args):
         path = _args['service_key'] if 'service_key' in _args else _args['private_key']
@@ -23,6 +25,7 @@ class BigQuery:
         self.dtypes = _args['dtypes'] if 'dtypes' in _args else None
         self.table = _args['table'] if 'table' in _args else None
         self.client = bq.Client.from_service_account_json(self.path)
     def meta(self,**_args):
         """
         This function returns meta data for a given table or query with dataset/table properly formatted
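
Filled in, the BigQuery skeleton above becomes an auth-file entry along these lines (every value below is a made-up placeholder, not a real key path or dataset):

    {
        "provider": "bigquery",
        "private_key": "/home/me/.keys/bq-service-account.json",
        "dataset": "sales",
        "table": "transactions"
    }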

@@ -17,6 +17,8 @@ import sqlalchemy
 # from transport.common import Reader,Writer
 import pandas as pd
+def template ():
+    return {'provider':'databricks','host':'fqn-host','token':'token','cluster_path':'path-of-cluster','catalog':'name-of-catalog','database':'schema-or-database','table':'table'}
 class Bricks:
     """

@@ -8,6 +8,8 @@ import pandas as pd
 from io import StringIO
 import json
 import nextcloud_client as nextcloud
+def template():
+    return {"url":None,"token":None,"uid":None,"file":None}
 class Nextcloud :
     def __init__(self,**_args):

@@ -20,6 +20,8 @@ from io import StringIO
 import pandas as pd
 import json
+def template():
+    return {'access_key':'access-key','secret_key':'secret-key','region':'region','bucket':'name-of-bucket','file':'file-name','chunksize':10000}
 class s3 :
     """
     @TODO: Implement a search function for a file given a bucket??

@@ -1,6 +1,6 @@
 __app_name__ = 'data-transport'
 __author__ = 'The Phi Technology'
-__version__= '2.2.30'
+__version__= '2.2.42'
 __email__ = "info@the-phi.com"
 __edition__= 'community'
 __license__=f"""

@@ -111,8 +111,8 @@ class IETL(IReader) :
         _data = super().read(**_args)
         _schema = super().meta()
         for _kwargs in self._targets :
-            if _schema :
-                _kwargs['schema'] = _schema
+            # if _schema :
+            #     _kwargs['schema'] = _schema
             self.post(_data,**_kwargs)
         return _data
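
Commenting out the schema forwarding matches the "schema should be a string for SQLAlchemy ~ 2.0.0" entry in the commit log: `meta()` returns structured field metadata rather than a name, while under SQLAlchemy 2.x a `schema` argument (for example to pandas' `to_sql`, which presumably sits behind the SQL writers here) must be the schema (namespace) name as a plain string. A hedged illustration of the expected shape (connection URL, table and schema names are hypothetical):

    import pandas as pd
    import sqlalchemy as sqa

    _engine = sqa.create_engine("postgresql+psycopg2://user:password@localhost/demo")
    _df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})

    # to_sql's schema argument is the schema *name*, not a field listing,
    # which is why IETL no longer forwards the meta() payload to its targets.
    _df.to_sql("visits", _engine, schema="public", if_exists="append", index=False)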

@@ -11,6 +11,8 @@ import sys
 # from transport.common import Reader, Writer
 from datetime import datetime
+def template():
+    return {'dbname':'database','doc':'document','username':'username','password':'password','url':'url-with-port'}
 class Couch:
     """

@@ -19,7 +19,8 @@ import json
 import re
 from multiprocessing import Lock, RLock
 from transport.common import IEncoder
+def template():
+    return {'provider':'mongodb','host':'localhost','port':27017,'db':'db-name','collection':'collection-name','username':'username','password':'password','mechanism':'SCRAM-SHA-256'}
 class Mongo :
     lock = RLock()
     """

@@ -4,6 +4,10 @@ This file is a wrapper around pandas built-in functionalities to handle characte
 import pandas as pd
 import numpy as np
 import os
+def template():
+    return {'path':None,'delimiter':None}
 class File :
     def __init__(self,**params):
         """

@@ -7,7 +7,8 @@ import requests
 from io import StringIO
 import pandas as pd
+def template():
+    return {'url':None, 'headers':{'key':'value'}}
 class Reader:
     """
     This class is designed to read data from an Http request file handler provided to us by flask

@@ -17,6 +17,10 @@ import sys
 # from common import Reader, Writer
 import json
 from multiprocessing import RLock
+def template():
+    return {'port':5672,'host':'localhost','queue':None,'vhost':None,'username':None,'password':None}
 class MessageQueue:
     """
     This class hierarchy is designed to handle interactions with a queue server using pika framework (our tests are based on rabbitmq)

@@ -51,6 +51,7 @@ def init (email,path=REGISTRY_PATH,override=False,_file=REGISTRY_FILE):
     p = '@' in email
     #q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai','org']
     q = len(email.split('.')[-1]) in [2,3]
     if p and q :
         _config = {"email":email,'version':__version__}
         if not os.path.exists(path):
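
The registration check is intentionally loose: an '@' anywhere in the string plus a two- or three-character suffix after the last dot. The same predicate in isolation (the helper name is made up for illustration):

    def _looks_like_email(email):
        # Same test as init(): has an '@' and ends in a 2-3 character TLD.
        return '@' in email and len(email.split('.')[-1]) in [2, 3]

    assert _looks_like_email('steve@the-phi.com')    # 'com' -> 3 characters
    assert not _looks_like_email('no-at-sign.info')  # no '@' and a 4-character suffix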

@@ -6,7 +6,8 @@ import sqlalchemy as sqa
 from sqlalchemy import text , MetaData, inspect
 import pandas as pd
+def template():
+    return {'host':'localhost','database':'database','table':'table'}
 class Base:
     def __init__(self,**_args):
         self._host = _args['host'] if 'host' in _args else 'localhost'

@@ -2,7 +2,8 @@
 This module implements the handler for duckdb (in memory or not)
 """
 from transport.sql.common import Base, BaseReader, BaseWriter
+def template ():
+    return {'database':'path-to-database','table':'table'}
 class Duck :
     def __init__(self,**_args):
         #

@@ -1,8 +1,13 @@
 """
 This file implements support for mysql and maria db (with drivers mysql+mysql)
 """
-from transport.sql.common import BaseReader, BaseWriter
+from transport.sql.common import BaseReader, BaseWriter, template as _template
+# import transport.sql.common.template as _template
 # import mysql.connector as my
+def template ():
+    return dict(_template(),**{'port':3306})
 class MYSQL:
     def get_provider(self):
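
The relational providers now reuse `template()` from transport.sql.common (added earlier in this diff) and only overlay what differs, here MySQL's default port; the merge is a plain dict overlay:

    # The shared SQL template, as added to transport.sql.common above.
    _base = {'host': 'localhost', 'database': 'database', 'table': 'table'}

    # dict(_base, **overrides) copies _base and applies the provider-specific keys.
    print(dict(_base, **{'port': 3306}))
    # {'host': 'localhost', 'database': 'database', 'table': 'table', 'port': 3306}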

@@ -1,6 +1,9 @@
 import nzpy as nz
-from transport.sql.common import BaseReader, BaseWriter
+from transport.sql.common import BaseReader, BaseWriter, template as _template
+def template ():
+    return dict(_template(),**{'port':5480})
 class Netezza:
     def get_provider(self):
         return 'netezza+nzpy'

@@ -1,10 +1,13 @@
-from transport.sql.common import BaseReader , BaseWriter
+from transport.sql.common import BaseReader , BaseWriter, template as _template
 from psycopg2.extensions import register_adapter, AsIs
 import numpy as np
 register_adapter(np.int64, AsIs)
+def template ():
+    return dict(_template(),**{'port':5432,'chunksize':10000})
 class PG:
     def __init__(self,**_args):
         super().__init__(**_args)

@@ -1,6 +1,8 @@
 import sqlalchemy
 import pandas as pd
 from transport.sql.common import Base, BaseReader, BaseWriter
+def template():
+    return {'database':'path-to-database','table':'table'}
 class SQLite (BaseReader):
     def __init__(self,**_args):
         super().__init__(**_args)

@@ -3,7 +3,11 @@ Handling Microsoft SQL Server via pymssql driver/connector
 """
 import sqlalchemy
 import pandas as pd
-from transport.sql.common import Base, BaseReader, BaseWriter
+from transport.sql.common import Base, BaseReader, BaseWriter, template as _template
+def template ():
+    return dict(_template(),**{'port':1433})
 class MsSQLServer:

@@ -3,6 +3,10 @@ import pandas as pd
 from .. sql.common import BaseReader , BaseWriter
 import sqlalchemy as sqa
+def template():
+    return {'host':'localhost','port':8047,'ssl':False,'table':None,'database':None}
 class Drill :
     __template = {'host':None,'port':None,'ssl':None,'table':None,'database':None}
     def __init__(self,**_args):

@@ -11,6 +11,8 @@ from pyspark.sql.types import *
 from pyspark.sql.functions import col, to_date, to_timestamp
 import copy
+def template():
+    return {'catalog':None,'database':None,'table':None}
 class Iceberg :
     def __init__(self,**_args):
         """
