v2.2.0 #5

Merged
steve merged 34 commits from v2.2.0 into master 5 days ago

@ -177,7 +177,25 @@ def register (label:Annotated[str,typer.Argument(help="unique label that will be
except Exception as e: except Exception as e:
_msg = f"""{TIMES_MARK} {e}""" _msg = f"""{TIMES_MARK} {e}"""
print (_msg) print (_msg)
@app_r.command(name="template")
def template(name:Annotated[str,typer.Argument(help="database technology provider" ) ]):
"""
This function will generate a template entry for the registry (content of an auth file)
"""
#
# retrieve the provider and display the template if it has one
for _module in ['sql','cloud','warehouse','nosql','other'] :
ref = getattr(transport,_module) if hasattr(transport,_module) else None
_entry = {}
if ref :
if hasattr(ref,name) :
_pointer = getattr(ref,name)
_entry = dict({'provider':name},**_pointer.template()) if hasattr(_pointer,'template') else {}
break
#
#
print ( json.dumps(_entry))
pass
@app_r.command(name="list") @app_r.command(name="list")
def register_list (): def register_list ():
""" """

@ -46,9 +46,4 @@ include = [ "transport", "transport.*"]
[tool.setuptools.dynamic] [tool.setuptools.dynamic]
version = {attr = "transport.info.__version__"} version = {attr = "transport.info.__version__"}
#authors = {attr = "transport.__author__"}
# If you have a info.py file, you might also want to include the author dynamically:
# [tool.setuptools.dynamic]
# version = {attr = "info.__version__"}
#authors = {attr = "info.__author__"} #authors = {attr = "info.__author__"}

@ -14,6 +14,8 @@ import numpy as np
import time import time
MAX_CHUNK = 2000000 MAX_CHUNK = 2000000
def template ():
return {'provider':'bigquery','private_key':'path-to-key','dataset':'name-of-dataset','table':'table'}
class BigQuery: class BigQuery:
def __init__(self,**_args): def __init__(self,**_args):
path = _args['service_key'] if 'service_key' in _args else _args['private_key'] path = _args['service_key'] if 'service_key' in _args else _args['private_key']
@ -23,6 +25,7 @@ class BigQuery:
self.dtypes = _args['dtypes'] if 'dtypes' in _args else None self.dtypes = _args['dtypes'] if 'dtypes' in _args else None
self.table = _args['table'] if 'table' in _args else None self.table = _args['table'] if 'table' in _args else None
self.client = bq.Client.from_service_account_json(self.path) self.client = bq.Client.from_service_account_json(self.path)
def meta(self,**_args): def meta(self,**_args):
""" """
This function returns meta data for a given table or query with dataset/table properly formatted This function returns meta data for a given table or query with dataset/table properly formatted

@ -17,6 +17,8 @@ import sqlalchemy
# from transport.common import Reader,Writer # from transport.common import Reader,Writer
import pandas as pd import pandas as pd
def template ():
return {'provider':'databricks','host':'fqn-host','token':'token','cluster_path':'path-of-cluster','catalog':'name-of-catalog','database':'schema-or-database','table':'table'}
class Bricks: class Bricks:
""" """

@ -8,6 +8,8 @@ import pandas as pd
from io import StringIO from io import StringIO
import json import json
import nextcloud_client as nextcloud import nextcloud_client as nextcloud
def template():
return {"url":None,"token":None,"uid":None,"file":None}
class Nextcloud : class Nextcloud :
def __init__(self,**_args): def __init__(self,**_args):

@ -20,6 +20,8 @@ from io import StringIO
import pandas as pd import pandas as pd
import json import json
def template():
return {'access_key':'access-key','secret_key':'secret-key','region':'region','bucket':'name-of-bucket','file':'file-name','chunksize':10000}
class s3 : class s3 :
""" """
@TODO: Implement a search function for a file given a bucket?? @TODO: Implement a search function for a file given a bucket??

@ -1,6 +1,6 @@
__app_name__ = 'data-transport' __app_name__ = 'data-transport'
__author__ = 'The Phi Technology' __author__ = 'The Phi Technology'
__version__= '2.2.30' __version__= '2.2.42'
__email__ = "info@the-phi.com" __email__ = "info@the-phi.com"
__edition__= 'community' __edition__= 'community'
__license__=f""" __license__=f"""

@ -111,8 +111,8 @@ class IETL(IReader) :
_data = super().read(**_args) _data = super().read(**_args)
_schema = super().meta() _schema = super().meta()
for _kwargs in self._targets : for _kwargs in self._targets :
if _schema : # if _schema :
_kwargs['schema'] = _schema # _kwargs['schema'] = _schema
self.post(_data,**_kwargs) self.post(_data,**_kwargs)
return _data return _data

@ -11,6 +11,8 @@ import sys
# from transport.common import Reader, Writer # from transport.common import Reader, Writer
from datetime import datetime from datetime import datetime
def template():
return {'dbname':'database','doc':'document','username':'username','password':'password','url':'url-with-port'}
class Couch: class Couch:
""" """

@ -19,7 +19,8 @@ import json
import re import re
from multiprocessing import Lock, RLock from multiprocessing import Lock, RLock
from transport.common import IEncoder from transport.common import IEncoder
def template():
return {'provider':'mongodb','host':'localhost','port':27017,'db':'db-name','collection':'collection-name','username':'username','password':'password','mechanism':'SCRAM-SHA-256'}
class Mongo : class Mongo :
lock = RLock() lock = RLock()
""" """

@ -4,6 +4,10 @@ This file is a wrapper around pandas built-in functionalities to handle characte
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import os import os
def template():
return {'path':None,'delimiter':None}
class File : class File :
def __init__(self,**params): def __init__(self,**params):
""" """

@ -7,7 +7,8 @@ import requests
from io import StringIO from io import StringIO
import pandas as pd import pandas as pd
def template():
return {'url':None, 'headers':{'key':'value'}}
class Reader: class Reader:
""" """
This class is designed to read data from an Http request file handler provided to us by flask This class is designed to read data from an Http request file handler provided to us by flask

@ -17,6 +17,10 @@ import sys
# from common import Reader, Writer # from common import Reader, Writer
import json import json
from multiprocessing import RLock from multiprocessing import RLock
def template():
return {'port':5672,'host':'localhost','queue':None,'vhost':None,'username':None,'password':None}
class MessageQueue: class MessageQueue:
""" """
This class hierarchy is designed to handle interactions with a queue server using pika framework (our tests are based on rabbitmq) This class hierarchy is designed to handle interactions with a queue server using pika framework (our tests are based on rabbitmq)

@ -51,6 +51,7 @@ def init (email,path=REGISTRY_PATH,override=False,_file=REGISTRY_FILE):
p = '@' in email p = '@' in email
#q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai','org'] #q = False if '.' not in email else email.split('.')[-1] in ['edu','com','io','ai','org']
q = len(email.split('.')[-1]) in [2,3] q = len(email.split('.')[-1]) in [2,3]
if p and q : if p and q :
_config = {"email":email,'version':__version__} _config = {"email":email,'version':__version__}
if not os.path.exists(path): if not os.path.exists(path):

@ -6,7 +6,8 @@ import sqlalchemy as sqa
from sqlalchemy import text , MetaData, inspect from sqlalchemy import text , MetaData, inspect
import pandas as pd import pandas as pd
def template():
return {'host':'localhost','database':'database','table':'table'}
class Base: class Base:
def __init__(self,**_args): def __init__(self,**_args):
self._host = _args['host'] if 'host' in _args else 'localhost' self._host = _args['host'] if 'host' in _args else 'localhost'

@ -2,7 +2,8 @@
This module implements the handler for duckdb (in memory or not) This module implements the handler for duckdb (in memory or not)
""" """
from transport.sql.common import Base, BaseReader, BaseWriter from transport.sql.common import Base, BaseReader, BaseWriter
def template ():
return {'database':'path-to-database','table':'table'}
class Duck : class Duck :
def __init__(self,**_args): def __init__(self,**_args):
# #

@ -1,8 +1,13 @@
""" """
This file implements support for mysql and maria db (with drivers mysql+mysql) This file implements support for mysql and maria db (with drivers mysql+mysql)
""" """
from transport.sql.common import BaseReader, BaseWriter from transport.sql.common import BaseReader, BaseWriter, template as _template
# import transport.sql.common.template as _template
# import mysql.connector as my # import mysql.connector as my
def template ():
return dict(_template(),**{'port':3306})
class MYSQL: class MYSQL:
def get_provider(self): def get_provider(self):

@ -1,6 +1,9 @@
import nzpy as nz import nzpy as nz
from transport.sql.common import BaseReader, BaseWriter from transport.sql.common import BaseReader, BaseWriter, template as _template
def template ():
return dict(_template(),**{'port':5480})
class Netezza: class Netezza:
def get_provider(self): def get_provider(self):
return 'netezza+nzpy' return 'netezza+nzpy'

@ -1,10 +1,13 @@
from transport.sql.common import BaseReader , BaseWriter from transport.sql.common import BaseReader , BaseWriter, template as _template
from psycopg2.extensions import register_adapter, AsIs from psycopg2.extensions import register_adapter, AsIs
import numpy as np import numpy as np
register_adapter(np.int64, AsIs) register_adapter(np.int64, AsIs)
def template ():
return dict(_template(),**{'port':5432,'chunksize':10000})
class PG: class PG:
def __init__(self,**_args): def __init__(self,**_args):
super().__init__(**_args) super().__init__(**_args)

@ -1,6 +1,8 @@
import sqlalchemy import sqlalchemy
import pandas as pd import pandas as pd
from transport.sql.common import Base, BaseReader, BaseWriter from transport.sql.common import Base, BaseReader, BaseWriter
def template():
return {'database':'path-to-database','table':'table'}
class SQLite (BaseReader): class SQLite (BaseReader):
def __init__(self,**_args): def __init__(self,**_args):
super().__init__(**_args) super().__init__(**_args)

@ -3,7 +3,11 @@ Handling Microsoft SQL Server via pymssql driver/connector
""" """
import sqlalchemy import sqlalchemy
import pandas as pd import pandas as pd
from transport.sql.common import Base, BaseReader, BaseWriter from transport.sql.common import Base, BaseReader, BaseWriter, template as _template
def template ():
return dict(_template(),**{'port':1433})
class MsSQLServer: class MsSQLServer:

@ -3,6 +3,10 @@ import pandas as pd
from .. sql.common import BaseReader , BaseWriter from .. sql.common import BaseReader , BaseWriter
import sqlalchemy as sqa import sqlalchemy as sqa
def template():
return {'host':'localhost','port':8047,'ssl':False,'table':None,'database':None}
class Drill : class Drill :
__template = {'host':None,'port':None,'ssl':None,'table':None,'database':None} __template = {'host':None,'port':None,'ssl':None,'table':None,'database':None}
def __init__(self,**_args): def __init__(self,**_args):

@ -11,6 +11,8 @@ from pyspark.sql.types import *
from pyspark.sql.functions import col, to_date, to_timestamp from pyspark.sql.functions import col, to_date, to_timestamp
import copy import copy
def template():
return {'catalog':None,'database':None,'table':None}
class Iceberg : class Iceberg :
def __init__(self,**_args): def __init__(self,**_args):
""" """

Loading…
Cancel
Save