bug fixes: drill inheritance and metadata function

v2.4
Steve Nyemba 20 hours ago
parent 685aac7d6b
commit a1cf78a889

@@ -19,7 +19,7 @@ args = {
     "packages": find_packages(include=['info','transport', 'transport.*'])}
 args["keywords"]=['mongodb','duckdb','couchdb','rabbitmq','file','read','write','s3','sqlite']
-args["install_requires"] = ['pyncclient','duckdb-engine','pymongo','sqlalchemy','pandas','typer','pandas-gbq','numpy','cloudant','pika','nzpy','termcolor','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python','numpy','pymssql']
+args["install_requires"] = ['pyncclient','duckdb-engine','pymongo','sqlalchemy','pandas','typer','pandas-gbq','numpy','cloudant','pika','nzpy','termcolor','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python','numpy','pymssql','pyspark']
 args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
 args['scripts'] = ['bin/transport']
 # if sys.version_info[0] == 2 :

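The only functional change in the hunk above is the new pyspark dependency; the warehouse code later in this commit imports SparkSession directly. A minimal sanity check for that environment, assuming pyspark is installed and SPARK_HOME is set as the iceberg docstring below requires:

from pyspark.sql import SparkSession

# Create (or reuse) a local session and print its version; this is the same
# entry point the iceberg module uses further down in this commit.
_session = SparkSession.builder.getOrCreate()
print(_session.version)
_session.stop()
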
@@ -1,19 +0,0 @@
-"""
-This file will be intended to handle duckdb database
-"""
-import duckdb
-from transport.common import Reader,Writer
-class Duck(Reader):
-    def __init__(self,**_args):
-        super().__init__(**_args)
-        self._path = None if 'path' not in _args else _args['path']
-        self._handler = duckdb.connect() if not self._path else duckdb.connect(self._path)
-class DuckReader(Duck) :
-    def __init__(self,**_args):
-        super().__init__(**_args)
-    def read(self,**_args) :
-        pass

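The dedicated DuckDB wrapper above is removed while duckdb-engine stays in install_requires, so DuckDB access presumably flows through the generic SQLAlchemy path instead. A minimal sketch of that path; the file and table names are illustrative, not part of this commit:

import sqlalchemy as sqa
import pandas as pd

# 'duckdb:///' URIs come from the duckdb-engine dialect kept in setup.py.
_engine = sqa.create_engine('duckdb:///demo.duckdb')
pd.DataFrame({'x': [1, 2, 3]}).to_sql('demo', _engine, index=False, if_exists='replace')
print(pd.read_sql('SELECT * FROM demo', _engine))
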
@@ -46,6 +46,7 @@ AWS_S3 = 's3'
 RABBIT = RABBITMQ
 ICEBERG='iceberg'
 APACHE_ICEBERG = 'iceberg'
+DRILL = 'drill'
+APACHE_DRILL = 'drill'
 # QLISTENER = 'qlistener'

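DRILL and APACHE_DRILL are aliases for the same provider string, mirroring the ICEBERG / APACHE_ICEBERG pair above them. A hedged sketch of how such a constant is typically consumed; the factory call follows the project's usual providers pattern, and the host, port, database and table values are assumptions, not taken from this commit:

import transport
from transport import providers

_reader = transport.get.reader(provider=providers.APACHE_DRILL,
                               host='localhost', port=8047,       # assumed Drill REST port
                               database='dfs.tmp', table='demo')  # assumed storage plugin & table
print(_reader.read())
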
@@ -9,8 +9,9 @@ import pandas as pd
 class Base:
     def __init__(self,**_args):
+        # print ([' ## ',_args])
         self._host = _args['host'] if 'host' in _args else 'localhost'
-        self._port = None
+        self._port = None if 'port' not in _args else _args['port']
         self._database = _args['database']
         self._table = _args['table'] if 'table' in _args else None
         self._engine= sqa.create_engine(self._get_uri(**_args),future=True)
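
The drill classes inherit from this Base, and the hard-coded self._port = None meant a caller could never pass an explicit port down to the connection URI. A standalone restatement of the fixed expression; 8047 is only the customary Drill REST port, used here for illustration:

def _port_of(**_args):
    # same expression as the fixed line above
    return None if 'port' not in _args else _args['port']

print(_port_of(host='localhost'))              # None, as before the fix
print(_port_of(host='localhost', port=8047))   # 8047, now honoured
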
@@ -105,6 +106,7 @@ class BaseReader(SQLBase):
         if 'sql' in _args :
             sql = _args['sql']
         else:
+            # print (dir (self))
             _table = _args['table'] if 'table' in _args else self._table
             sql = f'SELECT * FROM {_table}'
         return self.apply(sql)

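BaseReader.read keeps its sql-or-table fallback; restated in isolation below, with a hypothetical default table name:

def _build_sql(default_table='demo', **_args):
    # use an explicit sql statement when given, otherwise select everything
    # from the requested (or configured) table
    if 'sql' in _args:
        return _args['sql']
    _table = _args['table'] if 'table' in _args else default_table
    return f'SELECT * FROM {_table}'

print(_build_sql())                                     # SELECT * FROM demo
print(_build_sql(sql='SELECT COUNT(*) AS n FROM demo'))
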
@@ -4,4 +4,4 @@ This namespace/package is intended to handle read/writes against data warehouse
 - clickhouse (...)
 """
-from . import iceberg
+from . import iceberg, drill

@@ -1,4 +1,10 @@
+"""
+dependency:
+    - spark and SPARK_HOME environment variable must be set
+"""
 from pyspark.sql import SparkSession
+from pyspark import SparkContext
 import copy
 class Iceberg :
@@ -7,10 +13,16 @@ class Iceberg :
         providing catalog meta information (you must get this from apache iceberg)
         """
         #
+        # Turning off logging (it's annoying & un-professional)
+        #
+        # _spconf = SparkContext()
+        # _spconf.setLogLevel("ERROR")
+        #
         # @TODO:
         # Make arrangements for additional configuration elements
         #
         self._session = SparkSession.builder.getOrCreate()
+        # self._session.sparkContext.setLogLevel("ERROR")
         self._catalog = self._session.catalog
         self._table = _args['table'] if 'table' in _args else None

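Both log-silencing attempts stay commented out in the hunk above. For reference, the second commented line is the conventional route: call setLogLevel on the SparkContext the session already owns, after getOrCreate, rather than instantiating a separate SparkContext. A minimal sketch, independent of this codebase:

from pyspark.sql import SparkSession

_session = SparkSession.builder.getOrCreate()
_session.sparkContext.setLogLevel("ERROR")   # silence INFO/WARN log4j output
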