bug fixes: drill inheritance and met data function

1 month ago · a1cf78a889
parent 685aac7d6b
commit a1cf78a889
6 changed files with 19 additions and 23 deletions
--- a/setup.py
+++ b/setup.py
@ -19,7 +19,7 @@ args    = {

    "packages": find_packages(include=['info','transport', 'transport.*'])}
 args["keywords"]=['mongodb','duckdb','couchdb','rabbitmq','file','read','write','s3','sqlite']
-args["install_requires"] = ['pyncclient','duckdb-engine','pymongo','sqlalchemy','pandas','typer','pandas-gbq','numpy','cloudant','pika','nzpy','termcolor','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python','numpy','pymssql']
+args["install_requires"] = ['pyncclient','duckdb-engine','pymongo','sqlalchemy','pandas','typer','pandas-gbq','numpy','cloudant','pika','nzpy','termcolor','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python','numpy','pymssql','pyspark']
 args["url"] =   "https://healthcareio.the-phi.com/git/code/transport.git"
 args['scripts'] = ['bin/transport']
 # if sys.version_info[0] == 2 :
--- a/transport/duck.py
+++ b/transport/duck.py
@ -1,19 +0,0 @@
-"""
-This file will be intended to handle duckdb database
-"""
-
-import duckdb
-from transport.common import Reader,Writer
-
-class Duck(Reader):
-    def __init__(self,**_args):
-        super().__init__(**_args)
-        self._path = None if 'path' not in _args else _args['path']
-        self._handler = duckdb.connect() if not self._path else duckdb.connect(self._path)
-
-
-class DuckReader(Duck) :
-    def __init__(self,**_args):
-        super().__init__(**_args)
-    def read(self,**_args) :
-        pass
--- a/transport/providers/init.py
+++ b/transport/providers/init.py
@ -46,6 +46,7 @@ AWS_S3  = 's3'
 RABBIT = RABBITMQ
 ICEBERG='iceberg'
 APACHE_ICEBERG = 'iceberg'
-
+DRILL = 'drill'
+APACHE_DRILL = 'drill'
 # QLISTENER = 'qlistener'
    
--- a/transport/sql/common.py
+++ b/transport/sql/common.py
@ -9,8 +9,9 @@ import pandas as pd

 class Base:
    def __init__(self,**_args):
+        # print ([' ## ',_args])
        self._host = _args['host'] if 'host' in _args else 'localhost'
-        self._port = None
+        self._port = None if 'port' not in _args else _args['port']
        self._database = _args['database']
        self._table = _args['table'] if 'table' in _args else None
        self._engine= sqa.create_engine(self._get_uri(**_args),future=True)
@ -105,6 +106,7 @@ class BaseReader(SQLBase):
        if 'sql' in _args :
            sql = _args['sql']
        else:
+            # print (dir (self))
            _table = _args['table'] if 'table' in _args else self._table
            sql = f'SELECT * FROM {_table}'
        return self.apply(sql)
--- a/transport/warehouse/init.py
+++ b/transport/warehouse/init.py
@ -4,4 +4,4 @@ This namespace/package is intended to handle read/writes against data warehouse
    - clickhouse (...)
 """

-from . import iceberg
+from . import iceberg, drill
--- a/transport/warehouse/iceberg.py
+++ b/transport/warehouse/iceberg.py
@ -1,4 +1,10 @@
+"""
+dependency:
+    - spark and SPARK_HOME environment variable must be set
+"""
 from pyspark.sql import SparkSession
+from pyspark import SparkContext
+
 import copy

 class Iceberg :
@ -7,10 +13,16 @@ class Iceberg :
        providing catalog meta information (you must get this from apache iceberg)
        """
        #
+        # Turning off logging (it's annoying & un-professional)
+        #
+        # _spconf = SparkContext()
+        # _spconf.setLogLevel("ERROR")
+        #
        # @TODO:
        #   Make arrangements for additional configuration elements 
        #
        self._session = SparkSession.builder.getOrCreate()
+        # self._session.sparkContext.setLogLevel("ERROR")
        self._catalog = self._session.catalog
        self._table = _args['table'] if 'table' in _args else None