From a1cf78a8897809e0596d976600f436921b7d3bbc Mon Sep 17 00:00:00 2001
From: Steve Nyemba
Date: Fri, 18 Oct 2024 11:41:52 -0500
Subject: [PATCH] bug fixes: drill inheritance and metadata function

---
 setup.py                        |  2 +-
 transport/duck.py               | 19 -------------------
 transport/providers/__init__.py |  3 ++-
 transport/sql/common.py         |  4 +++-
 transport/warehouse/__init__.py |  2 +-
 transport/warehouse/iceberg.py  | 12 ++++++++++++
 6 files changed, 19 insertions(+), 23 deletions(-)
 delete mode 100644 transport/duck.py

diff --git a/setup.py b/setup.py
index 7bb44e8..dcb52a0 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ args = {

     "packages": find_packages(include=['info','transport', 'transport.*'])}
 args["keywords"]=['mongodb','duckdb','couchdb','rabbitmq','file','read','write','s3','sqlite']
-args["install_requires"] = ['pyncclient','duckdb-engine','pymongo','sqlalchemy','pandas','typer','pandas-gbq','numpy','cloudant','pika','nzpy','termcolor','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python','numpy','pymssql']
+args["install_requires"] = ['pyncclient','duckdb-engine','pymongo','sqlalchemy','pandas','typer','pandas-gbq','numpy','cloudant','pika','nzpy','termcolor','boto3','boto','pyarrow','google-cloud-bigquery','google-cloud-bigquery-storage','flask-session','smart_open','botocore','psycopg2-binary','mysql-connector-python','numpy','pymssql','pyspark']
 args["url"] = "https://healthcareio.the-phi.com/git/code/transport.git"
 args['scripts'] = ['bin/transport']
 # if sys.version_info[0] == 2 :
diff --git a/transport/duck.py b/transport/duck.py
deleted file mode 100644
index 7d580c9..0000000
--- a/transport/duck.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""
-This file will be intended to handle duckdb database
-"""
-
-import duckdb
-from transport.common import Reader,Writer
-
-class Duck(Reader):
-    def __init__(self,**_args):
-        super().__init__(**_args)
-        self._path = None if 'path' not in _args else _args['path']
-        self._handler = duckdb.connect() if not self._path else duckdb.connect(self._path)
-
-
-class DuckReader(Duck) :
-    def __init__(self,**_args):
-        super().__init__(**_args)
-    def read(self,**_args) :
-        pass
\ No newline at end of file
diff --git a/transport/providers/__init__.py b/transport/providers/__init__.py
index 35779d1..556df39 100644
--- a/transport/providers/__init__.py
+++ b/transport/providers/__init__.py
@@ -46,6 +46,7 @@ AWS_S3 = 's3'
 RABBIT = RABBITMQ

 ICEBERG='iceberg'
 APACHE_ICEBERG = 'iceberg'
-
+DRILL = 'drill'
+APACHE_DRILL = 'drill'
 # QLISTENER = 'qlistener'
\ No newline at end of file
diff --git a/transport/sql/common.py b/transport/sql/common.py
index 0a55ed7..cea285e 100644
--- a/transport/sql/common.py
+++ b/transport/sql/common.py
@@ -9,8 +9,9 @@ import pandas as pd

 class Base:
     def __init__(self,**_args):
+        # print ([' ## ',_args])
         self._host = _args['host'] if 'host' in _args else 'localhost'
-        self._port = None
+        self._port = None if 'port' not in _args else _args['port']
         self._database = _args['database']
         self._table = _args['table'] if 'table' in _args else None
         self._engine= sqa.create_engine(self._get_uri(**_args),future=True)
@@ -105,6 +106,7 @@
         if 'sql' in _args :
             sql = _args['sql']
         else:
+            # print (dir (self))
             _table = _args['table'] if 'table' in _args else self._table
             sql = f'SELECT * FROM {_table}'
         return self.apply(sql)
diff --git a/transport/warehouse/__init__.py b/transport/warehouse/__init__.py
index d82324e..bcd76fd 100644
--- a/transport/warehouse/__init__.py
+++ b/transport/warehouse/__init__.py
@@ -4,4 +4,4 @@ This namespace/package is intended to handle read/writes against data warehouse
  - clickhouse (...)
 """

-from . import iceberg
\ No newline at end of file
+from . import iceberg, drill
\ No newline at end of file
diff --git a/transport/warehouse/iceberg.py b/transport/warehouse/iceberg.py
index 4f6eca0..a22c16e 100644
--- a/transport/warehouse/iceberg.py
+++ b/transport/warehouse/iceberg.py
@@ -1,4 +1,10 @@
+"""
+dependency:
+  - spark and SPARK_HOME environment variable must be set
+"""
 from pyspark.sql import SparkSession
+from pyspark import SparkContext
+
 import copy

 class Iceberg :
@@ -7,10 +13,16 @@
         providing catalog meta information (you must get this from apache iceberg)
         """
         #
+        # Turning off logging (it's annoying & un-professional)
+        #
+        # _spconf = SparkContext()
+        # _spconf.setLogLevel("ERROR")
+        #
         # @TODO:
         # Make arrangements for additional configuration elements
         #
         self._session = SparkSession.builder.getOrCreate()
+        # self._session.sparkContext.setLogLevel("ERROR")
         self._catalog = self._session.catalog
         self._table = _args['table'] if 'table' in _args else None
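
Note on the transport/warehouse/iceberg.py change: both log-silencing attempts ship commented out. Below is a minimal sketch of the variant that relies only on documented PySpark calls (SparkSession.builder.getOrCreate() and SparkContext.setLogLevel()); it illustrates the approach, it is not code from this patch:

    from pyspark.sql import SparkSession

    class Iceberg:
        def __init__(self, **_args):
            # Reuse (or lazily create) the one session; its sparkContext is
            # the already-running context, so no second SparkContext() is
            # constructed.
            self._session = SparkSession.builder.getOrCreate()
            # Silence JVM-side INFO/WARN chatter; errors still surface.
            self._session.sparkContext.setLogLevel("ERROR")
            self._catalog = self._session.catalog
            self._table = _args['table'] if 'table' in _args else None

The _spconf = SparkContext() variant, by contrast, competes with the session builder for the single JVM context and fails with "Cannot run multiple SparkContexts at once" when a session already exists, which is presumably why it stayed commented out.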
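The one-line port fix in transport/sql/common.py is what makes the new Drill provider usable: Base.__init__ previously hard-coded self._port = None, so a caller-supplied port never reached the engine URI that subclasses build. A hypothetical usage sketch follows; the transport.factory.instance entry point and its keyword names are assumptions about the surrounding library rather than something this diff shows, and 8047 is simply Drill's default REST port:

    import transport
    from transport import providers

    # Hypothetical call (factory API assumed): with this patch, 'port' is
    # kept instead of being overwritten with None in Base.__init__.
    reader = transport.factory.instance(
        provider=providers.DRILL,   # constant introduced by this patch
        host='localhost',
        port=8047,                  # now honored when building the URI
        database='dfs.tmp',
        table='sample_table',       # hypothetical table name
    )
    df = reader.read()              # BaseReader.read issues SELECT * FROM sample_table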