Bug fix and enhancement: writers now accept pandas DataFrames

pull/1/head
Steve L. Nyemba 3 years ago
parent 185158f006
commit 79cdc0c0d0

@ -41,6 +41,7 @@ class Post(Process):
self.rows = args['rows'] self.rows = args['rows']
def run(self): def run(self):
_info = {"values":self.rows} if 'couch' in self.PROVIDER else self.rows _info = {"values":self.rows} if 'couch' in self.PROVIDER else self.rows
self.writer.write(_info) self.writer.write(_info)
self.writer.close() self.writer.close()
@ -70,7 +71,7 @@ class ETL (Process):
rows = np.array_split(np.arange(idf.shape[0]),self.JOB_COUNT) rows = np.array_split(np.arange(idf.shape[0]),self.JOB_COUNT)
jobs = [] jobs = []
for i in rows : for i in rows :
segment = idf.loc[i,:].to_dict(orient='records') segment = idf.loc[i,:] #.to_dict(orient='records')
proc = Post(target = self._oargs,rows = segment) proc = Post(target = self._oargs,rows = segment)
jobs.append(proc) jobs.append(proc)
proc.start() proc.start()
@ -89,6 +90,6 @@ if __name__ == '__main__' :
if 'source' in SYS_ARGS : if 'source' in SYS_ARGS :
_config['source'] = {"type":"disk.DiskReader","args":{"path":SYS_ARGS['source'],"delimiter":","}} _config['source'] = {"type":"disk.DiskReader","args":{"path":SYS_ARGS['source'],"delimiter":","}}
_config['jobs'] = 10 if 'jobs' not in SYS_ARGS else SYS_ARGS['jobs'] _config['jobs'] = 10 if 'jobs' not in SYS_ARGS else int(SYS_ARGS['jobs'])
etl = ETL (**_config) etl = ETL (**_config)
etl.start() etl.start()

@ -8,7 +8,7 @@ def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read() return open(os.path.join(os.path.dirname(__file__), fname)).read()
args = { args = {
"name":"data-transport", "name":"data-transport",
"version":"1.3.9.0", "version":"1.3.9.2",
"author":"The Phi Technology LLC","author_email":"info@the-phi.com", "author":"The Phi Technology LLC","author_email":"info@the-phi.com",
"license":"MIT", "license":"MIT",
"packages":["transport"]} "packages":["transport"]}

@ -142,8 +142,8 @@ class MongoWriter(Mongo,Writer):
# if type(info) == list : # if type(info) == list :
# self.db[self.uid].insert_many(info) # self.db[self.uid].insert_many(info)
# else: # else:
if (type(info) == list) : if type(info) == list or type(info) == pd.DataFrame :
self.db[self.uid].insert_many(info) self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
else: else:
self.db[self.uid].insert_one(info) self.db[self.uid].insert_one(info)
def set(self,document): def set(self,document):

@ -157,14 +157,23 @@ class SQLWriter(SQLRW,Writer):
# inspect = False if 'inspect' not in _args else _args['inspect'] # inspect = False if 'inspect' not in _args else _args['inspect']
# cast = False if 'cast' not in _args else _args['cast'] # cast = False if 'cast' not in _args else _args['cast']
if not self.fields : if not self.fields :
_fields = info.keys() if type(info) == dict else info[0].keys() if type(info) == list :
_fields = info[0].keys()
elif type(info) == dict :
_fields = info.keys()
elif type(info) == pd.DataFrame :
_fields = info.columns
# _fields = info.keys() if type(info) == dict else info[0].keys()
_fields = list (_fields) _fields = list (_fields)
self.init(_fields) self.init(_fields)
# #
# @TODO: Use pandas/odbc ? Not sure b/c it requires sqlalchemy # @TODO: Use pandas/odbc ? Not sure b/c it requires sqlalchemy
# #
if type(info) != list : if type(info) != list :
info = [info] #
# We assume two cases here, i.e. a dict or a pd.DataFrame
info = [info] if type(info) == dict else info.values.tolist()
cursor = self.conn.cursor() cursor = self.conn.cursor()
try: try:
_sql = "INSERT INTO :table (:fields) VALUES (:values)".replace(":table",self.table) #.replace(":table",self.table).replace(":fields",_fields) _sql = "INSERT INTO :table (:fields) VALUES (:values)".replace(":table",self.table) #.replace(":table",self.table).replace(":fields",_fields)

Loading…
Cancel
Save