optimizations mongodb

pull/1/head
Steve Nyemba 3 years ago
parent 38e1bce6c2
commit e5fadc64a0

@ -63,8 +63,8 @@ class Post(Process):
else: else:
self.PROVIDER = args['target']['provider'] self.PROVIDER = args['target']['provider']
args['target']['context'] = 'write' args['target']['context'] = 'write'
self.store = args['target']
self.writer = transport.instance(**args['target']) # self.writer = transport.instance(**args['target'])
# #
# If the table doesn't exist, maybe create it? # If the table doesn't exist, maybe create it?
# #
@ -86,9 +86,9 @@ class Post(Process):
else: else:
value = '' value = ''
_info[name] = _info[name].fillna(value) _info[name] = _info[name].fillna(value)
writer = transport.factory.instance(**self.store)
self.writer.write(_info) writer.write(_info)
self.writer.close() writer.close()
class ETL (Process): class ETL (Process):
@ -139,11 +139,11 @@ class ETL (Process):
# #
# @TODO: locks # @TODO: locks
for i in np.arange(self.JOB_COUNT) : for i in np.arange(self.JOB_COUNT) :
print ()
print (i)
_id = 'segment # '.join([str(i),' ',self.name]) _id = 'segment # '.join([str(i),' ',self.name])
indexes = rows[i] indexes = rows[i]
segment = idf.loc[indexes,:].copy() #.to_dict(orient='records') segment = idf.loc[indexes,:].copy() #.to_dict(orient='records')
if segment.shape[0] == 0 :
continue
proc = Post(target = self._oargs,rows = segment,name=_id) proc = Post(target = self._oargs,rows = segment,name=_id)
self.jobs.append(proc) self.jobs.append(proc)
proc.start() proc.start()

@ -20,7 +20,9 @@ else:
from common import Reader, Writer from common import Reader, Writer
import json import json
import re import re
from multiprocessing import Lock, RLock
class Mongo : class Mongo :
lock = RLock()
""" """
Basic mongodb functions are captured here Basic mongodb functions are captured here
""" """
@ -44,6 +46,7 @@ class Mongo :
self.uid = args['doc'] #-- document identifier self.uid = args['doc'] #-- document identifier
self.dbname = args['dbname'] if 'dbname' in args else args['db'] self.dbname = args['dbname'] if 'dbname' in args else args['db']
self.db = self.client[self.dbname] self.db = self.client[self.dbname]
self._lock = False if 'lock' not in args else args['lock']
def isready(self): def isready(self):
p = self.dbname in self.client.list_database_names() p = self.dbname in self.client.list_database_names()
@ -144,10 +147,17 @@ class MongoWriter(Mongo,Writer):
# if type(info) == list : # if type(info) == list :
# self.db[self.uid].insert_many(info) # self.db[self.uid].insert_many(info)
# else: # else:
if type(info) == list or type(info) == pd.DataFrame : try:
self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
else: if self._lock :
self.db[self.uid].insert_one(info) Mongo.lock.acquire()
if type(info) == list or type(info) == pd.DataFrame :
self.db[self.uid].insert_many(info if type(info) == list else info.to_dict(orient='records'))
else:
self.db[self.uid].insert_one(info)
finally:
if self._lock :
Mongo.lock.release()
def set(self,document): def set(self,document):
""" """
if no identifier is provided the function will delete the entire collection and set the new document. if no identifier is provided the function will delete the entire collection and set the new document.

Loading…
Cancel
Save