parent cad54d7b45
commit ee0165de01
@@ -0,0 +1,377 @@
#!/usr/bin/env python3
"""
This file performs the basic tasks that finalize the GAN process:

    - basic stats & analytics
    - rebuild io to another dataset
"""
import pandas as pd
import numpy as np
from multiprocessing import Process, Lock
from google.oauth2 import service_account
from google.cloud import bigquery as bq
import transport
from data.params import SYS_ARGS
import json

# path = '../curation-prod.json'
# credentials = service_account.Credentials.from_service_account_file(path)
# df = pd.read_gbq("SELECT * FROM io.icd10_partial_io",credentials=credentials,dialect='standard')
filename = 'config.json' if 'config' not in SYS_ARGS else SYS_ARGS['config']
f = open(filename)
config = json.loads(f.read())
args = config['pipeline']
f.close()

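#
# Illustrative sketch of the expected shape of config.json. The only key this
# module reads directly is 'pipeline'; the entry attributes shown below are
# assumptions based on how entries are used further down (e.g. 'from' in ApplyOn):
#
#   {
#       "pipeline": [
#           {"context": "person", "from": "person"}
#       ]
#   }
#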
def _formatSQL(**_args):
    """
    This function will build the _map query for a given segment of identifiers.
    """
    sql = """
    select DISTINCT x.person_id synthetic,y.person_id original
    FROM :synthetic.:table x
    INNER JOIN :original.:table y on x.person_id in (:ids)
    AND x.person_id <> y.person_id AND x.gender_source_value = y.gender_source_value
    AND x.year_of_birth = y.year_of_birth
    ORDER BY 1
    """
    table = _args['table']
    original,synthetic = _args['schema']['original'],_args['schema']['synthetic']
    _ids = np.array(_args['ids']).astype(str)
    return sql.replace(":ids",",".join(_ids)).replace(":synthetic",synthetic).replace(":original",original).replace(":table",table)
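#
# Example (illustrative values): with
#   _formatSQL(table='person',schema={'original':'prod','synthetic':'synth'},ids=[1,2,3])
# the rendered query is:
#
#   select DISTINCT x.person_id synthetic,y.person_id original
#   FROM synth.person x
#   INNER JOIN prod.person y on x.person_id in (1,2,3)
#   AND x.person_id <> y.person_id AND x.gender_source_value = y.gender_source_value
#   AND x.year_of_birth = y.year_of_birth
#   ORDER BY 1
#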
def _addCounts(**_args) :
    """
    This function reads the synthetic/original matches returned by a segment query,
    picks one original per synthetic id at random and posts the resulting counts
    to the hard-coded '_map' table of the target data-store.
    """
    store   = _args['store']
    sql     = _args['sql']
    reader = transport.factory.instance(**store['source'])
    _df = reader.read(sql=sql)
    _ids = _df.synthetic.unique()
    _counts = [ np.sum(_df.synthetic == value) for value in _ids]
    original = [_df[_df.synthetic == value].iloc[np.random.choice(np.arange(_counts[_ids.tolist().index(value)]),1),:].original.values[0] for value in _ids]
    _df = pd.DataFrame({"synthetic":_ids,"original":original,"counts":_counts})

    #
    # We can post this to the backend ...
    #
    table = '_map'  #-- Yes this is hard-coded
    writer = transport.factory.instance(**dict(store['target'],**{"parallel":True,"table":table}))
    # if writer.has(table=table) is False:
    #     writer.write(_df)
    # else:
    _schema = [{"name":name,"type":"INTEGER"} for name in _df.columns]
    writer.write(_df,schema=_schema)

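#
# Usage sketch for a single segment (illustrative values; 'store' is the
# source/target entry taken from the configuration file). This mirrors the
# per-segment loop that is commented out in Init below:
#
#   _p = {"ids":[1,2,3], "schema":{"original":"prod","synthetic":"synth"},
#         "store":store, "table":"person"}
#   _p['sql'] = _formatSQL(**_p)
#   _addCounts(**_p)    #-- reads the matches and posts them to the '_map' table
#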
def Init(**_args) :
    """
    This function will build a map of the synthetic to real individuals.
    The assumption is that the synthesized data is stored in the same data-store as the original. The parameters provided are :
    :param  store       object from the configuration file with source,target entries
    :param  table       name of the original/synthetic tables (they should be the same)
    :param  feat        features/attributes ... demographics to account for
    """
    store = _args['store']
    reader = transport.factory.instance(**store['source'])
    original,synthetic = _args['schema']['original'],_args['schema']['synthetic']
    table = _args['table']
    sql = _args['sql'].replace(':synthetic',synthetic).replace(':original',original).replace(':table',table)

    _map = reader.read(sql=sql)

    k = _args['k'] if 'k' in _args else 2
    # _iodf = reader.read(table=table)
    # _ids = _iodf['person_id'].unique().tolist()
    # x_  = np.array_split(_ids,1000)
    jobs = []
    # for _items in x_ :
    #     _p = {"ids":_items,"schema":_args['schema'],'store':store,'table':table}
    #     sql = _formatSQL(**_p)
    #     _p['sql'] = sql
    #     _apply = lambda params: _addCounts(**params)
    #     thread = Process(target=_apply,args=(_p,))
    #     thread.start()
    #     jobs.append(thread)

    # return jobs
    #
    # We have performed a m:m (many-to-many) relationship with original participants and synthetic participants
    # The goal is to obtain a singular map against which records will be migrated
    #
    print ('... computing counts (k)')
    _ids = _map.synthetic.unique()
    _counts = [ np.sum(_map.synthetic == value) for value in _ids]
    original = [_map[_map.synthetic == value].iloc[np.random.choice(np.arange(_counts[_ids.tolist().index(value)]),1),:].original.values[0] for value in _ids]
    print ('Building k-classes/groups')
    _mdf = pd.DataFrame({"synthetic":_ids,"original":original,"counts":_counts})
    i = _mdf.apply(lambda row: row.counts >= k,axis=1)
    _mdf = _mdf[i]
    #
    # Log what just happened here so we know about the equivalence classes,
    # {"module":"binder","action":"map-generation","input":{"k":k,"rows":{"synthetic":_mdf.shape[0],"original":len(_counts)}}}

    return _mdf
    #
    # now we are posting this to target storage ...
    #
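#
# Usage sketch (illustrative values). 'store' is the source/target entry from
# the configuration file and _q is a placeholder for a matching query that
# still carries the :synthetic/:original/:table tokens, similar to the
# template in _formatSQL but without the :ids clause:
#
#   _mdf = Init(store=store, table='person', k=2,
#               schema={'original':'prod','synthetic':'synth'}, sql=_q)
#   #-- _mdf holds one original per synthetic id, for groups of size >= k
#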
def ApplyOn (**_args):
    """
    This function will rewrite SQL that applies the synthetic identifier to the entries of the pipeline.
    We assume that the _map table has two attributes (synthetic and original).
    :param  store           data-store configuration; the 'source' entry is used to read
    :param  config          pipeline entry whose 'from' attribute names the table to rewrite
    :param  original_key    name of the key column shared by the original and synthetic tables
    :param  schema          dictionary with 'original' and 'synthetic' schema names
    """
    store_args = _args['store']
    _config = _args['config']

    table = _config['from']
    reader  = transport.factory.instance(**dict(store_args['source'],**{"table":table}))
    attr = reader.read(limit=1).columns.tolist()
    original_key = _args['original_key'] #-- assuming referential integrity

    # synthetic_key= columns['synthetic']
    # mapped_original=columns['original']
    fields = list(set(attr) - set([original_key]))
    sql = "select _map.synthetic as :original_key,:fields from :original_schema.:table inner join :synthetic_schema._map on _map.original = :table.:original_key"
    sql = sql.replace(":table",table).replace(":fields",",".join(fields))
    sql = sql.replace(":original_key",original_key)
    _schema = _args['schema']
    sql = sql.replace(":original_schema",_schema['original']).replace(":synthetic_schema",_schema['synthetic'])

    return reader.read (sql=sql)

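#
# Example (illustrative values): for table='person', original_key='person_id',
# schema={'original':'prod','synthetic':'synth'} and remaining fields
# ['year_of_birth','gender_source_value'] (order may vary since a set is used),
# the rewritten query is:
#
#   select _map.synthetic as person_id,year_of_birth,gender_source_value
#   from prod.person inner join synth._map on _map.original = person.person_id
#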
if __name__ == '__main__' :
    pass

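#
# Possible end-to-end driver (sketch only; the entry point above is currently
# a no-op). 'store', '_q' and the schema names are placeholders, and args[0]
# stands for one entry of config['pipeline']:
#
#   schema = {'original':'prod','synthetic':'synth'}
#   _mdf   = Init(store=store, table='person', schema=schema, sql=_q, k=2)
#   _df    = ApplyOn(store=store, config=args[0], original_key='person_id', schema=schema)
#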
# class Analytics :
#     """
#     This class will compile basic analytics about a given dataset i.e compare original/synthetic
#     """
#     @staticmethod
#     def distribution(**args):
#         context = args['context']
#         df = args['data']
#         #
#         #-- This data frame counts unique values for each feature (space)
#         df_counts = pd.DataFrame(df.apply(lambda col: col.unique().size),columns=['counts']).T  # unique counts
#         #
#         #-- Get the distributions for common values
#         #
#         names   = [name for name in df_counts.columns.tolist() if name.endswith('_io') == False]
#         ddf     = df.apply(lambda col: pd.DataFrame(col.values,columns=[col.name]).groupby([col.name]).size() ).fillna(0)
#         ddf[context] = ddf.index

#         pass
#     def distance(**args):
#         """
#         This function will measure the distance between
#         """
#         pass
# class Utils :
#     @staticmethod
#     def log(**args):
#         logger = transport.factory.instance(type="mongo.MongoWriter",args={"dbname":"aou","doc":"logs"})
#         logger.write(args)
#         logger.close()
#     class get :
#         @staticmethod
#         def pipeline(table,path) :
#             # contexts    = args['contexts'].split(',') if type(args['contexts']) == str else args['contexts']
#             config = json.loads((open(path)).read())
#             pipeline    = config['pipeline']
#             # return [ item for item in pipeline if item['context'] in contexts]
#             pipeline =  [item for item in pipeline if 'from' in item and item['from'].strip() == table]
#             Utils.log(module=table,action='init',input={"pipeline":pipeline})
#             return pipeline
#         @staticmethod
#         def sql(**args) :
#             """
#             This function is intended to build SQL query for the remainder of the table that was not synthesized
#             :config configuration entries
#             :from   source of the table name
#             :dataset    name of the source dataset

#             """
#             SQL = ["SELECT * FROM :from "]
#             SQL_FILTER = []
#             NO_FILTERS_FOUND = True
#             # pipeline = Utils.get.config(**args)
#             pipeline = args['pipeline']
#             REVERSE_QUALIFIER = {'IN':'NOT IN','NOT IN':'IN','=':'<>','<>':'='}
#             for item in pipeline :

#                 if 'filter' in item :
#                     if NO_FILTERS_FOUND :
#                         NO_FILTERS_FOUND = False
#                         SQL  += ['WHERE']
#                     #
#                     # Let us load the filter in the SQL Query
#                     FILTER = item['filter']
#                     QUALIFIER = REVERSE_QUALIFIER[FILTER['qualifier'].upper()]
#                     SQL_FILTER += [" ".join([FILTER['field'], QUALIFIER,'(',FILTER['value'],')']).replace(":dataset",args['dataset'])]
#             src = ".".join([args['dataset'],args['from']])
#             SQL += [" AND ".join(SQL_FILTER)]
#             #
#             # let's pull the field schemas out of the table definition
#             #
#             Utils.log(module=args['from'],action='sql',input={"sql":" ".join(SQL) })
#             return " ".join(SQL).replace(":from",src)

# def mk(**args) :
#     dataset  = args['dataset']
#     client  = args['client'] if 'client' in args else bq.Client.from_service_account_file(args['private_key'])
#     #
#     # let us see if we have a dataset handy here
#     #
#     datasets = list(client.list_datasets())
#     found = [item for item in datasets if item.dataset_id == dataset]

#     if not found :

#         return client.create_dataset(dataset)
#     return found[0]

# def move (args):
#     """
#     This function will move a table from the synthetic dataset into a designated location
#     This is the simplest case for finalizing a synthetic data set
#     :private_key
#     """
#     pipeline   = Utils.get.pipeline(args['from'],args['config'])
#     _args = json.loads((open(args['config'])).read())
#     _args['pipeline'] = pipeline
#     # del _args['pipeline']
#     args = dict(args,**_args)
#     # del args['pipeline']
#     # private_key = args['private_key']
#     client      = bq.Client.from_service_account_json(args['private_key'])

#     dataset     = args['dataset']
#     if pipeline :
#         SQL         = [ ''.join(["SELECT * FROM io.",item['context'],'_full_io']) for item in pipeline]
#         SQL         += [Utils.get.sql(**args)]
#         SQL         =  ('\n UNION ALL \n'.join(SQL).replace(':dataset','io'))
#     else:
#         #
#         # moving a table to a designated location
#         tablename = args['from']
#         if 'sql' not in args :
#             SQL = "SELECT * FROM :dataset.:table"
#         else:
#             SQL = args['sql']
#         SQL = SQL.replace(":dataset",dataset).replace(":table",tablename)
#     Utils.log(module=args['from'],action='sql',input={'sql':SQL})
#     #
#     # At this point we have gathered all the tables in the io folder and we should now see if we need to merge with the remainder from the original table
#     #

#     odataset    = mk(dataset=dataset+'_io',client=client)
#     # SQL =       "SELECT * FROM io.:context_full_io".replace(':context',context)
#     config = bq.QueryJobConfig()
#     config.destination = client.dataset(odataset.dataset_id).table(args['from'])
#     config.use_query_cache = True
#     config.allow_large_results = True
#     config.priority = 'INTERACTIVE'
#     #
#     #

#     schema = client.get_table(client.dataset(args['dataset']).table(args['from'])).schema
#     fields = [" ".join(["CAST (",item.name,"AS",item.field_type.replace("INTEGER","INT64").replace("FLOAT","FLOAT64"),") ",item.name]) for item in schema]
#     SQL = SQL.replace("*"," , ".join(fields))
#     # print (SQL)
#     out = client.query(SQL,location='US',job_config=config)
#     Utils.log(module=args['from'],action='move',input={'job':out.job_id})
#     return (out.job_id)

# import pandas as pd
# import numpy as np
# from google.oauth2 import service_account
# import json

# # path = '../curation-prod.json'
# # credentials = service_account.Credentials.from_service_account_file(path)
# # df = pd.read_gbq("SELECT * FROM io.icd10_partial_io",credentials=credentials,dialect='standard')
# filename = 'config.json' if 'config' not in SYS_ARGS else SYS_ARGS['config']
# f = open(filename)
# config = json.loads(f.read())
# args = config['pipeline']
# f.close()

# if __name__ == '__main__' :
#     """
#     Usage :
#         finalize --<move|stats> --contexts <c1,c2,...c3> --from <table>
#     """

#     if 'move' in SYS_ARGS :

#         if 'init' in SYS_ARGS :
#             dep = config['dep'] if 'dep' in config else {}
#             info = []

#             if 'queries' in dep :
#                 info += dep['queries']
#                 print ('________')
#             if 'tables' in dep :
#                 info += dep['tables']
#             args = {}
#             jobs = []
#             for item in info :
#                 args = {}
#                 if type(item) == str :
#                     args['from'] = item
#                     name = item
#                 else:
#                     args = item
#                     name = item['from']
#                 args['config'] = SYS_ARGS['config']
#                 # args['pipeline'] = []
#                 job = Process(target=move,args=(args,))
#                 job.name = name
#                 jobs.append(job)
#                 job.start()

#             # while len(jobs) > 0 :
#             #     jobs = [job for job in jobs if job.is_alive()]
#             #     time.sleep(1)

#         else:
#             move(SYS_ARGS)
#         # # table = SYS_ARGS['from']
#         # # args = dict(config,**{"private_key":"../curation-prod.json"})
#         # args = dict(args,**SYS_ARGS)
#         # contexts = [item['context'] for item in config['pipeline'] if item['from'] == SYS_ARGS['from']]
#         # log = []
#         # if contexts :
#         #     args['contexts'] = contexts
#         #     log = move(**args)

#         # else:
#         #     tables = args['from'].split(',')
#         #     for name in tables :
#         #         name = name.strip()
#         #         args['from'] = name
#         #         log += [move(**args)]
#         # print ("\n".join(log))

#     else:
#         print ("NOT YET READY !")