@ -146,6 +146,8 @@ class Components :
_args [ ' data ' ] = _args [ ' data ' ] [ list ( set ( _args [ ' data ' ] . columns ) - set ( x_cols ) ) ]
_args [ ' data ' ] = _args [ ' data ' ] [ list ( set ( _args [ ' data ' ] . columns ) - set ( x_cols ) ) ]
if ' gpu ' in args :
if ' gpu ' in args :
_args [ ' gpu ' ] = self . set_gpu ( gpu = args [ ' gpu ' ] )
_args [ ' gpu ' ] = self . set_gpu ( gpu = args [ ' gpu ' ] )
if ' partition ' in args :
_args [ ' partition ' ] = args [ ' partition ' ]
if df . shape [ 0 ] and df . shape [ 0 ] :
if df . shape [ 0 ] and df . shape [ 0 ] :
#
#
# We have a full blown matrix to be processed
# We have a full blown matrix to be processed
@ -154,7 +156,7 @@ class Components :
print ( " ... skipping training !! " )
print ( " ... skipping training !! " )
if ' autopilot ' in ( list ( args . keys ( ) ) ) :
if ' autopilot ' in ( list ( args . keys ( ) ) ) :
args [ ' data ' ] = df
args [ ' data ' ] = df
print ( [ ' autopilot mode enabled .... ' , args [ ' context ' ] ] )
print ( [ ' autopilot mode enabled .... ' , args [ ' context ' ] ] )
self . generate ( args )
self . generate ( args )
@ -171,6 +173,7 @@ class Components :
r = np . random . dirichlet ( values + .001 ) #-- dirichlet doesn't work on values with zeros
r = np . random . dirichlet ( values + .001 ) #-- dirichlet doesn't work on values with zeros
_sd = values [ values > 0 ] . std ( )
_sd = values [ values > 0 ] . std ( )
_me = values [ values > 0 ] . mean ( )
_me = values [ values > 0 ] . mean ( )
_mi = values . min ( )
x = [ ]
x = [ ]
_type = values . dtype
_type = values . dtype
for index in np . arange ( values . size ) :
for index in np . arange ( values . size ) :
@ -182,7 +185,7 @@ class Components :
value = values [ index ] - ( values [ index ] * r [ index ] )
value = values [ index ] - ( values [ index ] * r [ index ] )
#
#
# randomly shifting the measurements
# randomly shifting the measurements
if np . random . choice ( [ 0 , 1 ] , 1 ) [ 0 ] and _me > _sd :
if np . random . choice ( [ 0 , 1 ] , 1 ) [ 0 ] and _me > _sd :
if np . random . choice ( [ 0 , 1 ] , 1 ) [ 0 ] :
if np . random . choice ( [ 0 , 1 ] , 1 ) [ 0 ] :
value = value * np . divide ( _me , _sd )
value = value * np . divide ( _me , _sd )
else :
else :
@ -273,6 +276,9 @@ class Components :
args [ ' candidates ' ] = 1 if ' candidates ' not in args else int ( args [ ' candidates ' ] )
args [ ' candidates ' ] = 1 if ' candidates ' not in args else int ( args [ ' candidates ' ] )
if ' gpu ' in args :
if ' gpu ' in args :
args [ ' gpu ' ] = self . set_gpu ( gpu = args [ ' gpu ' ] )
args [ ' gpu ' ] = self . set_gpu ( gpu = args [ ' gpu ' ] )
# if 'partition' in args :
# args['logs'] = os.sep.join([args['logs'],str(args['partition'])])
_info = { " module " : " gan-prep " , " action " : " prune " , " shape " : { " rows " : args [ ' data ' ] . shape [ 0 ] , " columns " : args [ ' data ' ] . shape [ 1 ] } }
_info = { " module " : " gan-prep " , " action " : " prune " , " shape " : { " rows " : args [ ' data ' ] . shape [ 0 ] , " columns " : args [ ' data ' ] . shape [ 1 ] } }
logger . write ( _info )
logger . write ( _info )
if args [ ' data ' ] . shape [ 0 ] > 0 and args [ ' data ' ] . shape [ 1 ] > 0 :
if args [ ' data ' ] . shape [ 0 ] > 0 and args [ ' data ' ] . shape [ 1 ] > 0 :
@ -459,12 +465,18 @@ if __name__ == '__main__' :
# COLUMNS = DATA.columns
# COLUMNS = DATA.columns
# DATA = np.array_split(DATA,PART_SIZE)
# DATA = np.array_split(DATA,PART_SIZE)
# args['schema'] = schema
# args['schema'] = schema
#
# Normalize the chip selection read from SYS_ARGS into a list (None when the option is absent)
# NOTE(review): the literals below are kept exactly as they appear in SOURCE -- confirm the padding
#
GPU_CHIPS = SYS_ARGS[' gpu '] if ' gpu ' in SYS_ARGS else None
if isinstance(GPU_CHIPS, str):
    #
    # A non-empty string is split into integer chip ids, an empty string is left untouched
    if GPU_CHIPS:
        GPU_CHIPS = [int(_id.strip()) for _id in GPU_CHIPS.split(' , ')]
elif GPU_CHIPS is not None and not isinstance(GPU_CHIPS, list):
    #
    # Testing against None (not truthiness) so a bare chip id of 0 is wrapped like any other id
    GPU_CHIPS = [GPU_CHIPS]
if ' gpu ' in SYS_ARGS:
    args[' gpu '] = GPU_CHIPS
#
# Processes started further down are collected here
jobs = []
if ' generate ' in SYS_ARGS :
if ' generate ' in SYS_ARGS :
#
#
# Let us see if we have partitions given the log folder
# Let us see if we have partitions given the log folder
content = os . listdir ( os . sep . join ( [ args [ ' logs ' ] , ' train ' , args [ ' context ' ] ] ) )
content = os . listdir ( os . sep . join ( [ args [ ' logs ' ] , ' train ' , args [ ' context ' ] ] ) )
generator = Components ( )
# if ''.join(content).isnumeric() :
# if ''.join(content).isnumeric() :
# #
# #
@ -508,13 +520,60 @@ if __name__ == '__main__' :
# else:
# else:
# generator.generate(args)
# generator.generate(args)
# Components.generate(args)
# Components.generate(args)
generator . generate ( args )
#
# With ' --all-chips ' and a chip list, one generation process is started per chip,
# otherwise a single generation is run in the current process
# NOTE(review): the literals below are kept exactly as they appear in SOURCE -- confirm the padding
#
if ' --all-chips ' in SYS_ARGS and GPU_CHIPS:
    jobs = []
    #
    # Entry point of every process: a new Components object whose generate receives the parameters
    make = lambda _params: (Components()).generate(_params)
    #
    # index starts at 1 because it labels the process, the partition number starts at 0
    for index, _id in enumerate(GPU_CHIPS, start=1):
        _args = copy.deepcopy(args)     #-- private copy of the settings for this process
        #
        # The chip must come from the loop variable _id, no _gpu is assigned in this branch
        _args[' gpu '] = [int(_id)]
        _args[' partition '] = index - 1
        job = Process(target=make, args=(dict(_args),))
        job.name = ' Trainer # ' + str(index)
        job.start()
        jobs.append(job)
else:
    generator = Components()
    generator.generate(args)
else :
else :
# DATA = np.array_split(DATA,PART_SIZE)
# DATA = np.array_split(DATA,PART_SIZE)
agent = Components ( )
#
agent . train ( * * args )
# Let us create n-jobs across n-gpus. The assumption here is that the data that is produced will be a partition
# @TODO: Find better name for partition
#
#
# Training is spread over the chips only when a chip list and ' --all-chips ' are both given
# NOTE(review): the literals below are kept exactly as they appear in SOURCE -- confirm the padding
#
if not (GPU_CHIPS and ' --all-chips ' in SYS_ARGS):
    #
    # The choice of the chip will be made internally
    agent = Components()
    agent.train(**args)
else:
    #
    # Entry point of every process: a new Components object whose train receives the parameters
    make = lambda _params: (Components()).train(**_params)
    #
    # index starts at 1 because it labels the process, the partition number starts at 0
    for index, _gpu in enumerate(GPU_CHIPS, start=1):
        _args = copy.deepcopy(args)     #-- private copy of the settings for this process
        _args[' gpu '] = [int(_gpu)]
        _args[' partition '] = index - 1
        job = Process(target=make, args=(dict(_args),))
        job.name = ' Trainer # ' + str(index)
        job.start()
        jobs.append(job)
#
# If we have any jobs we should wait till they finish
#
#
# Every 2 seconds the jobs that are no longer alive are dropped, until none is left
while jobs:
    jobs = list(filter(lambda _job: _job.is_alive(), jobs))
    time.sleep(2)
# jobs = []
# jobs = []
# for index in range(0,PART_SIZE) :
# for index in range(0,PART_SIZE) :
# if 'focus' in args and int(args['focus']) != index :
# if 'focus' in args and int(args['focus']) != index :