|
|
|
"""
|
|
|
|
import pickle
|
|
|
|
self.NUM_LABELS = args['label'].shape[1]
|
|
|
|
self.init_logs(**args)
|
|
|
|
|
|
|
|
def init_logs(self,**args):
    """
    Prepare the on-disk log layout and reset any prior data-store logs.

    Creates <log_dir>/train/<CONTEXT> and <log_dir>/output/<CONTEXT>,
    records them as self.train_dir / self.out_dir and, when a logger is
    attached, backs up then drops the existing synthetic-attribute logs.

    :logs   optional entry in args giving the root log directory (default 'logs')
    """
    self.log_dir = args.get('logs', 'logs')
    self.mkdir(self.log_dir)
    #
    # One sub-tree per phase, each namespaced by the current context.
    #
    for phase in ['train', 'output']:
        phase_dir = os.sep.join([self.log_dir, phase])
        self.mkdir(phase_dir)
        self.mkdir(os.sep.join([phase_dir, self.CONTEXT]))

    self.train_dir = os.sep.join([self.log_dir, 'train', self.CONTEXT])
    self.out_dir = os.sep.join([self.log_dir, 'output', self.CONTEXT])
    if self.logger:
        #
        # We will clear the logs from the data-store
        # (previous entries are backed up before the collection is dropped)
        #
        column = self.ATTRIBUTES['synthetic']
        db = self.logger.db
        if db[column].count() > 0:
            db.backup.insert({'name':column,'logs':list(db[column].find()) })
            db[column].drop()
|
|
|
|
|
|
|
|
def load_meta(self,column):
    """
    Reload previously persisted model metadata and re-derive directories.

    This function is designed to accommodate the uses of the sub-classes
    outside of a strict dependency model: because prediction and training
    can happen independently, the metadata written at training time is
    restored here as instance attributes.

    :param column: kept for interface compatibility; the file suffix is
        taken from self.get.suffix() instead
    """
    # suffix = "-".join(column) if isinstance(column,list)else column
    suffix = self.get.suffix()
    _name = os.sep.join([self.out_dir, 'meta-' + suffix + '.json'])
    if os.path.exists(_name):
        # FIX: read through a context manager so the file handle is
        # closed (the original leaked an open file object).
        with open(_name) as f:
            attr = json.loads(f.read())
        for key, value in attr.items():
            setattr(self, key, value)
        # Re-derive the working directories from the restored CONTEXT.
        self.train_dir = os.sep.join([self.log_dir, 'train', self.CONTEXT])
        self.out_dir = os.sep.join([self.log_dir, 'output', self.CONTEXT])
|
|
|
|
|
|
|
|
|
|
|
|
def log_meta(self,**args) :
    """
    Persist the model's hyper-parameters/metadata as JSON in self.out_dir.

    The file is named 'meta-<suffix>.json' where the suffix comes from
    self.get.suffix(), so load_meta can find it later.

    :param key, value: optional extra entry merged into the metadata
    :return: the metadata dict that was written
    """
    _object = {
        # '_id':'meta',
        'CONTEXT':self.CONTEXT,
        'ATTRIBUTES':self.ATTRIBUTES,
        'BATCHSIZE_PER_GPU':self.BATCHSIZE_PER_GPU,
        'Z_DIM':self.Z_DIM,
        "X_SPACE_SIZE":self.X_SPACE_SIZE,
        "D_STRUCTURE":self.D_STRUCTURE,
        "G_STRUCTURE":self.G_STRUCTURE,
        "NUM_GPUS":self.NUM_GPUS,
        "NUM_LABELS":self.NUM_LABELS,
        "MAX_EPOCHS":self.MAX_EPOCHS,
        "ROW_COUNT":self.ROW_COUNT
    }
    if args and 'key' in args and 'value' in args :
        key = args['key']
        value = args['value']
        # FIX: the original wrote `object[key] = value`, i.e. item
        # assignment on the *builtin* `object`, which raises TypeError
        # whenever key/value are supplied. The extra entry belongs in
        # the metadata dict being written.
        _object[key] = value
    # suffix = "-".join(self.column) if isinstance(self.column,list) else self.column
    suffix = self.get.suffix()
    _name = os.sep.join([self.out_dir,'meta-'+suffix])

    # FIX: use a context manager so the file is flushed and closed
    # (the original never closed the handle).
    with open(_name + '.json', 'w') as f:
        f.write(json.dumps(_object))
    return _object
|
|
|
|
def mkdir (self,path):
|
|
|
|
if not os.path.exists(path) :
|
|
|
|
|
|
|
|
cpu_var = tf.compat.v1.get_variable(name,shape,initializer= initializer)
|
|
|
|
return cpu_var
|
|
|
|
def average_gradients(self,tower_grads):
    """
    Average per-tower gradients across GPUs.

    :param tower_grads: list (one entry per tower) of (gradient, variable)
        pairs as produced by compute_gradients
    :return: list of (averaged_gradient, variable) pairs suitable for
        apply_gradients
    """
    averaged = []
    # zip(*...) regroups the per-tower lists so each iteration sees all
    # towers' gradients for one shared variable.
    for per_tower in zip(*tower_grads):
        # Stack the towers' gradients along a new leading axis, then
        # reduce that axis to their mean.
        stacked = tf.concat(axis=0,
                            values=[tf.expand_dims(g, 0) for g, _ in per_tower])
        mean_grad = tf.reduce_mean(stacked, 0)
        # The variable object is shared across towers; take tower 0's.
        shared_var = per_tower[0][1]
        averaged.append((mean_grad, shared_var))
    return averaged
|
|
|
|
This function will build the network that will generate the synthetic candidates
|
|
|
|
:inputs
|
|
|
|
:label
|
|
|
|
"""
|
|
|
|
x = args['inputs']
|
|
|
|
label = args['label']
|
|
|
|
with tf.compat.v1.variable_scope('D', reuse=tf.compat.v1.AUTO_REUSE , regularizer=l2_regularizer(0.00001)):
|
|
|
|
for i, dim in enumerate(self.D_STRUCTURE[1:]):
|
|
|
|
kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[i], dim])
|
|
|
|
bias = self.get.variables(name='b_' + str(i), shape=[dim])
|
|
|
|
# print (["\t",bias,kernel])
|
|
|
|
x = tf.nn.relu(tf.add(tf.matmul(x, kernel), bias))
|
|
|
|
x = self.normalize(inputs=x, name='cln' + str(i), shift=1,labels=label, n_labels=self.NUM_LABELS)
|
|
|
|
i = len(self.D_STRUCTURE)
|
|
|
|
kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[-1], 1])
|
|
|
|
bias = self.get.variables(name='b_' + str(i), shape=[1])
|
|
|
|
y = tf.add(tf.matmul(x, kernel), bias)
|
|
|
|
return y
|
|
|
|
scope = args['scope']
|
|
|
|
stage = args['stage']
|
|
|
|
real = args['real']
|
|
|
|
label = args['label']
|
|
|
|
if self._LABEL is not None :
|
|
|
|
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
|
|
|
|
else :
|
|
|
|
dataset = tf.data.Dataset.from_tensor_slices(features_placeholder)
|
|
|
|
# labels_placeholder = None
|
|
|
|
grads = opt.compute_gradients(loss, vars_)
|
|
|
|
tower_grads.append(grads)
|
|
|
|
per_gpu_w.append(w)
|
|
|
|
|
|
|
|
grads = self.average_gradients(tower_grads)
|
|
|
|
apply_gradient_op = opt.apply_gradients(grads)
|
|
|
|
|
|
|
|
mean_w = tf.reduce_mean(per_gpu_w)
|
|
|
|
train_op = apply_gradient_op
|
|
|
|
return train_op, mean_w, iterator, features_placeholder, labels_placeholder
|
|
|
|
def apply(self,**args):
|
|
|
|
# max_epochs = args['max_epochs'] if 'max_epochs' in args else 10
|
|
|
|
REAL = self._REAL
|
|
|
|
LABEL= self._LABEL
|
|
|
|
if (self.logger):
|
|
|
|
pass
|
|
|
|
|
|
|
|
with tf.device('/cpu:0'):
|
|
|
|
opt_d = tf.compat.v1.train.AdamOptimizer(1e-4)
|
|
|
|
opt_g = tf.compat.v1.train.AdamOptimizer(1e-4)
|
|
|
|
|
|
|
|
train_d, w_distance, iterator_d, features_placeholder_d, labels_placeholder_d = self.network(stage='D', opt=opt_d)
|
|
|
|
train_g, _, iterator_g, features_placeholder_g, labels_placeholder_g = self.network(stage='G', opt=opt_g)
|
|
|
|
# saver = tf.train.Saver()
|
|
|
|
saver = tf.compat.v1.train.Saver()
|
|
|
|
# init = tf.global_variables_initializer()
|
|
|
|
init = tf.compat.v1.global_variables_initializer()
|
|
|
|
logs = []
|
|
|
|
#with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
|
|
|
|
with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
|
|
|
|
NTH_VALID_CANDIDATE = count = np.random.choice(np.arange(2,60),2)[0]
|
|
|
|
with tf.compat.v1.Session() as sess:
|
|
|
|
|
|
|
|
# sess.run(init)
|
|
|
|
saver.restore(sess, model_dir)
|
|
|
|
# _name = os.sep.join([self.out_dir,self.CONTEXT+'-'+count+'.csv'])
|
|
|
|
# df.to_csv(_name,index=False)
|
|
|
|
|
|
|
|
|
|
|
|
# output.extend(np.round(f))
|
|
|
|
|
|
|
|
# for m in range(2):
|
|
|
|
# for n in range(2, self.NUM_LABELS):
|
|
|
|
# idx1 = (demo[:, m] == 1)
|
|
|
|
# idx2 = (demo[:, n] == 1)
|
|
|
|
# idx = [idx1[j] and idx2[j] for j in range(len(idx1))]
|
|
|
|
# num = np.sum(idx)
|
|
|
|
# print ("___________________list__")
|
|
|
|
# print (idx1)
|
|
|
|
# print (idx2)
|
|
|
|
# print (idx)
|
|
|
|
# print (num)
|
|
|
|
# print ("_____________________")
|
|
|
|
# nbatch = int(np.ceil(num / self.BATCHSIZE_PER_GPU))
|
|
|
|
# label_input = np.zeros((nbatch*self.BATCHSIZE_PER_GPU, self.NUM_LABELS))
|
|
|
|
# label_input[:, n] = 1
|
|
|
|
# label_input[:, m] = 1
|
|
|
|
# output = []
|
|
|
|
# for i in range(nbatch):
|
|
|
|
# f = sess.run(fake,feed_dict={y: label_input[i* self.BATCHSIZE_PER_GPU:(i+1)* self.BATCHSIZE_PER_GPU]})
|
|
|
|
# output.extend(np.round(f))
|
|
|
|
# output = np.array(output)[:num]
|
|
|
|
# print ([m,n,output])
|
|
|
|
|
|
|
|
# np.save(self.out_dir + str(m) + str(n), output)
|
|
|
|
|
|
|
|
else:
|
|
|
|
print (SYS_ARGS.keys())
|
|
|
|
print (__doc__)
|