"""
self.STEPS_PER_EPOCH = 256 #int(np.load('ICD9/train.npy').shape[0] / 2000)
self.MAX_EPOCHS = int(args.get('max_epochs', 10))
self.ROW_COUNT = args['real'].shape[0] if 'real' in args else 100
self.CONTEXT = args['context']
self.ATTRIBUTES = {"id": args.get('column_id'), "synthetic": args.get('column')}
self._REAL = args.get('real')
self._LABEL = args.get('label')
suffix = self.get.suffix()
_name = os.sep.join([self.out_dir,'meta-'+suffix+'.json'])
name        name of the scope (used to name the offset/scale variables)
labels      labels (attributes not synthesized), None by default
n_labels    number of labels, None by default
"""
inputs = args['inputs']
name = args['name']
labels = None if 'labels' not in args else args['labels']
n_labels= None if 'n_labels' not in args else args['n_labels']
shift = [0] if self.__class__.__name__.lower() == 'generator' else [1] #-- not sure what this is doing
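# 'shift' is passed to tf.nn.moments below as the axes argument: the generator
# normalizes over axis 0 (the batch), every other network over axis 1 (the features).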
mean, var = tf.nn.moments(inputs, shift, keep_dims=True)
shape = inputs.shape[1].value
offset_m = self.get.variables(shape=[n_labels,shape], name='offset'+name,
initializer=tf.zeros_initializer)
scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,
initializer=tf.ones_initializer)
offset = tf.nn.embedding_lookup(offset_m, labels)
scale = tf.nn.embedding_lookup(scale_m, labels)
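# Looking up a per-label (offset, scale) pair makes this a conditional batch
# normalization: each label gets its own affine transform after normalization.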
result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8)
return result
def _variable_on_cpu(self,**args):
"""
This function makes sure variables/tensors are not created on the GPU but rather on the CPU
"""
name = args['name']
shape = args['shape']
initializer = args.get('initializer')
with tf.device('/cpu:0') :
cpu_var = tf.compat.v1.get_variable(name,shape,initializer= initializer)
return cpu_var
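# Illustrative call (names here are hypothetical): self._variable_on_cpu(name='W_0', shape=[128, 64])
# creates the variable on the CPU even when the rest of the graph is built on a GPU.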
def average_gradients(self,tower_grads):
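"""
Average gradients computed on multiple GPU towers.
tower_grads is a list with one entry per tower; each entry is a list of
(gradient, variable) pairs. The result pairs each variable with the mean of
its per-tower gradients so a single synchronized update can be applied.
"""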
average_grads = []
for grad_and_vars in zip(*tower_grads):
grads = []
for g, _ in grad_and_vars:
expanded_g = tf.expand_dims(g, 0)
grads.append(expanded_g)
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
class Generator (GNet):
"""
This class handles the generation of candidate datasets. To do so it aggregates a discriminator, which guides the generator so that its output is not random.
"""
def __init__(self,**args):
GNet.__init__(self,**args)
self.discriminator = Discriminator(**args)
def loss(self,**args):
fake = args['fake']
label = args['label']
y_hat_fake = self.discriminator.network(inputs=fake, label=label)
h1 = self.normalize(inputs=tf.matmul(x, kernel), shift=0, name='cbn' + str(i), labels=label, n_labels=self.NUM_LABELS)
x = self.normalize(inputs=x, name='cln' + str(i), shift=1, labels=label, n_labels=self.NUM_LABELS)
i = len(self.D_STRUCTURE)
kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[-1], 1])
bias = self.get.variables(name='b_' + str(i), shape=[1])
y = tf.add(tf.matmul(x, kernel), bias)
return y
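# The final [D_STRUCTURE[-1], 1] layer reduces each sample to a single unbounded
# score, as expected of a Wasserstein critic (no sigmoid is applied).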
def loss(self,**args):
"""
This function computes the loss given:
:real   batch of real records
:fake   batch of synthesized (fake) records
:label  conditioning labels
"""
real = args['real']
fake = args['fake']
label = args['label']
epsilon = tf.random.uniform(shape=[self.BATCHSIZE_PER_GPU,1],minval=0,maxval=1)
x_hat = real + epsilon * (fake - real)
y_hat_fake = self.network(inputs=fake, label=label)
y_hat_real = self.network(inputs=real, label=label)
y_hat = self.network(inputs=x_hat, label=label)
grad = tf.gradients(y_hat, [x_hat])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1))
gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
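# WGAN-GP style penalty: x_hat interpolates between real and fake samples, and the
# critic's gradient norm at x_hat is pushed toward 1, enforcing the 1-Lipschitz
# constraint required for the Wasserstein distance estimate.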
(real, label) = iterator.get_next()
sess.run(iterator_g.initializer,
feed_dict={features_placeholder_g: REAL, labels_placeholder_g: LABEL})
for epoch in range(1, self.MAX_EPOCHS + 1):
start_time = time.time()
w_sum = 0
for i in range(self.STEPS_PER_EPOCH):
for _ in range(2):
_, w = sess.run([train_d, w_distance])
w_sum += w
sess.run(train_g)
duration = time.time() - start_time
assert not np.isnan(w_sum), 'Model diverged with loss = NaN'
format_str = 'epoch: %d, w_distance = %f (%.1f)'
print(format_str % (epoch, -w_sum/(self.STEPS_PER_EPOCH*2), duration))
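# Each epoch runs STEPS_PER_EPOCH steps; the critic (train_d) is updated twice per
# generator update (train_g), and w_sum accumulates the estimated Wasserstein
# distance reported above, negated and averaged over the 2*STEPS_PER_EPOCH critic steps.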
# i = np.arange(_map[column]['start'],_map[column]['end'])
max_epochs = np.int32(SYS_ARGS['max_epochs']) if 'max_epochs' in SYS_ARGS else 10
# REAL = _df[:,i]
REAL = pd.get_dummies(df[column]).astype(np.float32).values
LABEL = pd.get_dummies(df[column_id]).astype(np.float32).values
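# pd.get_dummies one-hot encodes a column; e.g. a hypothetical column with values
# ['F', 'M', 'F'] yields the float32 matrix [[1., 0.], [0., 1.], [1., 0.]]
# (one column per sorted unique value), which is what the trainer consumes as REAL/LABEL.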
trainer = Train(context=context,max_epochs=max_epochs,real=REAL,label=LABEL,column=column,column_id=column_id)
trainer.apply()
#
# We should train upon this data
#
# -- we need to convert the data-frame to binary matrix, given a column
#
pass
elif 'generate' in SYS_ARGS:
values = df[column].unique().tolist()
values.sort()
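# The sorted unique values match the column order pd.get_dummies used at training
# time, so (presumably) generated one-hot rows can be mapped back to the original categories.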