diff --git a/WGAN.py b/WGAN.py
new file mode 100644
index 0000000..0dfdff0
--- /dev/null
+++ b/WGAN.py
@@ -0,0 +1,286 @@
+import tensorflow as tf
+from tensorflow.contrib.layers import l2_regularizer
+import numpy as np
+import time
+import os
+
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+
+#### id of the GPU to use
+os.environ['CUDA_VISIBLE_DEVICES'] = "0"
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+#### training data
+#### shape=(n_sample, n_code=854)
+REAL = np.load('')
+
+#### demographics for the training data
+#### shape=(n_sample, 6)
+#### if sample_x is male, then LABEL[x,0]=1, else LABEL[x,1]=1
+#### if sample_x's age is within 0-17, then LABEL[x,2]=1
+#### elif sample_x's age is within 18-44, then LABEL[x,3]=1
+#### elif sample_x's age is within 45-64, then LABEL[x,4]=1
+#### elif sample_x's age is 65 or above, then LABEL[x,5]=1
+LABEL = np.load('')
+
+#### training parameters
+NUM_GPUS = 1
+BATCHSIZE_PER_GPU = 2000
+TOTAL_BATCHSIZE = BATCHSIZE_PER_GPU * NUM_GPUS
+#### number of minibatches per epoch, derived from the training data itself
+STEPS_PER_EPOCH = REAL.shape[0] // TOTAL_BATCHSIZE
+
+g_structure = [128, 128]
+d_structure = [854, 256, 128]
+z_dim = 128
+
+
+def _variable_on_cpu(name, shape, initializer=None):
+    #### keep all trainable variables on the CPU so the GPU towers share them
+    with tf.device('/cpu:0'):
+        var = tf.get_variable(name, shape, initializer=initializer)
+    return var
+
+
+def batchnorm(inputs, name, labels=None, n_labels=None):
+    #### conditional batch normalization: one offset/scale pair per label,
+    #### selected via embedding lookup on the condition id
+    mean, var = tf.nn.moments(inputs, [0], keep_dims=True)
+    shape = mean.shape[1].value
+    offset_m = _variable_on_cpu(shape=[n_labels, shape], name='offset' + name,
+                                initializer=tf.zeros_initializer)
+    scale_m = _variable_on_cpu(shape=[n_labels, shape], name='scale' + name,
+                               initializer=tf.ones_initializer)
+    offset = tf.nn.embedding_lookup(offset_m, labels)
+    scale = tf.nn.embedding_lookup(scale_m, labels)
+    result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8)
+    return result
+
+
+def layernorm(inputs, name, labels=None, n_labels=None):
+    #### conditional layer normalization: per-sample moments, per-label affine
+    mean, var = tf.nn.moments(inputs, [1], keep_dims=True)
+    shape = inputs.shape[1].value
+    offset_m = _variable_on_cpu(shape=[n_labels, shape], name='offset' + name,
+                                initializer=tf.zeros_initializer)
+    scale_m = _variable_on_cpu(shape=[n_labels, shape], name='scale' + name,
+                               initializer=tf.ones_initializer)
+    offset = tf.nn.embedding_lookup(offset_m, labels)
+    scale = tf.nn.embedding_lookup(scale_m, labels)
+    result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8)
+    return result
+
+
+def input_fn():
+    features_placeholder = tf.placeholder(shape=REAL.shape, dtype=tf.float32)
+    labels_placeholder = tf.placeholder(shape=LABEL.shape, dtype=tf.float32)
+    dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
+    dataset = dataset.repeat(10000)
+    dataset = dataset.batch(batch_size=BATCHSIZE_PER_GPU)
+    dataset = dataset.prefetch(1)
+    iterator = dataset.make_initializable_iterator()
+    return iterator, features_placeholder, labels_placeholder
+
+
+def generator(z, label):
+    x = z
+    tmp_dim = z_dim
+    with tf.variable_scope('G', reuse=tf.AUTO_REUSE, regularizer=l2_regularizer(0.00001)):
+        #### residual blocks with conditional batch normalization
+        for i, dim in enumerate(g_structure[:-1]):
+            kernel = _variable_on_cpu('W_' + str(i), shape=[tmp_dim, dim])
+            h1 = batchnorm(tf.matmul(x, kernel), name='cbn' + str(i), labels=label, n_labels=8)
+            h2 = tf.nn.relu(h1)
+            x = x + h2
+            tmp_dim = dim
+        #### final residual block uses tanh instead of relu
+        i = len(g_structure) - 1
+        kernel = _variable_on_cpu('W_' + str(i), shape=[tmp_dim, g_structure[-1]])
+        h1 = batchnorm(tf.matmul(x, kernel), name='cbn' + str(i),
+                       labels=label, n_labels=8)
+        h2 = tf.nn.tanh(h1)
+        x = x + h2
+
+        #### output layer maps back to the 854-dimensional code space
+        kernel = _variable_on_cpu('W_' + str(i + 1), shape=[128, 854])
+        bias = _variable_on_cpu('b_' + str(i + 1), shape=[854])
+        x = tf.nn.sigmoid(tf.add(tf.matmul(x, kernel), bias))
+    return x
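+
+#### Design note: each generator block is a residual unit (the block input is
+#### added back onto the block activation) conditioned on the demographic
+#### label through the per-label offset/scale in batchnorm(). This is why
+#### every hidden width in g_structure equals z_dim=128 -- the shortcut
+#### addition x + h2 requires matching shapes throughout.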
+
+
+def discriminator(x, label):
+    with tf.variable_scope('D', reuse=tf.AUTO_REUSE, regularizer=l2_regularizer(0.00001)):
+        for i, dim in enumerate(d_structure[1:]):
+            kernel = _variable_on_cpu('W_' + str(i), shape=[d_structure[i], dim])
+            bias = _variable_on_cpu('b_' + str(i), shape=[dim])
+            x = tf.nn.relu(tf.add(tf.matmul(x, kernel), bias))
+            x = layernorm(x, name='cln' + str(i), labels=label, n_labels=8)
+        i = len(d_structure)
+        #### linear output layer: the critic emits an unbounded scalar score
+        kernel = _variable_on_cpu('W_' + str(i), shape=[d_structure[-1], 1])
+        bias = _variable_on_cpu('b_' + str(i), shape=[1])
+        y = tf.add(tf.matmul(x, kernel), bias)
+    return y
+
+
+def compute_dloss(real, fake, label):
+    #### WGAN-GP critic loss: Wasserstein distance plus a gradient penalty
+    #### evaluated on random interpolations between real and fake samples
+    epsilon = tf.random_uniform(
+        shape=[BATCHSIZE_PER_GPU, 1],
+        minval=0.,
+        maxval=1.)
+    x_hat = real + epsilon * (fake - real)
+    y_hat_fake = discriminator(fake, label)
+    y_hat_real = discriminator(real, label)
+    y_hat = discriminator(x_hat, label)
+
+    #### penalize deviation of the critic's gradient norm from 1
+    grad = tf.gradients(y_hat, [x_hat])[0]
+    slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1))
+    gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
+    all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+    w_distance = -tf.reduce_mean(y_hat_real) + tf.reduce_mean(y_hat_fake)
+    loss = w_distance + 10 * gradient_penalty + sum(all_regs)
+    tf.add_to_collection('dlosses', loss)
+
+    return w_distance, loss
+
+
+def compute_gloss(fake, label):
+    #### generator loss: maximize the critic score of the fake samples
+    y_hat_fake = discriminator(fake, label)
+    all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+    loss = -tf.reduce_mean(y_hat_fake) + sum(all_regs)
+    tf.add_to_collection('glosses', loss)
+    return loss, loss
+
+
+def tower_loss(scope, stage, real, label):
+    #### collapse the 6-bit demographic one-hots into a single condition id:
+    #### id = sex_bit * 4 + age_group, giving ids 0..7 (hence n_labels=8)
+    label = tf.cast(label, tf.int32)
+    label = label[:, 1] * 4 + tf.squeeze(
+        tf.matmul(label[:, 2:], tf.constant([[0], [1], [2], [3]], dtype=tf.int32)))
+    z = tf.random_normal(shape=[BATCHSIZE_PER_GPU, z_dim])
+    fake = generator(z, label)
+    if stage == 'D':
+        w, loss = compute_dloss(real, fake, label)
+        losses = tf.get_collection('dlosses', scope)
+    else:
+        w, loss = compute_gloss(fake, label)
+        losses = tf.get_collection('glosses', scope)
+
+    total_loss = tf.add_n(losses, name='total_loss')
+    return total_loss, w
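+
+#### Worked example of the condition id (hypothetical sample, following the
+#### LABEL layout documented at the top of this file): a female patient aged
+#### 45-64 has LABEL[x] = [0, 1, 0, 0, 1, 0], so
+####     id = 1 * 4 + 2 = 6
+#### while a male patient aged 0-17 has LABEL[x] = [1, 0, 1, 0, 0, 0], id 0.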
+
+
+def average_gradients(tower_grads):
+    #### average the per-GPU gradients variable-by-variable
+    average_grads = []
+    for grad_and_vars in zip(*tower_grads):
+        grads = []
+        for g, _ in grad_and_vars:
+            expanded_g = tf.expand_dims(g, 0)
+            grads.append(expanded_g)
+
+        grad = tf.concat(axis=0, values=grads)
+        grad = tf.reduce_mean(grad, 0)
+
+        #### variables are shared across towers, so the first tower's
+        #### reference is as good as any
+        v = grad_and_vars[0][1]
+        grad_and_var = (grad, v)
+        average_grads.append(grad_and_var)
+    return average_grads
+
+
+def graph(stage, opt):
+    tower_grads = []
+    per_gpu_w = []
+    iterator, features_placeholder, labels_placeholder = input_fn()
+    with tf.variable_scope(tf.get_variable_scope()):
+        for i in range(NUM_GPUS):
+            with tf.device('/gpu:%d' % i):
+                with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
+                    (real, label) = iterator.get_next()
+                    loss, w = tower_loss(scope, stage, real, label)
+                    tf.get_variable_scope().reuse_variables()
+                    #### only update the variables of the current stage
+                    #### ('D' or 'G'); the other network stays frozen
+                    vars_ = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=stage)
+                    grads = opt.compute_gradients(loss, vars_)
+                    tower_grads.append(grads)
+                    per_gpu_w.append(w)
+
+    grads = average_gradients(tower_grads)
+    apply_gradient_op = opt.apply_gradients(grads)
+
+    mean_w = tf.reduce_mean(per_gpu_w)
+    train_op = apply_gradient_op
+    return train_op, mean_w, iterator, features_placeholder, labels_placeholder
+
+
+def train(max_epochs, train_dir):
+    with tf.device('/cpu:0'):
+        opt_d = tf.train.AdamOptimizer(1e-4)
+        opt_g = tf.train.AdamOptimizer(1e-4)
+        train_d, w_distance, iterator_d, features_placeholder_d, labels_placeholder_d = graph('D', opt_d)
+        train_g, _, iterator_g, features_placeholder_g, labels_placeholder_g = graph('G', opt_g)
+        saver = tf.train.Saver()
+        init = tf.global_variables_initializer()
+
+        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
+            sess.run(init)
+            sess.run(iterator_d.initializer,
+                     feed_dict={features_placeholder_d: REAL, labels_placeholder_d: LABEL})
+            sess.run(iterator_g.initializer,
+                     feed_dict={features_placeholder_g: REAL, labels_placeholder_g: LABEL})
+
+            for epoch in range(1, max_epochs + 1):
+                start_time = time.time()
+                w_sum = 0
+                for i in range(STEPS_PER_EPOCH):
+                    #### two critic updates per generator update
+                    for _ in range(2):
+                        _, w = sess.run([train_d, w_distance])
+                        w_sum += w
+                    sess.run(train_g)
+                duration = time.time() - start_time
+
+                assert not np.isnan(w_sum), 'Model diverged with loss = NaN'
+
+                format_str = 'epoch: %d, w_distance = %f (%.1f sec)'
+                print(format_str % (epoch, -w_sum / (STEPS_PER_EPOCH * 2), duration))
+                if epoch % 500 == 0:
+                    saver.save(sess, train_dir, write_meta_graph=False, global_step=epoch)
+
+
+def generate(model_dir, synthetic_dir, demo):
+    tf.reset_default_graph()
+    z = tf.random_normal(shape=[BATCHSIZE_PER_GPU, z_dim])
+    y = tf.placeholder(shape=[BATCHSIZE_PER_GPU, 6], dtype=tf.int32)
+    label = y[:, 1] * 4 + tf.squeeze(tf.matmul(y[:, 2:], tf.constant([[0], [1], [2], [3]], dtype=tf.int32)))
+    fake = generator(z, label)
+    saver = tf.train.Saver()
+    with tf.Session() as sess:
+        saver.restore(sess, model_dir)
+        #### generate one file per (sex, age-group) combination, matching the
+        #### per-combination sample counts observed in `demo`
+        for m in range(2):
+            for n in range(2, 6):
+                idx = np.logical_and(demo[:, m] == 1, demo[:, n] == 1)
+                num = np.sum(idx)
+                nbatch = int(np.ceil(num / BATCHSIZE_PER_GPU))
+                label_input = np.zeros((nbatch * BATCHSIZE_PER_GPU, 6))
+                label_input[:, n] = 1
+                label_input[:, m] = 1
+                output = []
+                for i in range(nbatch):
+                    f = sess.run(fake, feed_dict={y: label_input[i * BATCHSIZE_PER_GPU:(i + 1) * BATCHSIZE_PER_GPU]})
+                    #### binarize the sigmoid outputs into 0/1 codes
+                    output.extend(np.round(f))
+                output = np.array(output)[:num]
+                np.save(synthetic_dir + str(m) + str(n), output)
+
+
+if __name__ == '__main__':
+    #### args_1: number of training epochs
+    #### args_2: dir to save the trained model
+    train(500, '')
+
+    #### args_1: dir of the trained model
+    #### args_2: dir to save the synthetic data
+    #### args_3: demographic labels of the data to be generated
+    generate('', '', demo=LABEL)
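+
+    #### A minimal sketch (hypothetical, not part of this pipeline) of how a
+    #### LABEL array could be built from raw `sex`/`age` integer columns:
+    ####     lab = np.zeros((len(age), 6))
+    ####     lab[np.asarray(sex) == 0, 0] = 1           # male
+    ####     lab[np.asarray(sex) == 1, 1] = 1           # female
+    ####     groups = np.digitize(age, [18, 45, 65])    # 0-17/18-44/45-64/65+
+    ####     lab[np.arange(len(age)), 2 + groups] = 1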