diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000..2fd6442
--- /dev/null
+++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..dd02d11
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+FROM ubuntu
+RUN ["apt-get","update"]
+RUN ["apt-get","upgrade","-y"]
+RUN ["apt-get","install","-y","git","python3-dev","tmux","locales","python3-pip","python3-numpy","python3-pandas"]
+RUN ["pip3","install","pandas-gbq","tensorflow"]
+RUN ["mkdir","-p","/usr/apps"]
+WORKDIR /usr/apps
+RUN ["git","clone","https://hiplab.mc.vanderbilt.edu/git/gan.git","aou-gan"]
diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000..f7f5a6f
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,80 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "x = np.arange(-4,4)\n",
+    "def sigmoid(x):\n",
+    "    e = np.exp(-x)\n",
+    "    return np.divide(1,1 + e)\n",
+    "df = pd.DataFrame({\"x\":x,\"tanh\":np.tanh(x),\"sigmoid\":sigmoid(np.tanh(x))})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<matplotlib.axes._subplots.AxesSubplot at 0x...>"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "[base64 PNG data omitted: line plot of the 'tanh' and 'sigmoid' columns]",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "df[['tanh','sigmoid']].plot()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/WGAN.py b/WGAN.py
index 0dfdff0..186d5f8 100644
--- a/WGAN.py
+++ b/WGAN.py
@@ -3,7 +3,7 @@ from tensorflow.contrib.layers import l2_regularizer
 import numpy as np
 import time
 import os
-
+import pandas as pd
 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
 
 #### id of gpu to use
@@ -13,7 +13,7 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
 #### training data
 #### shape=(n_sample, n_code=854)
-REAL = np.load('')
+REAL = None #np.load('')
 #--diagnosis codes (binary)
 
 #### demographic for training data
 #### shape=(n_sample, 6)
@@ -22,16 +22,16 @@ REAL = np.load('')
 #### elif sample_x's is within 18-44, then LABEL[x,3]=1
 #### elif sample_x's is within 45-64, then LABEL[x,4]=1
 #### elif sample_x's is within 64-, then LABEL[x,5]=1
-LABEL = np.load('')
+LABEL = None #np.load('')
 #-- demographics 0,5 set it to 1,0,0,0,0,0
 
 #### training parameters
 NUM_GPUS = 1
 BATCHSIZE_PER_GPU = 2000
 TOTAL_BATCHSIZE = BATCHSIZE_PER_GPU * NUM_GPUS
-STEPS_PER_EPOCH = int(np.load('ICD9/train.npy').shape[0] / 2000)
+STEPS_PER_EPOCH = 256 #int(np.load('ICD9/train.npy').shape[0] / 2000)
 g_structure = [128, 128]
-d_structure = [854, 256, 128]
+d_structure = [854, 256, 128] #-- change 854 to the number of diagnosis codes
 z_dim = 128
 
 def _variable_on_cpu(name, shape, initializer=None):
@@ -277,6 +277,13 @@ def generate(model_dir, synthetic_dir, demo):
 if __name__ == '__main__':
     #### args_1: number of training epochs
     #### args_2: dir to save the trained model
+    from bridge import Binary
+    df = pd.read_csv('exports/observation.csv')
+    cols = 'observation_source_value'
+    _map,_df = (Binary()).Export(df)
+    i = np.arange(_map[cols]['start'],_map[cols]['end'])
+    REAL = _df[:,i]
+    LABEL = np.arange(0,_df.shape[0])
+    # NOTE: np.arange only yields a 1-D row index here; the comments above
+    # describe LABEL as a (n_sample, 6) one-hot demographic matrix, so this
+    # assignment is a placeholder
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df[['tanh','sigmoid']].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/WGAN.py b/WGAN.py index 0dfdff0..186d5f8 100644 --- a/WGAN.py +++ b/WGAN.py @@ -3,7 +3,7 @@ from tensorflow.contrib.layers import l2_regularizer import numpy as np import time import os - +import pandas as pd os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" #### id of gpu to use @@ -13,7 +13,7 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #### training data #### shape=(n_sample, n_code=854) -REAL = np.load('') +REAL = None #np.load('') #--diagnosis codes (binary) #### demographic for training data #### shape=(n_sample, 6) @@ -22,16 +22,16 @@ REAL = np.load('') #### elif sample_x's is within 18-44, then LABEL[x,3]=1 #### elif sample_x's is within 45-64, then LABEL[x,4]=1 #### elif sample_x's is within 64-, then LABEL[x,5]=1 -LABEL = np.load('') +LABEL = None #np.load('') #-- demographics 0,5 set it to 1,0,0,0,0,0 #### training parameters NUM_GPUS = 1 BATCHSIZE_PER_GPU = 2000 TOTAL_BATCHSIZE = BATCHSIZE_PER_GPU * NUM_GPUS -STEPS_PER_EPOCH = int(np.load('ICD9/train.npy').shape[0] / 2000) +STEPS_PER_EPOCH = 256 #int(np.load('ICD9/train.npy').shape[0] / 2000) g_structure = [128, 128] -d_structure = [854, 256, 128] +d_structure = [854, 256, 128] #-- change 854 to number of diagnosis z_dim = 128 def _variable_on_cpu(name, shape, initializer=None): @@ -277,6 +277,13 @@ def generate(model_dir, synthetic_dir, demo): if __name__ == '__main__': #### args_1: number of training epochs #### args_2: dir to save the trained model + from bridge import Binary + df = pd.read_csv('exports/observation.csv') + cols = 'observation_source_value' + _map,_df = (Binary()).Export(df) + i = np.arange(_map[cols]['start'],_map[cols]['end']) + REAL = _df[:,i] + LABEL = np.arange(0,_df.shape[0]) train(500, '') #### args_1: dir of trained model diff --git a/bridge.py b/bridge.py index f9489ee..fa323af 100644 --- a/bridge.py +++ b/bridge.py @@ -23,13 +23,12 @@ if len(sys.argv) > 1: value = None if sys.argv[i].startswith('--'): key = sys.argv[i].replace('-','') - + SYS_ARGS[key] = 1 if i + 1 < N: value = sys.argv[i + 1] = sys.argv[i+1].strip() if key and value: SYS_ARGS[key] = value - if key == 'context': - SYS_ARGS[key] = ('/'+value).replace('//','/') + i += 2 @@ -107,7 +106,7 @@ class pseudonym : # print (df.head()[:5]) # sys.stdout.flush() TABLE_NAME = ".".join([args['dataset']+DATASET_SUFFIX,PSEUDO_TABLENAME]) - df.to_gbq(TABLE_NAME,credentials=credentials,if_exists='append') + df.to_gbq(TABLE_NAME,credentials=credentials,if_exists='append',chunksize=10000) # df.to_gbq(TABLE_NAME.replace('.','_pseudo.'),credentials=credentials,if_exists='append') class Builder : @@ -159,18 +158,29 @@ class Binary : This function will convert a column into a binary matrix with the value-space representing each column of the resulting matrix :column a column vector i.e every item is a row """ - values = np.unique(column) + # values = np.unique(column) + + 
+		values = column.dropna().unique()
 		values.sort()
-
+		#
+		# Let's treat the case of missing values, i.e. nulls
+		#
 		row_count,col_count = column.size,values.size
+		matrix = [ np.zeros(col_count) for i in np.arange(row_count)]
 		#
 		# let's create a binary matrix of the feature that was passed in
 		# The indices of the matrix are inspired by classical x,y axis
-		for yi in np.arange(row_count) :
-			value = column[yi]
-			xi = np.where(values == value)[0][0] #-- column index
-			matrix[yi][xi] = 1
+
+		if col_count > 0 and values.size > 1:
+
+			for yi in np.arange(row_count) :
+				value = column[yi]
+				if value not in values :
+					continue
+				xi = np.where(values == value)
+				xi = xi[0][0] #-- column index
+				matrix[yi][xi] = 1
 		return matrix
 
 	def Export(self,df) :
@@ -180,7 +190,9 @@ class Binary :
 		"""
 		#
 		# This will give us a map of how each column was mapped to a bitstream
-		_map = df.apply(lambda column: self.__stream(column.values),axis=0)
+
+		_map = df.fillna(np.nan).apply(lambda column: self.__stream(column),axis=0)
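+		# apply(...,axis=0) now hands each column to __stream as a pandas Series
+		# (previously the raw .values array), which is what lets dropna() work above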
+
 		#
 		# We will merge this to have a healthy matrix
 		_matrix = _map.apply(lambda row: list(list(itertools.chain(*row.values.tolist()))),axis=1)
@@ -198,7 +210,7 @@ class Binary :
 			_m[name] = {"start":beg,"end":end}
 			beg = end
 
-		return _m,_matrix
+		return _m,_matrix.astype(np.float32)
 
 	def Import(self,df,values,_map):
 		"""
@@ -216,8 +228,8 @@ class Binary :
 #
 has_basic = 'dataset' in SYS_ARGS.keys() and 'table' in SYS_ARGS.keys() and 'key' in SYS_ARGS.keys()
 has_action= 'export' in SYS_ARGS.keys() or 'pseudo' in SYS_ARGS.keys()
-df = pd.DataFrame({"fname":['james','james','steve','kevin','kevin'],"lname":["bond","dean","nyemba",'james','johnson']})
-df['age'] = (np.random.sample(df.shape[0]) * 100).astype(np.int32)
+# df = pd.DataFrame({"fname":['james','james','steve','kevin','kevin'],"lname":["bond","dean","nyemba",'james','johnson']})
+# df['age'] = (np.random.sample(df.shape[0]) * 100).astype(np.int32)
 if __name__ == '__main__' :
 	"""
 	Run the program from the command line passing the following mandatory arguments
@@ -253,6 +265,7 @@
 		builder.process(**SYS_ARGS)
 	else:
 		print ("")
+		print (SYS_ARGS.keys())
 		print ("has basic ",has_basic)
 		print ("has action ",has_action)
 	# pseudonym.apply(table='person',dataset='wgan_original',key='./curation-test-2.json')
diff --git a/bridge.pyc b/bridge.pyc
new file mode 100644
index 0000000..3569b94
Binary files /dev/null and b/bridge.pyc differ
diff --git a/curation-prod.json b/curation-prod.json
new file mode 100644
index 0000000..2c75f66
--- /dev/null
+++ b/curation-prod.json
@@ -0,0 +1,12 @@
+{
+  "type": "service_account",
+  "project_id": "aou-res-curation-prod",
+  "private_key_id": "ecbf77975c5b7b1f4d4b1680bf67a5e0fd11dfaf",
+  "private_key": "-----BEGIN PRIVATE KEY-----\n[REDACTED]\n-----END PRIVATE KEY-----\n",
+  "client_email": "aou-res-curation-prod@appspot.gserviceaccount.com",
+  "client_id": "",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/aou-res-curation-prod%40appspot.gserviceaccount.com"
+}
diff --git a/curation-test-2.json b/curation-test-2.json
new file mode 100644
index 0000000..a021dba
--- /dev/null
+++ b/curation-test-2.json
@@ -0,0 +1,12 @@
+{
+  "type": "service_account",
+  "project_id": "aou-res-curation-test",
+  "private_key_id": "be9cb7427212dea882379d125530f5339ba854a7",
+  "private_key": "-----BEGIN PRIVATE KEY-----\n[REDACTED]\n-----END PRIVATE KEY-----\n",
+  "client_email": "aou-res-curation-test@appspot.gserviceaccount.com",
+  "client_id": "",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/aou-res-curation-test%40appspot.gserviceaccount.com"
+}
diff --git a/curation-test.json b/curation-test.json
new file mode 100644
index 0000000..f48f64d
--- /dev/null
+++ b/curation-test.json
@@ -0,0 +1,12 @@
+{
+  "type": "service_account",
+  "project_id": "aou-res-curation-test",
+  "private_key_id": "1ed8d298e4b5572e7556b2f079133ea04568396a",
+  "private_key": "-----BEGIN PRIVATE KEY-----\n[REDACTED]\n-----END PRIVATE KEY-----\n",
+  "client_email": "aou-res-curation-test@appspot.gserviceaccount.com",
+  "client_id": "",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/aou-res-curation-test%40appspot.gserviceaccount.com"
+}
diff --git a/exports/observation.csv b/exports/observation.csv
new file mode 100644
index 0000000..31812ae
--- /dev/null
+++ b/exports/observation.csv
@@ -0,0 +1,511 @@
+observation_id,person_id,observation_concept_id,observation_date,observation_datetime,observation_type_concept_id,value_as_number,value_as_string,value_as_concept_id,qualifier_concept_id,unit_concept_id,provider_id,visit_occurrence_id,observation_source_value,observation_source_concept_id,unit_source_value,qualifier_source_value,value_source_concept_id,value_source_value,questionnaire_response_id
+118208,5557,9425,3823,10549,1331,,16669.0,,,,,8936.0,11839,6242,1849.0,5535.0,,,
+112221,5557,1268,8176,4688,1331,,16669.0,,,,,9908.0,7436,6037,1849.0,5535.0,,,
+92924,5557,1268,3823,13501,1331,,16669.0,,,,,8043.0,7436,6037,1849.0,5535.0,,,
+87525,5557,562,6238,5732,1331,,16669.0,,,,,2539.0,3555,914,1849.0,5535.0,,,
+88732,5557,1268,3823,10549,1331,,16669.0,,,,,8936.0,7436,6037,1849.0,5535.0,,,
+127541,5230,255,4070,17510,1331,,,5490.0,,,,8823.0,10678,7672,,,,,
+143650,665,5987,6705,10269,1331,,,5490.0,,,,6992.0,11454,8747,,,,,
+69801,665,8220,11554,17750,1331,,,5490.0,,,,6910.0,4645,7332,,,,,
+102810,665,3637,1222,15887,1331,,,5490.0,,,,5139.0,5309,5692,,,,,
+143746,665,8499,3363,666,1331,,,5490.0,,,,7948.0,5963,2112,,,,,
+70261,665,7654,11258,18677,1331,,,5490.0,,,,12665.0,6282,333,,,,,
+40451,665,3637,1330,6520,1331,,,5490.0,,,,5793.0,6548,7716,,,,, +20543,665,6866,9228,15133,1331,,,5490.0,,,,9252.0,11880,1768,,,,, +100742,665,5987,11806,11363,1331,,,5490.0,,,,,4102,192,,,,, +128493,665,3637,9520,13609,1331,,,5490.0,,,,8390.0,5309,5692,,,,, +118347,665,4084,11258,18677,1331,,,5490.0,,,,12665.0,1637,10082,,,,, +70737,665,9675,6995,21994,1331,,,5490.0,,,,8156.0,11592,3413,,,,, +16780,9034,2273,8988,12680,1331,,,5490.0,,,,11630.0,7739,5531,,,,, +48409,9034,2273,8530,10717,1331,,,5490.0,,,,2681.0,7739,5531,,,,, +95301,665,2123,3877,22319,1331,,,5490.0,,,,12932.0,4701,11099,,,,, +109187,665,8499,586,20186,1331,,,5490.0,,,,13320.0,5963,2112,,,,, +131936,665,3637,3019,1916,1331,,,5490.0,,,,326.0,5309,5692,,,,, +11545,665,9675,11258,18677,1331,,,5490.0,,,,12665.0,229,9044,,,,, +45240,665,9675,5659,7261,1331,,,5490.0,,,,3579.0,3906,3214,,,,, +94641,9034,2123,8988,12680,1331,,,5490.0,,,,11630.0,9951,1400,,,,, +61317,665,8499,12397,21443,1331,,,5490.0,,,,7050.0,5963,2112,,,,, +96121,665,5987,11258,18677,1331,,,5490.0,,,,12665.0,271,2597,,,,, +141495,9034,2273,4972,4053,1331,,,5490.0,,,,10150.0,7739,5531,,,,, +46013,665,7230,4975,8765,1331,,,5490.0,,,,8632.0,7140,9976,,,,, +27580,665,6866,10230,22399,1331,,,5490.0,,,,11228.0,11880,1768,,,,, +31663,665,1004,6701,19159,1331,,,5490.0,,,,12712.0,6220,7308,,,,, +53487,665,9425,9168,23137,1331,,,5490.0,,,,3789.0,954,6242,,,,, +96920,665,8499,7684,24162,1331,,,5490.0,,,,2125.0,6421,2112,,,,, +140655,665,566,11289,23244,1331,,,5490.0,,,,8937.0,496,7067,,,,, +22671,665,3637,7814,14867,1331,,,5490.0,,,,3367.0,5309,5692,,,,, +73299,665,5987,11801,10623,1331,,,5490.0,,,,9541.0,8239,8155,,,,, +33464,9034,5772,8988,12680,1331,,,5490.0,,,,11630.0,5293,3180,,,,, +116430,665,8499,9520,13609,1331,,,5490.0,,,,8390.0,6421,2112,,,,, +42612,665,566,11258,18677,1331,,,5490.0,,,,12665.0,496,7067,,,,, +151800,665,2123,6995,21994,1331,,,5490.0,,,,8156.0,8751,11099,,,,, +66963,665,5987,757,20626,1331,,,5490.0,,,,2315.0,11454,8747,,,,, +20955,665,9675,757,20626,1331,,,5490.0,,,,2315.0,229,9044,,,,, +29389,665,6866,9591,9107,1331,,,5490.0,,,,8111.0,11880,1768,,,,, +47723,665,8881,3877,22319,1331,,,5490.0,,,,12932.0,9490,836,,,,, +145483,665,8220,11289,23244,1331,,,5490.0,,,,8937.0,4645,7332,,,,, +148716,665,5987,1387,17146,1331,,,5490.0,,,,1679.0,4102,192,,,,, +60966,665,6866,6217,3474,1331,,,5490.0,,,,7540.0,11880,1768,,,,, +74964,665,8220,6705,10269,1331,,,5490.0,,,,6992.0,4645,7332,,,,, +111786,665,566,757,20626,1331,,,5490.0,,,,2315.0,496,7067,,,,, +41338,665,9675,10799,349,1331,,,5490.0,,,,3164.0,5538,9044,,,,, +27522,665,9675,6705,10269,1331,,,5490.0,,,,6992.0,229,9044,,,,, +64564,665,3637,8188,3756,1331,,,5490.0,,,,3488.0,4974,7184,,,,, +69978,665,7565,6701,19159,1331,,,5490.0,,,,12712.0,293,5494,,,,, +34968,665,3637,7684,24162,1331,,,5490.0,,,,2125.0,5309,5692,,,,, +23218,665,3637,245,13206,1331,,,5490.0,,,,,5309,5692,,,,, +71119,665,8881,2149,3205,1331,,,5490.0,,,,6073.0,9490,836,,,,, +66535,665,566,6705,10269,1331,,,5490.0,,,,6992.0,496,7067,,,,, +65054,665,11121,11258,18677,1331,,,5490.0,,,,12665.0,4843,5773,,,,, +74272,665,566,11554,17750,1331,,,5490.0,,,,6910.0,496,7067,,,,, +110821,665,9675,2960,7987,1331,,,5490.0,,,,3465.0,3906,3214,,,,, +146780,665,3637,12397,21443,1331,,,5490.0,,,,7050.0,6289,5692,,,,, +53661,665,5987,11289,23244,1331,,,5490.0,,,,8937.0,6804,10990,,,,, +115297,9034,3637,10182,380,1331,,,5490.0,,,,9044.0,11463,7184,,,,, +88141,665,8220,11258,18677,1331,,,5490.0,,,,12665.0,4645,7332,,,,, 
+108101,665,5987,6705,10269,1331,,,5490.0,,,,6992.0,271,2597,,,,, +152503,665,5987,11554,17750,1331,,,5490.0,,,,6910.0,271,2597,,,,, +88754,665,5987,2960,7987,1331,,,5490.0,,,,3465.0,271,2597,,,,, +17476,9034,2273,12445,8047,1331,,,5490.0,,,,13143.0,7739,5531,,,,, +1955,665,3637,3363,666,1331,,,5490.0,,,,7948.0,6289,5692,,,,, +118939,665,3637,10248,5700,1331,,,5490.0,,,,5350.0,6289,5692,,,,, +115691,665,5987,7754,13286,1331,,,5490.0,,,,13355.0,4102,192,,,,, +111900,665,5987,11554,17750,1331,,,5490.0,,,,6910.0,6804,10990,,,,, +99133,665,4351,11554,17750,1331,,,5490.0,,,,6910.0,3158,3294,,,,, +33439,665,4351,10248,5700,1331,,,5490.0,,,,5350.0,10401,2940,,,,, +76750,665,4351,11258,18677,1331,,,5490.0,,,,12665.0,3955,4266,,,,, +101677,665,4351,11554,17750,1331,,,5490.0,,,,6910.0,3955,4266,,,,, +7658,665,4351,757,20626,1331,,,5490.0,,,,2315.0,7938,758,,,,, +8349,665,4351,11289,23244,1331,,,5490.0,,,,8937.0,3955,4266,,,,, +83441,665,4351,2960,7987,1331,,,5490.0,,,,3465.0,3955,4266,,,,, +31987,665,4351,757,20626,1331,,,5490.0,,,,2315.0,3955,4266,,,,, +149415,7552,10002,9293,1136,1331,,,5490.0,,,,13107.0,9301,4502,,,,, +109005,889,3080,2677,11631,1331,,,5490.0,,,,,12081,1503,,,,, +51503,889,255,1957,19548,1331,,,5490.0,,,,,10678,7672,,,,, +92084,584,8881,933,23961,1331,,,5490.0,,,,5399.0,1933,836,,,,, +20482,584,8220,286,19980,1331,,,5490.0,,,,9158.0,2464,7332,,,,, +142288,584,3637,6366,21753,1331,,,5490.0,,,,,6548,7716,,,,, +26495,584,8220,7954,2585,1331,,,5490.0,,,,9965.0,2464,7332,,,,, +44120,889,5772,12734,19047,1331,,,5490.0,,,,,4378,10734,,,,, +25470,584,8220,6297,10755,1331,,,5490.0,,,,9721.0,2464,7332,,,,, +55155,584,8220,12707,5062,1331,,,5490.0,,,,10641.0,2464,7332,,,,, +59236,584,3228,8590,11654,1331,,,5490.0,,,,11500.0,6818,10843,,,,, +51768,584,7973,913,21672,1331,,,5490.0,,,,3441.0,7416,7132,,,,, +46767,889,5987,1789,4216,1331,,,5490.0,,,,,4102,192,,,,, +98773,584,8220,8901,6846,1331,,,5490.0,,,,1663.0,2464,7332,,,,, +102018,889,1815,995,12664,1331,,,5490.0,,,,4139.0,7236,6845,,,,, +16147,889,9425,3510,1739,1331,,,5490.0,,,,3252.0,2685,6242,,,,, +37396,889,4351,4335,5449,1331,,,5490.0,,,,11376.0,2443,7969,,,,, +110428,889,3637,8343,20660,1331,,,5490.0,,,,3918.0,5309,5692,,,,, +11390,889,8609,2960,7987,1331,,,5490.0,,,,,8379,1288,,,,, +18726,584,2123,2873,3314,1331,,,5490.0,,,,7270.0,9951,1400,,,,, +8677,889,5987,2836,20839,1331,,,5490.0,,,,11801.0,4102,192,,,,, +31211,889,5904,1957,19548,1331,,,5490.0,,,,,2790,3686,,,,, +123923,584,8220,693,17394,1331,,,5490.0,,,,,2464,7332,,,,, +88350,584,8220,1675,17892,1331,,,5490.0,,,,1614.0,2464,7332,,,,, +130425,584,8220,11192,21322,1331,,,5490.0,,,,11314.0,4645,7332,,,,, +17957,584,8220,913,21672,1331,,,5490.0,,,,3441.0,2464,7332,,,,, +148169,889,4351,4162,20465,1331,,,5490.0,,,,8503.0,4435,7969,,,,, +46432,889,9425,6584,13541,1331,,,5490.0,,,,6965.0,2685,6242,,,,, +117012,889,4351,765,5507,1331,,,5490.0,,,,12148.0,1376,374,,,,, +127289,889,1815,9018,16763,1331,,,5490.0,,,,9990.0,3899,6845,,,,, +129839,889,8499,7312,24175,1331,,,5490.0,,,,9424.0,6421,2112,,,,, +137161,889,3637,6416,9237,1331,,,5490.0,,,,1274.0,5309,5692,,,,, +86281,889,5987,564,14434,1331,,,5490.0,,,,,4102,192,,,,, +121552,889,9425,12206,19851,1331,,,5490.0,,,,10580.0,2685,6242,,,,, +5523,584,8220,8187,8921,1331,,,5490.0,,,,5043.0,2464,7332,,,,, +131839,584,8220,3520,5888,1331,,,5490.0,,,,10633.0,2464,7332,,,,, +75866,584,2123,6883,20216,1331,,,5490.0,,,,6350.0,8751,11099,,,,, +71809,889,5987,6911,21649,1331,,,5490.0,,,,1570.0,342,192,,,,, 
+134463,889,10181,3070,14517,1331,,,5490.0,,,,2327.0,10757,6956,,,,, +39004,584,8220,9583,4888,1331,,,5490.0,,,,5606.0,2464,7332,,,,, +89639,889,3637,12206,19851,1331,,,5490.0,,,,10580.0,5309,5692,,,,, +73506,8266,2704,2555,12270,1331,,,5490.0,,,,9705.0,9842,670,,,,, +139715,2487,10289,8577,23785,1331,,,5490.0,,,,2909.0,11400,5264,,,,, +92298,2487,10289,2836,20839,1331,,,5490.0,,,,12203.0,11400,5264,,,,, +112860,2487,10289,5502,18949,1331,,,5490.0,,,,5401.0,11400,5264,,,,, +94341,2487,10289,10960,23341,1331,,,5490.0,,,,12230.0,11400,5264,,,,, +84661,2487,5987,8577,23785,1331,,,5490.0,,,,2909.0,11655,8155,,,,, +127815,8266,2704,8418,22417,1331,,,5490.0,,,,5987.0,9842,670,,,,, +114637,2487,9284,11406,21765,1331,,,5490.0,,,,8760.0,2051,9025,,,,, +120662,8266,2734,2555,12270,1331,,,5490.0,,,,3356.0,6992,3891,,,,, +77654,2487,10289,10543,11301,1331,,,5490.0,,,,5797.0,11400,5264,,,,, +52859,2487,1941,865,18965,1331,,,5490.0,,,,1317.0,10167,10122,,,,, +99667,2487,5987,10402,21256,1331,,,5490.0,,,,4198.0,11655,8155,,,,, +39458,2487,10289,10402,21256,1331,,,5490.0,,,,4198.0,11400,5264,,,,, +137598,2487,2123,7653,5315,1331,,,5490.0,,,,12212.0,8088,8147,,,,, +42479,2487,5987,5502,18949,1331,,,5490.0,,,,5401.0,11655,8155,,,,, +47641,2487,562,7653,5315,1331,,,5490.0,,,,12212.0,9598,914,,,,, +138942,2487,6587,10960,23341,1331,,,5490.0,,,,12230.0,10651,6830,,,,, +3496,2487,10289,12854,1260,1331,,,5490.0,,,,3740.0,11400,5264,,,,, +66663,2487,3637,3502,7058,1331,,,5490.0,,,,7985.0,5309,5692,,,,, +144416,2487,10289,6526,13869,1331,,,5490.0,,,,4204.0,11400,5264,,,,, +150343,229,5248,8255,21747,1331,,,5490.0,,,,5302.0,507,1962,,,,, +52099,229,5987,4310,21923,1331,,,5490.0,,,,7701.0,4102,192,,,,, +121789,229,566,8255,21747,1331,,,5490.0,,,,5302.0,496,7067,,,,, +52153,229,1004,4824,16715,1331,,,5490.0,,,,8138.0,10961,10789,,,,, +95103,229,5987,8913,18400,1331,,,5490.0,,,,9117.0,4102,192,,,,, +102328,229,6847,7047,20430,1331,,,5490.0,,,,3158.0,5605,8226,,,,, +126774,229,7565,4824,16715,1331,,,5490.0,,,,8138.0,776,3975,,,,, +127133,229,4351,8255,21747,1331,,,5490.0,,,,5302.0,9451,8766,,,,, +69764,229,7565,7636,3074,1331,,,5490.0,,,,10449.0,293,5494,,,,, +93154,5895,5987,8255,21747,1331,,,5490.0,,,,,4102,192,,,,, +45928,229,5987,8255,21747,1331,,,5490.0,,,,5302.0,11454,8747,,,,, +9543,229,5987,2633,5260,1331,,,5490.0,,,,7738.0,4102,192,,,,, +41640,229,6810,8396,4809,1331,,,5490.0,,,,7303.0,471,4817,,,,, +10654,229,5987,4014,8773,1331,,,5490.0,,,,8388.0,4102,192,,,,, +16687,229,6810,5258,10596,1331,,,5490.0,,,,4544.0,471,4817,,,,, +25069,229,6810,5534,7595,1331,,,5490.0,,,,7577.0,471,4817,,,,, +137225,229,8598,8255,21747,1331,,,5490.0,,,,5302.0,1179,10074,,,,, +88948,229,4351,8255,21747,1331,,,5490.0,,,,5302.0,7108,6713,,,,, +39653,229,1004,8255,21747,1331,,,5490.0,,,,5302.0,854,4938,,,,, +137353,229,5248,4824,16715,1331,,,5490.0,,,,8138.0,3288,5861,,,,, +32231,229,9305,8255,21747,1331,,,5490.0,,,,5302.0,12050,8552,,,,, +48748,229,6810,4619,11842,1331,,,5490.0,,,,,471,4817,,,,, +24139,229,1004,4824,16715,1331,,,5490.0,,,,8138.0,854,4938,,,,, +119203,229,6810,4824,16715,1331,,,5490.0,,,,8138.0,7162,4817,,,,, +37449,229,5044,4824,16715,1331,,,5490.0,,,,8138.0,8452,1985,,,,, +98305,229,5987,8255,21747,1331,,,5490.0,,,,5302.0,271,2597,,,,, +106283,229,5987,564,14434,1331,,,5490.0,,,,316.0,4102,192,,,,, +83001,229,1004,8255,21747,1331,,,5490.0,,,,5302.0,10961,10789,,,,, +42771,229,5987,9913,1154,1331,,,5490.0,,,,4659.0,4102,192,,,,, +139650,229,9305,7636,3074,1331,,,5490.0,,,,10449.0,6002,5633,,,,, 
+105281,229,6810,12756,8578,1331,,,5490.0,,,,6332.0,471,4817,,,,, +140570,229,3087,11118,13079,1331,,,5490.0,,,,9512.0,9062,3458,,,,, +151531,5895,5987,11997,21650,1331,,,5490.0,,,,11501.0,4102,192,,,,, +58777,229,1004,7636,3074,1331,,,5490.0,,,,10449.0,6220,7308,,,,, +84286,229,1941,7636,3074,1331,,,5490.0,,,,10449.0,10167,10122,,,,, +51431,7508,183,12097,4178,1331,,16669.0,,,,,2792.0,7396,3649,1849.0,5535.0,,, +6859,7508,3575,5313,16218,1331,,16669.0,,,,,2048.0,8641,4813,1849.0,5535.0,,, +122615,7508,9051,5313,16218,1331,,16669.0,,,,,2048.0,5459,8021,1849.0,5535.0,,, +29681,7508,8812,8043,16726,1331,,16669.0,,,,,6360.0,11218,1352,1849.0,5535.0,,, +81739,7508,8006,8043,16726,1331,,16669.0,,,,,6360.0,11570,2035,1849.0,5535.0,,, +75396,7508,1264,12103,21026,1331,,16669.0,,,,,2319.0,294,6737,1849.0,5535.0,,, +1615,7508,5987,5313,16218,1331,,16669.0,,,,,2048.0,971,192,1849.0,5535.0,,, +81559,7508,4351,7788,1933,1331,,16669.0,,,,,9700.0,6346,2294,1849.0,5535.0,,, +142462,7508,4351,12103,21026,1331,,16669.0,,,,,2319.0,6346,2294,1849.0,5535.0,,, +54948,7508,4351,1317,8742,1331,,16669.0,,,,,4893.0,9550,4725,1849.0,5535.0,,, +91359,7508,4351,5313,16218,1331,,16669.0,,,,,2048.0,6346,2294,1849.0,5535.0,,, +102634,2253,5987,11649,9206,1331,,,5490.0,,,,12897.0,342,192,,,,, +57404,2253,5987,12081,20227,1331,,,5490.0,,,,3837.0,4102,192,,,,, +59301,2253,7127,6349,18224,1331,,,5490.0,,,,342.0,5537,6041,,,,, +46314,2253,8644,7278,22139,1331,,,5490.0,,,,5122.0,2352,1040,,,,, +99875,2253,8881,9178,10472,1331,,,5490.0,,,,2495.0,9490,836,,,,, +101072,2253,6743,10798,14751,1331,,,5490.0,,,,9281.0,5140,4357,,,,, +54214,2253,8644,12081,20227,1331,,,5490.0,,,,4620.0,2352,1040,,,,, +29372,2253,5987,7278,22139,1331,,,5490.0,,,,7573.0,4102,192,,,,, +16606,2253,5987,757,20626,1331,,,5490.0,,,,11146.0,4102,192,,,,, +36083,2253,5987,5502,18949,1331,,,5490.0,,,,9128.0,342,192,,,,, +90263,2253,1042,457,8629,1331,,,5490.0,,,,10905.0,10444,3856,,,,, +141677,2253,5622,6349,18224,1331,,,5490.0,,,,342.0,559,1123,,,,, +112438,2253,7973,1690,15078,1331,,,5490.0,,,,9394.0,11760,10316,,,,, +40469,2253,1941,10102,2413,1331,,,5490.0,,,,8328.0,10167,10122,,,,, +61710,2253,5987,9456,2238,1331,,,5490.0,,,,7941.0,4102,192,,,,, +146689,2253,4555,9643,3647,1331,,,5490.0,,,,11276.0,5174,241,,,,, +123770,2253,8644,6203,22276,1331,,,5490.0,,,,4005.0,2352,1040,,,,, +145101,2253,1941,5436,3310,1331,,,5490.0,,,,2108.0,898,10122,,,,, +16732,2253,1941,10245,6616,1331,,,5490.0,,,,11097.0,10167,10122,,,,, +141264,2253,5987,6203,22276,1331,,,5490.0,,,,4005.0,11454,8747,,,,, +33788,2253,2734,457,8629,1331,,,5490.0,,,,10905.0,6992,3891,,,,, +98588,2253,2734,8880,6505,1331,,,5490.0,,,,12136.0,84,6098,,,,, +55856,2253,4351,6349,18224,1331,,,5490.0,,,,342.0,1557,1323,,,,, +90501,2253,4351,1690,15078,1331,,,5490.0,,,,9394.0,1557,1323,,,,, +17854,2253,4351,6349,18224,1331,,,5490.0,,,,342.0,463,7738,,,,, +93949,2253,4351,1690,15078,1331,,,5490.0,,,,9394.0,463,7738,,,,, +83743,2253,4351,12597,22984,1331,,,5490.0,,,,8497.0,463,7738,,,,, +122179,2253,4351,1464,7793,1331,,,5490.0,,,,4455.0,463,7738,,,,, +72134,2253,4351,9643,3647,1331,,,5490.0,,,,11276.0,463,7738,,,,, +82182,2253,4351,4589,9396,1331,,,5490.0,,,,11234.0,463,7738,,,,, +98134,2253,4351,6355,22424,1331,,,5490.0,,,,5801.0,463,7738,,,,, +105276,2253,4351,457,8629,1331,,,5490.0,,,,10905.0,1272,2294,,,,, +80892,2253,4351,1983,18523,1331,,,5490.0,,,,13679.0,463,7738,,,,, +138861,2253,4351,3131,13151,1331,,,5490.0,,,,9955.0,463,7738,,,,, 
+125680,2253,4351,1464,7793,1331,,,5490.0,,,,4455.0,10344,10492,,,,, +62465,2253,4351,9808,2635,1331,,,5490.0,,,,5887.0,463,7738,,,,, +43261,2253,4351,7015,21437,1331,,,5490.0,,,,8370.0,7216,5174,,,,, +65994,2253,4351,8428,6179,1331,,,5490.0,,,,1370.0,463,7738,,,,, +44338,2253,4351,8579,8565,1331,,,5490.0,,,,5190.0,1557,1323,,,,, +97523,2253,4351,6203,22276,1331,,,5490.0,,,,4005.0,10401,2940,,,,, +88274,2253,4351,1716,23720,1331,,,5490.0,,,,1242.0,7216,5174,,,,, +84947,2253,4351,10804,16846,1331,,,5490.0,,,,7350.0,463,7738,,,,, +78897,2253,4351,2276,12297,1331,,,5490.0,,,,4111.0,463,7738,,,,, +19460,2253,4351,8801,10295,1331,,,5490.0,,,,10979.0,463,7738,,,,, +45054,2253,4351,12840,4293,1331,,,5490.0,,,,2696.0,7457,1323,,,,, +3724,2253,4351,8579,8565,1331,,,5490.0,,,,5190.0,6544,2324,,,,, +88025,2253,4351,9643,3647,1331,,,5490.0,,,,11276.0,4132,10267,,,,, +20599,6920,5987,12639,11106,1331,,,5490.0,,,,3916.0,342,192,,,,, +149124,4834,3962,5830,3432,1331,,,5490.0,,,,7824.0,1636,4542,,,,, +63370,4834,3962,11863,11571,1331,,,5490.0,,,,9856.0,9269,2063,,,,, +60927,4834,4880,8645,5004,1331,,,5490.0,,,,7989.0,8711,5316,,,,, +52404,4834,3962,1463,13009,1331,,,5490.0,,,,12139.0,4275,2063,,,,, +18949,6920,10181,1276,22405,1331,,,5490.0,,,,2705.0,2732,566,,,,, +67625,4834,3962,963,4234,1331,,,5490.0,,,,13601.0,9269,2063,,,,, +51784,4834,3962,1282,18682,1331,,,5490.0,,,,8989.0,4275,2063,,,,, +128512,4834,1004,8645,5004,1331,,,5490.0,,,,7989.0,10961,10789,,,,, +131302,6920,3087,5659,7261,1331,,,5490.0,,,,5010.0,6673,9982,,,,, +24005,4834,1004,8645,5004,1331,,,5490.0,,,,7989.0,854,4938,,,,, +52479,4834,7565,8645,5004,1331,,,5490.0,,,,7989.0,293,5494,,,,, +5760,4834,3080,7073,23818,1331,,,5490.0,,,,1898.0,8631,7975,,,,, +147602,4834,10181,1073,18068,1331,,,5490.0,,,,2651.0,7220,8765,,,,, +10640,4834,10181,9811,22231,1331,,,5490.0,,,,589.0,9884,8765,,,,, +93595,4834,3080,1073,18068,1331,,,5490.0,,,,2651.0,5484,1813,,,,, +16943,6920,2734,12639,11106,1331,,,5490.0,,,,3916.0,1045,5056,,,,, +127327,4834,3962,7853,12266,1331,,,5490.0,,,,11693.0,4275,2063,,,,, +54714,4834,3962,5830,3432,1331,,,5490.0,,,,7824.0,8342,6187,,,,, +60188,4834,225,798,18716,1331,,,5490.0,,,,10891.0,9529,6714,,,,, +113924,4834,3962,11750,20208,1331,,,5490.0,,,,13110.0,1636,4542,,,,, +24149,6920,7565,924,21423,1331,,,5490.0,,,,3325.0,293,5494,,,,, +120692,4834,3962,6041,2660,1331,,,5490.0,,,,4909.0,4275,2063,,,,, +123053,6920,5987,924,21423,1331,,,5490.0,,,,3325.0,342,192,,,,, +78517,4834,3962,9735,13036,1331,,,5490.0,,,,2852.0,4275,2063,,,,, +136540,4834,225,12816,2955,1331,,,5490.0,,,,13282.0,9529,6714,,,,, +56770,4834,5248,8645,5004,1331,,,5490.0,,,,7989.0,7110,9680,,,,, +115892,4834,2495,8645,5004,1331,,,5490.0,,,,7989.0,10328,5186,,,,, +149330,9633,7230,8490,11530,1331,,16669.0,,,,,13068.0,11185,10988,1849.0,5535.0,,, +120230,9633,7230,1947,19476,1331,,16669.0,,,,,6918.0,11185,10988,1849.0,5535.0,,, +118473,9633,4351,3273,21867,1331,,16669.0,,,,,6912.0,5611,3167,1849.0,5535.0,,, +41457,9633,7230,4110,20602,1331,,16669.0,,,,,9771.0,11185,10988,1849.0,5535.0,,, +55276,9633,7230,1947,12291,1331,,16669.0,,,,,4856.0,11185,10988,1849.0,5535.0,,, +42791,7659,4351,11141,10173,1331,,16669.0,,,,,10857.0,6346,2294,1849.0,5535.0,,, +54119,7659,4351,5193,15553,1331,,16669.0,,,,,13571.0,8007,118,1849.0,5535.0,,, +51285,7659,9675,4517,14075,1331,,16669.0,,,,,6033.0,1041,3214,1849.0,5535.0,,, +55583,7659,4351,4517,14075,1331,,16669.0,,,,,6033.0,6346,2294,1849.0,5535.0,,, +152488,10034,10574,12103,14043,1331,,,5490.0,,,,10617.0,7922,1613,,,,, 
+98325,7659,3600,5193,15553,1331,,16669.0,,,,,13571.0,7600,1586,1849.0,5535.0,,, +136758,2700,255,2965,23642,1331,,,5490.0,,,,966.0,827,10862,,,,, +60119,7659,4351,3795,16087,1331,,16669.0,,,,,12477.0,6346,2294,1849.0,5535.0,,, +11703,2700,8644,3887,183,1331,,,5490.0,,,,6827.0,12129,45,,,,, +95659,2700,10181,2045,18201,1331,,,5490.0,,,,6978.0,2434,9641,,,,, +86997,7659,5987,1264,10453,1331,,16669.0,,,,,7937.0,692,2597,1849.0,5535.0,,, +134716,2700,4351,7671,19255,1331,,,5490.0,,,,10761.0,2498,9470,,,,, +34136,2700,4351,6769,23457,1331,,,5490.0,,,,216.0,2498,9470,,,,, +70662,2700,4351,7938,8240,1331,,,5490.0,,,,5022.0,3870,3127,,,,, +123762,7659,255,3795,16087,1331,,16669.0,,,,,12477.0,10889,7672,1849.0,5535.0,,, +58274,2700,2734,9260,19762,1331,,,5490.0,,,,1426.0,4526,9165,,,,, +148981,2700,3575,3887,183,1331,,,5490.0,,,,6827.0,5361,4813,,,,, +16736,2700,1042,3887,183,1331,,,5490.0,,,,6827.0,10444,3856,,,,, +95110,2700,4351,4122,20791,1331,,,5490.0,,,,1735.0,5236,7272,,,,, +43316,2700,4351,3887,183,1331,,,5490.0,,,,6827.0,7500,2632,,,,, +94283,2700,9433,3887,183,1331,,,5490.0,,,,6827.0,3548,6892,,,,, +72347,2700,4351,11289,23244,1331,,,5490.0,,,,7034.0,3870,3127,,,,, +94264,2700,8644,11697,12815,1331,,,5490.0,,,,7172.0,7817,11228,,,,, +70625,2700,4351,11289,23244,1331,,,5490.0,,,,7034.0,5236,7272,,,,, +118474,2700,4351,11289,23244,1331,,,5490.0,,,,7034.0,7500,2632,,,,, +92735,2700,2734,1463,13009,1331,,,5490.0,,,,8045.0,5513,11371,,,,, +93664,2700,4351,1904,14127,1331,,,5490.0,,,,5893.0,1272,2294,,,,, +126651,2700,4351,5478,3145,1331,,,5490.0,,,,11028.0,5236,7272,,,,, +2601,2700,1296,3887,183,1331,,,5490.0,,,,6827.0,6175,266,,,,, +121941,2700,4351,1904,14127,1331,,,5490.0,,,,5893.0,5236,7272,,,,, +131680,2700,566,1904,14127,1331,,,5490.0,,,,5893.0,496,7067,,,,, +64594,2700,4351,11697,12815,1331,,,5490.0,,,,7172.0,3870,3127,,,,, +140307,2700,4351,2769,16084,1331,,,5490.0,,,,3265.0,1272,2294,,,,, +119028,2700,2734,2965,23642,1331,,,5490.0,,,,4475.0,84,6098,,,,, +32014,2700,4351,5487,9554,1331,,,5490.0,,,,6103.0,2498,9470,,,,, +15371,2700,5987,1904,14127,1331,,,5490.0,,,,5893.0,271,2597,,,,, +42228,2700,2734,10486,18241,1331,,,5490.0,,,,885.0,4526,9165,,,,, +49765,2700,5987,12229,19094,1331,,,5490.0,,,,11759.0,271,2597,,,,, +95955,2700,2734,4392,9298,1331,,,5490.0,,,,13378.0,5513,11371,,,,, +86113,2700,10347,8406,628,1331,,,5490.0,,,,9199.0,9013,2475,,,,, +146880,2700,4351,10496,17766,1331,,,5490.0,,,,264.0,2498,9470,,,,, +31815,2700,4351,2769,16084,1331,,,5490.0,,,,3265.0,10663,9470,,,,, +146126,2700,4351,10496,17766,1331,,,5490.0,,,,264.0,187,7272,,,,, +117743,2700,3493,3887,183,1331,,,5490.0,,,,6827.0,5926,2051,,,,, +50121,2700,10181,11697,12815,1331,,,5490.0,,,,7172.0,514,1799,,,,, +2371,2700,4351,12229,19094,1331,,,5490.0,,,,11759.0,5236,7272,,,,, +127277,2700,566,11289,23244,1331,,,5490.0,,,,7034.0,496,7067,,,,, +33457,2700,4351,12499,4680,1331,,,5490.0,,,,10434.0,9042,2097,,,,, +102325,2700,4351,7671,19255,1331,,,5490.0,,,,10761.0,9042,2097,,,,, +72241,2700,4351,11289,23244,1331,,,5490.0,,,,7034.0,1272,2294,,,,, +105384,2700,5987,10719,8159,1331,,,5490.0,,,,3212.0,342,192,,,,, +15937,2700,4351,10207,21944,1331,,,5490.0,,,,9818.0,9042,2097,,,,, +132531,2700,9675,2769,16084,1331,,,5490.0,,,,3265.0,420,2969,,,,, +62830,2700,4351,2769,16084,1331,,,5490.0,,,,3265.0,11562,2097,,,,, +20912,2700,7699,4122,20791,1331,,,5490.0,,,,1735.0,6864,3621,,,,, +88682,2700,4351,12499,4680,1331,,,5490.0,,,,10434.0,2498,9470,,,,, +77519,2700,4351,10207,21944,1331,,,5490.0,,,,9818.0,2498,9470,,,,, 
+17620,2700,4351,11697,12815,1331,,,5490.0,,,,7172.0,7500,2632,,,,, +35366,2700,4351,4122,20791,1331,,,5490.0,,,,1735.0,1272,2294,,,,, +18793,2700,10574,11697,12815,1331,,,5490.0,,,,7172.0,5388,1613,,,,, +9423,2700,4351,8447,11940,1331,,,5490.0,,,,7421.0,9042,2097,,,,, +133036,2700,255,1018,20367,1331,,,5490.0,,,,8237.0,827,10862,,,,, +44732,2700,2431,2045,18201,1331,,,5490.0,,,,6978.0,4152,6170,,,,, +107150,2700,4351,7938,8240,1331,,,5490.0,,,,5022.0,5236,7272,,,,, +117279,2700,5987,10496,17766,1331,,,5490.0,,,,264.0,4102,192,,,,, +125689,2700,4351,7938,8240,1331,,,5490.0,,,,5022.0,10663,9470,,,,, +76836,2700,8600,11289,23244,1331,,,5490.0,,,,7034.0,8747,7989,,,,, +7573,2700,566,12229,19094,1331,,,5490.0,,,,11759.0,496,7067,,,,, +94634,2700,9675,12229,19094,1331,,,5490.0,,,,11759.0,420,2969,,,,, +32886,2700,4351,12229,19094,1331,,,5490.0,,,,11759.0,10663,9470,,,,, +104258,2700,4351,9465,481,1331,,,5490.0,,,,7767.0,2498,9470,,,,, +30325,2700,5987,7938,8240,1331,,,5490.0,,,,5022.0,271,2597,,,,, +75482,2700,5987,12206,19851,1331,,,5490.0,,,,11913.0,342,192,,,,, +75245,2700,4351,4122,20791,1331,,,5490.0,,,,1735.0,3870,3127,,,,, +4022,2700,4351,12748,18235,1331,,,5490.0,,,,149.0,9042,2097,,,,, +71386,2700,4351,1994,9287,1331,,,5490.0,,,,12875.0,5236,7272,,,,, +139341,2700,4351,6769,23457,1331,,,5490.0,,,,216.0,9042,2097,,,,, +132605,7659,4351,8374,14572,1331,,16669.0,,,,,12842.0,527,8825,1849.0,5535.0,,, +96863,2700,4351,8447,11940,1331,,,5490.0,,,,7421.0,2498,9470,,,,, +93150,2700,5987,11289,23244,1331,,,5490.0,,,,7034.0,271,2597,,,,, +29482,2700,10347,12229,19094,1331,,,5490.0,,,,2544.0,2339,2475,,,,, +116558,2700,4351,7938,8240,1331,,,5490.0,,,,5022.0,1272,2294,,,,, +4684,2700,10181,3887,183,1331,,,5490.0,,,,6827.0,514,1799,,,,, +73909,2700,4351,12748,18235,1331,,,5490.0,,,,149.0,2498,9470,,,,, +97855,7659,4351,1264,10453,1331,,16669.0,,,,,7937.0,6346,2294,1849.0,5535.0,,, +109491,7659,9675,3795,16087,1331,,16669.0,,,,,12477.0,1041,3214,1849.0,5535.0,,, +146722,2700,566,3887,183,1331,,,5490.0,,,,6827.0,496,7067,,,,, +91197,2700,4351,12229,19094,1331,,,5490.0,,,,11759.0,1272,2294,,,,, +38628,2700,4351,4046,3456,1331,,,5490.0,,,,2833.0,5236,7272,,,,, +79700,2700,4351,10207,21944,1331,,,5490.0,,,,9818.0,187,7272,,,,, +143477,2700,1042,4122,20791,1331,,,5490.0,,,,1735.0,10444,3856,,,,, +92361,2700,4351,12590,4092,1331,,,5490.0,,,,11086.0,10663,9470,,,,, +14997,2700,4351,11697,12815,1331,,,5490.0,,,,7172.0,1272,2294,,,,, +63342,2700,4351,1496,21359,1331,,,5490.0,,,,472.0,2498,9470,,,,, +120528,2700,4351,3281,19634,1331,,,5490.0,,,,9852.0,2498,9470,,,,, +79324,8152,6810,900,10177,1331,,,5490.0,,,,6652.0,5996,11201,,,,, +136801,2307,2734,12004,5958,1331,,,5490.0,,,,282.0,11653,5056,,,,, +73243,116,4351,11283,14033,1331,,,5490.0,,,,1557.0,8883,1419,,,,, +138438,2307,2734,1923,18654,1331,,,5490.0,,,,7800.0,10721,10970,,,,, +66709,2307,2734,5753,4626,1331,,,5490.0,,,,11576.0,11653,5056,,,,, +62643,8152,4351,4122,20791,1331,,,5490.0,,,,8213.0,3354,3113,,,,, +146223,8152,4351,6549,14351,1331,,,5490.0,,,,8146.0,3354,3113,,,,, +79298,2307,2734,1923,18654,1331,,,5490.0,,,,7800.0,1045,5056,,,,, +79119,8152,10002,1986,23382,1331,,,5490.0,,,,12798.0,9301,4502,,,,, +60837,8152,10181,924,21423,1331,,,5490.0,,,,1106.0,5222,4027,,,,, +91118,116,4351,9499,19378,1331,,,5490.0,,,,,7583,7509,,,,, +82859,8152,3080,5506,13264,1331,,,5490.0,,,,11994.0,769,9646,,,,, +17002,2307,2734,2555,12270,1331,,,5490.0,,,,8508.0,1045,5056,,,,, +139557,8152,3228,5006,4393,1331,,,5490.0,,,,667.0,6818,10843,,,,, 
+82902,8152,255,10201,21330,1331,,,5490.0,,,,,10678,7672,,,,, +72158,8152,10181,1923,18654,1331,,,5490.0,,,,4652.0,5222,4027,,,,, +39205,8152,10181,6549,14351,1331,,,5490.0,,,,8146.0,5222,4027,,,,, +108802,8152,4351,7408,12272,1331,,,5490.0,,,,10591.0,3354,3113,,,,, +44813,8152,7973,9063,10243,1331,,,5490.0,,,,9785.0,11760,10316,,,,, +146404,8152,10181,7502,17073,1331,,,5490.0,,,,2532.0,10609,4027,,,,, +138344,8152,10181,8523,6872,1331,,,5490.0,,,,7972.0,5222,4027,,,,, +93483,8152,2123,715,1969,1331,,,5490.0,,,,5980.0,4701,11099,,,,, +60977,2307,2734,12004,5958,1331,,,5490.0,,,,282.0,8450,11159,,,,, +128515,8152,10181,8987,8977,1331,,,5490.0,,,,13574.0,5222,4027,,,,, +9145,8152,5091,1276,22405,1331,,,5490.0,,,,10326.0,1476,1616,,,,, +46575,8152,1296,10201,21330,1331,,,5490.0,,,,,8029,266,,,,, +146423,8152,4351,5506,13264,1331,,,5490.0,,,,11994.0,3354,3113,,,,, +64620,8152,10181,7408,12272,1331,,,5490.0,,,,10591.0,5222,4027,,,,, +51828,550,10802,5455,5898,1331,,,5490.0,,,,13379.0,2737,1719,,,,, +56304,10728,566,1893,1726,1331,,,5490.0,,,,4674.0,496,7067,,,,, +108541,10728,4351,2393,7359,1331,,,5490.0,,,,2765.0,5566,6758,,,,, +49558,2750,2734,4217,20407,1331,,,5490.0,,,,2827.0,7698,1035,,,,, +85216,10133,4233,12824,4546,1331,,,5490.0,,,,6321.0,3016,6002,,,,, +91572,5111,4493,3763,10600,1331,,,5490.0,,,,4480.0,6075,3507,,,,, +73956,10728,1435,6300,5515,1331,,,5490.0,,,,916.0,7993,7839,,,,, +48984,10275,3637,10121,19454,1331,,,5490.0,,,,8210.0,11463,7184,,,,, +142573,10728,566,219,23469,1331,,,5490.0,,,,11746.0,496,7067,,,,, +64314,10728,955,9090,13934,1331,,,5490.0,,,,1623.0,9552,3593,,,,, +29365,550,1004,11019,22714,1331,,,5490.0,,,,1069.0,10961,10789,,,,, +49651,10728,5987,12068,15316,1331,,,5490.0,,,,11605.0,342,192,,,,, +29895,10728,1004,219,23469,1331,,,5490.0,,,,11746.0,10961,10789,,,,, +129088,10133,7565,3166,23012,1331,,,5490.0,,,,3341.0,776,3975,,,,, +133995,10728,1941,11365,1678,1331,,,5490.0,,,,12444.0,10167,10122,,,,, +72054,10728,7583,566,5478,1331,,,5490.0,,,,3541.0,4980,3722,,,,, +118335,5111,5987,2894,21368,1331,,,5490.0,,,,13287.0,4102,192,,,,, +11262,2750,2734,7502,17073,1331,,,5490.0,,,,10124.0,2539,1131,,,,, +36486,550,10802,4217,20407,1331,,,5490.0,,,,3988.0,2737,1719,,,,, +47382,10728,4351,9090,13934,1331,,,5490.0,,,,6252.0,8212,637,,,,, +84012,905,4351,8139,3884,1331,,,5490.0,,,,10668.0,7216,5174,,,,, +87336,550,6923,4149,20375,1331,,,5490.0,,,,3288.0,12145,5318,,,,, +17728,2750,6810,6337,1859,1331,,,5490.0,,,,2816.0,12107,8083,,,,, +40296,10133,3962,4401,13613,1331,,,5490.0,,,,452.0,9648,8086,,,,, +73394,10728,8841,3781,594,1331,,,5490.0,,,,7968.0,993,4939,,,,, +118410,10728,1004,1893,1726,1331,,,5490.0,,,,4674.0,6220,7308,,,,, +16806,905,4448,9667,19409,1331,,,5490.0,,,,11331.0,8695,7688,,,,, +66479,10133,3962,6053,20022,1331,,,5490.0,,,,,12005,4247,,,,, +90328,10728,4448,2393,7359,1331,,,5490.0,,,,2765.0,8695,7688,,,,, +46784,10133,3080,7412,12094,1331,,,5490.0,,,,183.0,4263,2367,,,,, +82216,10728,562,219,23469,1331,,,5490.0,,,,11746.0,1203,6643,,,,, +412,10728,566,12068,15316,1331,,,5490.0,,,,11605.0,496,7067,,,,, +5045,10728,3087,3398,8402,1331,,,5490.0,,,,3757.0,9062,3458,,,,, +22956,550,2734,9500,16635,1331,,,5490.0,,,,2304.0,5574,169,,,,, +142801,2750,6810,12753,13301,1331,,,5490.0,,,,1862.0,10079,4473,,,,, +151340,10728,7665,6300,5515,1331,,,5490.0,,,,916.0,11748,4356,,,,, +67296,10728,7583,696,9377,1331,,,5490.0,,,,6107.0,6030,5989,,,,, +41923,10728,8600,6300,5515,1331,,,5490.0,,,,916.0,8747,7989,,,,, 
+117410,10728,8127,11242,9728,1331,,,5490.0,,,,11470.0,9838,570,,,,, +139897,10728,4448,924,21423,1331,,,5490.0,,,,10542.0,12228,8718,,,,, +6300,2750,2734,8041,6798,1331,,,5490.0,,,,711.0,7698,1035,,,,, +71404,10728,4351,3781,594,1331,,,5490.0,,,,7968.0,4928,8818,,,,, +77825,905,1004,5863,1194,1331,,,5490.0,,,,6118.0,6220,7308,,,,, +94778,10728,4448,10522,4738,1331,,,5490.0,,,,9127.0,8695,7688,,,,, +91779,10728,566,9293,1136,1331,,,5490.0,,,,13617.0,496,7067,,,,, +40277,550,6648,11019,22714,1331,,,5490.0,,,,1069.0,5281,1610,,,,, +22792,10728,1941,1876,19568,1331,,,5490.0,,,,3734.0,10167,10122,,,,, +136217,10728,7565,2393,7359,1331,,,5490.0,,,,2765.0,293,5494,,,,, +125663,10728,7230,11242,9728,1331,,,5490.0,,,,11470.0,11277,10988,,,,, +66229,10133,1004,3166,23012,1331,,,5490.0,,,,3341.0,10961,10789,,,,, +9095,905,6805,7302,15180,1331,,,5490.0,,,,8712.0,9060,7178,,,,, +73836,10728,11044,3731,16960,1331,,,5490.0,,,,9412.0,3423,10109,,,,, +116018,10728,7230,10095,3395,1331,,,5490.0,,,,3630.0,11277,10988,,,,, +121817,10728,4448,1893,1726,1331,,,5490.0,,,,4674.0,8695,7688,,,,, +63363,550,2734,10891,5037,1331,,,5490.0,,,,13689.0,84,6098,,,,, +136254,10728,5205,3731,16960,1331,,,5490.0,,,,9412.0,6478,844,,,,, +148707,10728,1435,11806,11363,1331,,,5490.0,,,,3869.0,7993,7839,,,,, +127694,10728,5987,1893,1726,1331,,,5490.0,,,,4674.0,11454,8747,,,,, +78192,10728,566,12498,1026,1331,,,5490.0,,,,6554.0,496,7067,,,,, +52646,550,7709,4631,16086,1331,,,5490.0,,,,2249.0,3948,2570,,,,, +129120,550,1004,4149,20375,1331,,,5490.0,,,,3288.0,10961,10789,,,,, +118757,10728,2123,4717,2168,1331,,,5490.0,,,,1820.0,8751,11099,,,,, +33593,10728,1004,424,1544,1331,,,5490.0,,,,8084.0,6220,7308,,,,, +135746,10728,4351,2393,7359,1331,,,5490.0,,,,2765.0,12161,4725,,,,, +8324,10728,4351,4419,19766,1331,,,5490.0,,,,10053.0,8212,637,,,,, +44090,550,4448,3893,7064,1331,,,5490.0,,,,4949.0,12228,8718,,,,, +1094,10728,1004,2393,7359,1331,,,5490.0,,,,2765.0,6220,7308,,,,, +79187,10699,3080,9293,1136,1331,,,5490.0,,,,7844.0,8879,11361,,,,, +99692,10133,225,7412,12094,1331,,,5490.0,,,,183.0,9529,6714,,,,, +7697,10133,2870,3166,23012,1331,,,5490.0,,,,3341.0,4289,4768,,,,, +5405,10728,4351,11242,9728,1331,,,5490.0,,,,11470.0,1759,10739,,,,, +152451,10728,2142,9293,1136,1331,,,5490.0,,,,13617.0,9372,9736,,,,, +135625,10728,7565,424,1544,1331,,,5490.0,,,,8084.0,293,5494,,,,, +19823,550,5193,2070,12178,1331,,,5490.0,,,,12437.0,10497,7014,,,,, +62394,5111,1296,7887,21946,1331,,,5490.0,,,,4969.0,8029,266,,,,, +81412,10728,6597,6300,5515,1331,,,5490.0,,,,916.0,3346,10111,,,,, +101501,550,1004,3893,7064,1331,,,5490.0,,,,4949.0,6220,7308,,,,, +128554,905,4448,5863,1194,1331,,,5490.0,,,,6118.0,8695,7688,,,,, +27047,550,2734,9990,10656,1331,,,5490.0,,,,12360.0,10721,10970,,,,, +112501,550,10802,8847,13219,1331,,,5490.0,,,,1657.0,8133,1719,,,,, +100620,550,5987,9990,10656,1331,,,5490.0,,,,12360.0,8239,8155,,,,, +137771,905,4351,11886,22117,1331,,,5490.0,,,,1362.0,5029,1419,,,,, +99485,905,7565,5863,1194,1331,,,5490.0,,,,6118.0,293,5494,,,,, +86584,10728,7565,3072,22322,1331,,,5490.0,,,,11466.0,293,5494,,,,, +95473,10728,566,11365,1678,1331,,,5490.0,,,,12444.0,496,7067,,,,, +19487,550,1941,7932,18329,1331,,,5490.0,,,,11598.0,10167,10122,,,,, +35822,550,4351,12054,18743,1331,,,5490.0,,,,3604.0,463,7738,,,,, +8875,10728,1004,10522,4738,1331,,,5490.0,,,,9127.0,6220,7308,,,,, +45253,10728,8841,10095,3395,1331,,,5490.0,,,,3630.0,993,4939,,,,, +105563,550,6847,4769,1790,1331,,,5490.0,,,,2074.0,5605,8226,,,,, 
+44031,550,7565,4149,20375,1331,,,5490.0,,,,3288.0,293,5494,,,,, +137146,10728,9284,3072,22322,1331,,,5490.0,,,,11466.0,2691,5807,,,,, +64163,5111,3160,7887,21946,1331,,,5490.0,,,,4969.0,468,9518,,,,, +22025,905,4351,5863,1194,1331,,,5490.0,,,,6118.0,8607,3368,,,,, +10329,550,7565,11019,22714,1331,,,5490.0,,,,1069.0,6921,6205,,,,, +63434,2750,6810,4667,15577,1331,,,5490.0,,,,10048.0,12107,8083,,,,, +114290,10728,4351,2393,7359,1331,,,5490.0,,,,2765.0,8607,3368,,,,, +129485,10728,4351,2393,7359,1331,,,5490.0,,,,2765.0,2443,7969,,,,, +22412,10728,4351,6300,5515,1331,,,5490.0,,,,916.0,4928,8818,,,,, +53813,10728,3575,12068,15316,1331,,,5490.0,,,,11605.0,5361,4813,,,,, +133268,10699,2734,11063,13550,1331,,,5490.0,,,,537.0,6391,7789,,,,, +70729,10728,4448,424,1544,1331,,,5490.0,,,,8084.0,8695,7688,,,,, +37223,10728,566,6300,5515,1331,,,5490.0,,,,916.0,496,7067,,,,, +16349,10728,7565,10522,4738,1331,,,5490.0,,,,9127.0,293,5494,,,,, +8537,10699,4493,2622,15821,1331,,,5490.0,,,,10456.0,6634,2711,,,,, +109704,10728,4351,8923,17405,1331,,,5490.0,,,,13047.0,5695,3655,,,,, +55354,10728,1941,11242,9728,1331,,,5490.0,,,,11470.0,10167,10122,,,,, +67069,10728,7565,1893,1726,1331,,,5490.0,,,,4674.0,293,5494,,,,, +100888,2750,4277,4217,20407,1331,,,5490.0,,,,2827.0,1092,10993,,,,, +100249,10728,5987,6300,5515,1331,,,5490.0,,,,916.0,11454,8747,,,,, +2822,10728,1004,3072,22322,1331,,,5490.0,,,,11466.0,6220,7308,,,,, +56020,905,7565,9667,19409,1331,,,5490.0,,,,11331.0,293,5494,,,,, +16263,10728,7565,219,23469,1331,,,5490.0,,,,11746.0,293,5494,,,,, +12783,10699,4351,11563,8635,1331,,,5490.0,,,,1997.0,7144,8490,,,,, +99876,550,7565,3893,7064,1331,,,5490.0,,,,4949.0,293,5494,,,,, +48203,905,1004,9667,19409,1331,,,5490.0,,,,11331.0,6220,7308,,,,, +49641,10133,3962,10655,9580,1331,,,5490.0,,,,6815.0,12005,4247,,,,, +129655,10728,6910,6300,5515,1331,,,5490.0,,,,916.0,11748,4356,,,,, +128043,2750,4277,8041,6798,1331,,,5490.0,,,,711.0,1092,10993,,,,, +64265,10728,2142,6300,5515,1331,,,5490.0,,,,916.0,9372,9736,,,,, diff --git a/exports/observation.sql b/exports/observation.sql new file mode 100644 index 0000000..012b32b --- /dev/null +++ b/exports/observation.sql @@ -0,0 +1,24 @@ +SELECT +(SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'observation_id' AND CAST(values AS STRING)=CAST(observation.observation_id AS STRING ) ) as observation_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'person_id' AND CAST(values AS STRING)=CAST(observation.person_id AS STRING ) ) as person_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'observation_concept_id' AND CAST(values AS STRING)=CAST(observation.observation_concept_id AS STRING ) ) as observation_concept_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'observation_date' AND CAST(values AS STRING)=CAST(observation.observation_date AS STRING ) ) as observation_date, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'observation_datetime' AND CAST(values AS STRING)=CAST(observation.observation_datetime AS STRING ) ) as observation_datetime, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'observation_type_concept_id' AND CAST(values AS STRING)=CAST(observation.observation_type_concept_id AS STRING ) ) as observation_type_concept_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'value_as_number' AND 
CAST(values AS STRING)=CAST(observation.value_as_number AS STRING ) ) as value_as_number, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'value_as_string' AND CAST(values AS STRING)=CAST(observation.value_as_string AS STRING ) ) as value_as_string, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'value_as_concept_id' AND CAST(values AS STRING)=CAST(observation.value_as_concept_id AS STRING ) ) as value_as_concept_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'qualifier_concept_id' AND CAST(values AS STRING)=CAST(observation.qualifier_concept_id AS STRING ) ) as qualifier_concept_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'unit_concept_id' AND CAST(values AS STRING)=CAST(observation.unit_concept_id AS STRING ) ) as unit_concept_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'provider_id' AND CAST(values AS STRING)=CAST(observation.provider_id AS STRING ) ) as provider_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'visit_occurrence_id' AND CAST(values AS STRING)=CAST(observation.visit_occurrence_id AS STRING ) ) as visit_occurrence_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'observation_source_value' AND CAST(values AS STRING)=CAST(observation.observation_source_value AS STRING ) ) as observation_source_value, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'observation_source_concept_id' AND CAST(values AS STRING)=CAST(observation.observation_source_concept_id AS STRING ) ) as observation_source_concept_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'unit_source_value' AND CAST(values AS STRING)=CAST(observation.unit_source_value AS STRING ) ) as unit_source_value, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'qualifier_source_value' AND CAST(values AS STRING)=CAST(observation.qualifier_source_value AS STRING ) ) as qualifier_source_value, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'value_source_concept_id' AND CAST(values AS STRING)=CAST(observation.value_source_concept_id AS STRING ) ) as value_source_concept_id, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'value_source_value' AND CAST(values AS STRING)=CAST(observation.value_source_value AS STRING ) ) as value_source_value, + (SELECT encoded FROM wgan_original_pseudo.map WHERE table='observation' AND field = 'questionnaire_response_id' AND CAST(values AS STRING)=CAST(observation.questionnaire_response_id AS STRING ) ) as questionnaire_response_id +FROM wgan_original.observation +WHERE +REGEXP_CONTAINS(UPPER(observation_source_value),'ICD') \ No newline at end of file diff --git a/exports/sample.csv b/exports/sample.csv new file mode 100644 index 0000000..836bc9d --- /dev/null +++ b/exports/sample.csv @@ -0,0 +1,10 @@ +id,first_name,last_name,age,gender +100,steve,nyemba,40,m +101,elon,nyemba,5,m +200,steve,mqueen,80,m +201,james,dean,80,m +300,james,bond,50,m +400,elon,musk,40,m +401,kevin,james,50,m +303,kevin,johnson,40,m +103,Bari,nyemba,5,f diff --git a/gan.py b/gan.py new file mode 100644 index 0000000..0981411 --- /dev/null +++ b/gan.py @@ -0,0 +1,546 @@ +""" +usage : + optional : + --num_gpu number of gpus to use will default to 1 
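+ --max_epochs number of training epochs, defaults to 10
+ --id column used to derive the demographic labels, defaults to person_id
+ --train|--learn learn a model for the column given by --column from the csv given by --raw-data
+ --generate generate a synthetic version of --column using a previously learned model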
+ --epoch steps per epoch, defaults to 256
+ e.g. python3 gan.py --raw-data exports/sample.csv --column first_name --id id --train
+"""
+import tensorflow as tf
+from tensorflow.contrib.layers import l2_regularizer
+import numpy as np
+import pandas as pd
+import time
+import os
+import sys
+from params import SYS_ARGS
+from bridge import Binary
+import json
+
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+os.environ['CUDA_VISIBLE_DEVICES'] = "0"
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+# STEPS_PER_EPOCH = int(SYS_ARGS['epoch']) if 'epoch' in SYS_ARGS else 256
+# NUM_GPUS = 1 if 'num_gpu' not in SYS_ARGS else int(SYS_ARGS['num_gpu'])
+# BATCHSIZE_PER_GPU = 2000
+# TOTAL_BATCHSIZE = BATCHSIZE_PER_GPU * NUM_GPUS
+
+class void :
+    pass
+class GNet :
+    """
+    This is the base class of the generative network functions; the details are implemented in the subclasses.
+    An instance of this class is accessed as follows :
+        object.layers.normalize applies batch normalization (or layer normalization)
+        object.get.variables instantiates variables on the CPU and returns a reference (tensor)
+    """
+    def __init__(self,**args):
+        self.layers = void()
+        self.layers.normalize = self.normalize
+
+        self.get = void()
+        self.get.variables = self._variable_on_cpu
+
+        self.NUM_GPUS = 1
+
+        self.X_SPACE_SIZE = args['real'].shape[1] if 'real' in args else 854
+        self.G_STRUCTURE = [128,128] #[self.X_SPACE_SIZE, self.X_SPACE_SIZE]
+        self.D_STRUCTURE = [self.X_SPACE_SIZE,256,128] #[self.X_SPACE_SIZE, self.X_SPACE_SIZE*2, self.X_SPACE_SIZE] #-- change 854 to number of diagnosis
+        # NUM_LABELS defaults to 8 unless a label matrix (2-d) or vector (1-d) is provided
+        if 'label' in args and len(args['label'].shape) == 2 :
+            self.NUM_LABELS = args['label'].shape[1]
+        elif 'label' in args and len(args['label'].shape) == 1 :
+            self.NUM_LABELS = args['label'].shape[0]
+        else:
+            self.NUM_LABELS = 8
+        self.Z_DIM = 128 #self.X_SPACE_SIZE
+        self.BATCHSIZE_PER_GPU = args['real'].shape[0] if 'real' in args else 256
+        self.TOTAL_BATCHSIZE = self.BATCHSIZE_PER_GPU * self.NUM_GPUS
+        self.STEPS_PER_EPOCH = 256 #int(np.load('ICD9/train.npy').shape[0] / 2000)
+        self.MAX_EPOCHS = 10 if 'max_epochs' not in args else int(args['max_epochs'])
+        self.ROW_COUNT = args['real'].shape[0] if 'real' in args else 100
+        self.CONTEXT = args['context']
+        self.ATTRIBUTES = {"id":args['column_id'] if 'column_id' in args else None,"synthetic":args['column'] if 'column' in args else None}
+        self._REAL = args['real'] if 'real' in args else None
+        self._LABEL = args['label'] if 'label' in args else None
+
+        self.init_logs(**args)
+
+    def init_logs(self,**args):
+        self.log_dir = args['logs'] if 'logs' in args else 'logs'
+        self.mkdir(self.log_dir)
+        #
+        # create the train/output sub-directories for the current context
+        #
+        for key in ['train','output'] :
+            self.mkdir(os.sep.join([self.log_dir,key]))
+            self.mkdir (os.sep.join([self.log_dir,key,self.CONTEXT]))
+
+        self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
+        self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
+
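+    #
+    # The calls above lay out logs/train/<context> for model checkpoints and
+    # logs/output/<context> for generated output and the meta-<column>.json
+    # files written by log_meta below
+    #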
+    def load_meta(self,column):
+        """
+        This function is designed to accommodate the use of the sub-classes outside of a strict dependency model,
+        because prediction and training can happen independently
+        """
+        _name = os.sep.join([self.out_dir,'meta-'+column+'.json'])
+        if os.path.exists(_name) :
+            attr = json.loads((open(_name)).read())
+            for key in attr :
+                value = attr[key]
+                setattr(self,key,value)
+            self.train_dir = os.sep.join([self.log_dir,'train',self.CONTEXT])
+            self.out_dir = os.sep.join([self.log_dir,'output',self.CONTEXT])
+
+
+    def log_meta(self,**args) :
+        object = {
+            'CONTEXT':self.CONTEXT,
+            'ATTRIBUTES':self.ATTRIBUTES,
+            'BATCHSIZE_PER_GPU':self.BATCHSIZE_PER_GPU,
+            'Z_DIM':self.Z_DIM,
+            "X_SPACE_SIZE":self.X_SPACE_SIZE,
+            "D_STRUCTURE":self.D_STRUCTURE,
+            "G_STRUCTURE":self.G_STRUCTURE,
+            "NUM_GPUS":self.NUM_GPUS,
+            "NUM_LABELS":self.NUM_LABELS,
+            "MAX_EPOCHS":self.MAX_EPOCHS,
+            "ROW_COUNT":self.ROW_COUNT
+        }
+        if args and 'key' in args and 'value' in args :
+            key = args['key']
+            value= args['value']
+            object[key] = value
+        _name = os.sep.join([self.out_dir,'meta-'+SYS_ARGS['column']])
+        f = open(_name+'.json','w')
+        f.write(json.dumps(object))
+    def mkdir (self,path):
+        if not os.path.exists(path) :
+            os.mkdir(path)
+
+
+    def normalize(self,**args):
+        """
+        This function performs batch (or layer) normalization on a network layer, conditioned on the labels
+            inputs      input layer of the neural network
+            name        name of the variable scope
+            labels      labels (attributes not synthesized), None by default
+            n_labels    number of labels, None by default
+        """
+        inputs = args['inputs']
+        name = args['name']
+        labels = None if 'labels' not in args else args['labels']
+        n_labels= None if 'n_labels' not in args else args['n_labels']
+        shift = [0] if self.__class__.__name__.lower() == 'generator' else [1] #-- moments over axis 0 (batch norm) in G, axis 1 (layer norm) in D
+        mean, var = tf.nn.moments(inputs, shift, keep_dims=True)
+        shape = inputs.shape[1].value
+        offset_m = self.get.variables(shape=[n_labels,shape], name='offset'+name,
+                                      initializer=tf.zeros_initializer)
+        scale_m = self.get.variables(shape=[n_labels,shape], name='scale'+name,
+                                     initializer=tf.ones_initializer)
+
+        offset = tf.nn.embedding_lookup(offset_m, labels)
+        scale = tf.nn.embedding_lookup(scale_m, labels)
+        result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8)
+        return result
+
+    def _variable_on_cpu(self,**args):
+        """
+        This function makes sure variables/tensors are not created on the GPU but rather on the CPU
+        """
+
+        name = args['name']
+        shape = args['shape']
+        initializer=None if 'initializer' not in args else args['initializer']
+        with tf.device('/cpu:0') :
+            cpu_var = tf.compat.v1.get_variable(name,shape,initializer= initializer)
+        return cpu_var
+    def average_gradients(self,tower_grads):
+        average_grads = []
+        for grad_and_vars in zip(*tower_grads):
+            grads = []
+            for g, _ in grad_and_vars:
+                expanded_g = tf.expand_dims(g, 0)
+                grads.append(expanded_g)
+
+            grad = tf.concat(axis=0, values=grads)
+            grad = tf.reduce_mean(grad, 0)
+
+            v = grad_and_vars[0][1]
+            grad_and_var = (grad, v)
+            average_grads.append(grad_and_var)
+        return average_grads
+
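+#
+# The two classes below form a conditional WGAN:
+#   - the Generator maps noise z through residual blocks (x = x + h2) whose
+#     normalization offset and scale are looked up per label
+#     (tf.nn.embedding_lookup in GNet.normalize), i.e. conditional normalization
+#   - the Discriminator scores samples through ReLU layers ending in a single
+#     linear output, as the Wasserstein loss requires
+#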
+
+class Generator (GNet):
+    """
+    This class handles the generation of candidate datasets; it aggregates a discriminator so that the generator is trained rather than random
+    """
+    def __init__(self,**args):
+        GNet.__init__(self,**args)
+        self.discriminator = Discriminator(**args)
+    def loss(self,**args):
+        fake = args['fake']
+        label = args['label']
+        y_hat_fake = self.discriminator.network(inputs=fake, label=label)
+        all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+        loss = -tf.reduce_mean(y_hat_fake) + sum(all_regs)
+        tf.add_to_collection('glosses', loss)
+        return loss, loss
+    def load_meta(self, column):
+        super().load_meta(column)
+        self.discriminator.load_meta(column)
+    def network(self,**args) :
+        """
+        This function builds the network that generates the synthetic candidates
+            :inputs matrix of random input (z)
+            :dim    dimensionality of the input, defaults to Z_DIM
+            :label  conditioning labels
+        """
+        x = args['inputs']
+        tmp_dim = self.Z_DIM if 'dim' not in args else args['dim']
+        label = args['label']
+
+        with tf.compat.v1.variable_scope('G', reuse=tf.compat.v1.AUTO_REUSE , regularizer=l2_regularizer(0.00001)):
+            for i, dim in enumerate(self.G_STRUCTURE[:-1]):
+                kernel = self.get.variables(name='W_' + str(i), shape=[tmp_dim, dim])
+                h1 = self.normalize(inputs=tf.matmul(x, kernel),shift=0, name='cbn' + str(i), labels=label, n_labels=self.NUM_LABELS)
+                h2 = tf.nn.relu(h1)
+                x = x + h2
+                tmp_dim = dim
+            i = len(self.G_STRUCTURE) - 1
+            #
+            # This seems to be an extra hidden layer:
+            # Its goal is to map continuous values to discrete values (pre-trained to do this)
+            kernel = self.get.variables(name='W_' + str(i), shape=[tmp_dim, self.G_STRUCTURE[-1]])
+            h1 = self.normalize(inputs=tf.matmul(x, kernel), name='cbn' + str(i),
+                                labels=label, n_labels=self.NUM_LABELS)
+            h2 = tf.nn.tanh(h1)
+            x = x + h2
+            # This seems to be the output layer
+            #
+            kernel = self.get.variables(name='W_' + str(i+1), shape=[self.Z_DIM, self.X_SPACE_SIZE])
+            bias = self.get.variables(name='b_' + str(i+1), shape=[self.X_SPACE_SIZE])
+            x = tf.nn.sigmoid(tf.add(tf.matmul(x, kernel), bias))
+        return x
+
+class Discriminator(GNet):
+    def __init__(self,**args):
+        GNet.__init__(self,**args)
+    def network(self,**args):
+        """
+        This function applies the computational graph to a dataset passed in with the associated labels; the last layer has a single output (neuron)
+            :inputs samples to score (real, fake or interpolated)
+            :label  conditioning labels
+        """
+        x = args['inputs']
+        print ()
+        print (x[:3,:]) #-- debug output
+        print()
+        label = args['label']
+        with tf.compat.v1.variable_scope('D', reuse=tf.compat.v1.AUTO_REUSE , regularizer=l2_regularizer(0.00001)):
+            for i, dim in enumerate(self.D_STRUCTURE[1:]):
+                kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[i], dim])
+                bias = self.get.variables(name='b_' + str(i), shape=[dim])
+                print (["\t",bias,kernel]) #-- debug output
+                x = tf.nn.relu(tf.add(tf.matmul(x, kernel), bias))
+                x = self.normalize(inputs=x, name='cln' + str(i), shift=1,labels=label, n_labels=self.NUM_LABELS)
+            i = len(self.D_STRUCTURE)
+            kernel = self.get.variables(name='W_' + str(i), shape=[self.D_STRUCTURE[-1], 1])
+            bias = self.get.variables(name='b_' + str(i), shape=[1])
+            y = tf.add(tf.matmul(x, kernel), bias)
+        return y
+
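+    #
+    # The loss below is the WGAN gradient-penalty loss (Gulrajani et al., 2017):
+    # x_hat is a random interpolation between real and fake samples, and the
+    # penalty term pushes ||grad D(x_hat)|| toward 1 to (softly) enforce the
+    # 1-Lipschitz constraint, with a penalty weight of 10
+    #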
+    def loss(self,**args) :
+        """
+        This function computes the discriminator loss (Wasserstein distance with gradient penalty)
+            :real   batch of real samples
+            :fake   batch of generated samples
+            :label  conditioning labels
+        """
+        real = args['real']
+        fake = args['fake']
+        label = args['label']
+        epsilon = tf.random.uniform(shape=[self.BATCHSIZE_PER_GPU,1],minval=0,maxval=1)
+
+        x_hat = real + epsilon * (fake - real)
+        y_hat_fake = self.network(inputs=fake, label=label)
+
+        y_hat_real = self.network(inputs=real, label=label)
+        y_hat = self.network(inputs=x_hat, label=label)
+
+        grad = tf.gradients(y_hat, [x_hat])[0]
+        slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1))
+        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
+        all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+        w_distance = -tf.reduce_mean(y_hat_real) + tf.reduce_mean(y_hat_fake)
+        loss = w_distance + 10 * gradient_penalty + sum(all_regs)
+        tf.add_to_collection('dlosses', loss)
+
+        return w_distance, loss
+class Train (GNet):
+    def __init__(self,**args):
+        GNet.__init__(self,**args)
+        self.generator = Generator(**args)
+        self.discriminator = Discriminator(**args)
+        self._REAL = args['real']
+        self._LABEL= args['label']
+        # print ([" *** ",self.BATCHSIZE_PER_GPU])
+        self.log_meta()
+    def load_meta(self, column):
+        """
+        This function delegates the calls to load meta data to its dependents (generator and discriminator)
+            column  name of the synthesized column
+        """
+        super().load_meta(column)
+        self.generator.load_meta(column)
+        self.discriminator.load_meta(column)
+    def loss(self,**args):
+        """
+        This function computes a "tower" loss of the generated candidates against real data; training alternates between the discriminator ('D') and the generator ('G')
+            :scope
+            :stage  'D' or 'G'
+            :real
+            :label
+        """
+
+        scope = args['scope']
+        stage = args['stage']
+        real = args['real']
+        label = args['label']
+        label = tf.cast(label, tf.int32)
+        #
+        # @TODO: Ziqi needs to explain what's going on here
+        # (this appears to fold the one-hot demographics into a single categorical
+        #  index: label = gender_bit * number_of_age_groups + age_group_index)
+        m = [[i] for i in np.arange(self._LABEL.shape[1]-2)]
+        label = label[:, 1] * len(m) + tf.squeeze(
+            tf.matmul(label[:, 2:], tf.constant(m, dtype=tf.int32))
+        )
+        # label = label[:,1] * 4 + tf.squeeze( label[:,2]*[[0],[1],[2],[3]] )
+        z = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])
+
+        fake = self.generator.network(inputs=z, label=label)
+        if stage == 'D':
+            w, loss = self.discriminator.loss(real=real, fake=fake, label=label)
+            losses = tf.get_collection('dlosses', scope)
+        else:
+            w, loss = self.generator.loss(fake=fake, label=label)
+            losses = tf.get_collection('glosses', scope)
+
+        total_loss = tf.add_n(losses, name='total_loss')
+
+        return total_loss, w
+    def input_fn(self):
+        """
+        This function produces an initializable iterator over the (real, label) dataset, along with the placeholders needed to feed it
+        """
+        features_placeholder = tf.compat.v1.placeholder(shape=self._REAL.shape, dtype=tf.float32)
+        labels_placeholder = tf.compat.v1.placeholder(shape=self._LABEL.shape, dtype=tf.float32)
+        dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
+        dataset = dataset.repeat(10000)
+        dataset = dataset.batch(batch_size=self.BATCHSIZE_PER_GPU)
+        dataset = dataset.prefetch(1)
+        iterator = dataset.make_initializable_iterator()
+        # next_element = iterator.get_next()
+        # init_op = iterator.initializer
+        return iterator, features_placeholder, labels_placeholder
+
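+    #
+    # network() below builds one training "tower" per GPU: each tower draws a
+    # batch from the iterator, computes the stage loss ('D' or 'G') along with
+    # its gradients, and average_gradients merges the per-tower gradients before
+    # a single apply_gradients step; with NUM_GPUS = 1 this reduces to ordinary
+    # single-device training
+    #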
+    def network(self,**args):
+        # def graph(stage, opt):
+        # global_step = tf.get_variable(stage+'_step', [], initializer=tf.constant_initializer(0), trainable=False)
+        stage = args['stage']
+        opt = args['opt']
+        tower_grads = []
+        per_gpu_w = []
+        iterator, features_placeholder, labels_placeholder = self.input_fn()
+        with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope()):
+            for i in range(self.NUM_GPUS):
+                with tf.device('/gpu:%d' % i):
+                    with tf.name_scope('%s_%d' % ('TOWER', i)) as scope:
+                        (real, label) = iterator.get_next()
+                        # note: the loss is computed on the full self._REAL/self._LABEL
+                        # arrays; since BATCHSIZE_PER_GPU is set to the full row count,
+                        # this is equivalent to the iterator batch (real, label)
+                        loss, w = self.loss(scope=scope, stage=stage, real=self._REAL, label=self._LABEL)
+                        tf.get_variable_scope().reuse_variables()
+                        vars_ = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=stage)
+                        grads = opt.compute_gradients(loss, vars_)
+                        tower_grads.append(grads)
+                        per_gpu_w.append(w)
+
+        grads = self.average_gradients(tower_grads)
+        apply_gradient_op = opt.apply_gradients(grads)
+
+        mean_w = tf.reduce_mean(per_gpu_w)
+        train_op = apply_gradient_op
+        return train_op, mean_w, iterator, features_placeholder, labels_placeholder
+    def apply(self,**args):
+        # max_epochs = args['max_epochs'] if 'max_epochs' in args else 10
+        REAL = self._REAL
+        LABEL= self._LABEL
+        with tf.device('/cpu:0'):
+            opt_d = tf.compat.v1.train.AdamOptimizer(1e-4)
+            opt_g = tf.compat.v1.train.AdamOptimizer(1e-4)
+
+            train_d, w_distance, iterator_d, features_placeholder_d, labels_placeholder_d = self.network(stage='D', opt=opt_d)
+            train_g, _, iterator_g, features_placeholder_g, labels_placeholder_g = self.network(stage='G', opt=opt_g)
+            # saver = tf.train.Saver()
+            saver = tf.compat.v1.train.Saver()
+            init = tf.global_variables_initializer()
+
+            with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess:
+                sess.run(init)
+                sess.run(iterator_d.initializer,
+                         feed_dict={features_placeholder_d: REAL, labels_placeholder_d: LABEL})
+                sess.run(iterator_g.initializer,
+                         feed_dict={features_placeholder_g: REAL, labels_placeholder_g: LABEL})
+
+                for epoch in range(1, self.MAX_EPOCHS + 1):
+                    start_time = time.time()
+                    w_sum = 0
+                    for i in range(self.STEPS_PER_EPOCH):
+                        # two discriminator updates per generator update
+                        for _ in range(2):
+                            _, w = sess.run([train_d, w_distance])
+                            w_sum += w
+                        sess.run(train_g)
+                    duration = time.time() - start_time
+
+                    assert not np.isnan(w_sum), 'Model diverged with loss = NaN'
+
+                    format_str = 'epoch: %d, w_distance = %f (%.1f)'
+                    print(format_str % (epoch, -w_sum/(self.STEPS_PER_EPOCH*2), duration))
+                    if epoch % self.MAX_EPOCHS == 0:
+
+                        _name = os.sep.join([self.train_dir,self.ATTRIBUTES['synthetic']])
+                        # saver.save(sess, self.train_dir, write_meta_graph=False, global_step=epoch)
+                        saver.save(sess, _name, write_meta_graph=False, global_step=epoch)
+            #
+            #
+
+class Predict(GNet):
+    """
+    This class generates synthetic data given a learned model
+    """
+    def __init__(self,**args):
+        GNet.__init__(self,**args)
+        self.generator = Generator(**args)
+        self.values = args['values']
+    def load_meta(self, column):
+        super().load_meta(column)
+        self.generator.load_meta(column)
+    def apply(self,**args):
+        # print (self.train_dir)
+        model_dir = os.sep.join([self.train_dir,self.ATTRIBUTES['synthetic']+'-'+str(self.MAX_EPOCHS)])
+        demo = self._LABEL #np.zeros([self.ROW_COUNT,self.NUM_LABELS]) #args['demo']
+        tf.compat.v1.reset_default_graph()
+        z = tf.random.normal(shape=[self.BATCHSIZE_PER_GPU, self.Z_DIM])
+        y = tf.compat.v1.placeholder(shape=[self.BATCHSIZE_PER_GPU, self.NUM_LABELS], dtype=tf.int32)
+        ma = [[i] for i in np.arange(self.NUM_LABELS - 2)]
+        label = y[:, 1] * len(ma) + tf.squeeze(tf.matmul(y[:, 2:], tf.constant(ma, dtype=tf.int32)))
+
+        fake = self.generator.network(inputs=z, label=label)
+        init = tf.compat.v1.global_variables_initializer()
+        saver = tf.compat.v1.train.Saver()
+        with tf.compat.v1.Session() as sess:
+
+            # sess.run(init)
+            saver.restore(sess, model_dir)
+            labels = np.zeros((self.ROW_COUNT,self.NUM_LABELS) )
+
+            labels= demo
+            f = sess.run(fake,feed_dict={y:labels})
+            #
+            # if we are dealing with numeric values only we can perform a simple marginal sum against the indexes
+            #
+
+            df = ( pd.DataFrame(np.round(f).astype(np.int32),columns=self.values))
+            # i = df.T.index.astype(np.int32) #-- These are numeric pseudonyms
+            # df = (i * df).sum(axis=1)
+            #
+            # In case we are dealing with actual values like diagnosis codes we can proceed as follows
+            #
+            r = np.zeros((self.ROW_COUNT,1))
+            for col in df :
+                i = np.where(df[col])[0]
+                r[i] = col
+            df = pd.DataFrame(r,columns=[self.ATTRIBUTES['synthetic']])
+
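+            #
+            # the loop above decodes the one-hot output: for each generated row the
+            # value whose column fired is kept as the synthetic value, and the frame
+            # is re-labeled with the name of the synthesized attribute
+            #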
+            return df.to_dict(orient='list')
+        # count = str(len(os.listdir(self.out_dir)))
+        # _name = os.sep.join([self.out_dir,self.CONTEXT+'-'+count+'.csv'])
+        # df.to_csv(_name,index=False)
+
+
+        # output.extend(np.round(f))
+
+        # for m in range(2):
+        #     for n in range(2, self.NUM_LABELS):
+        #         idx1 = (demo[:, m] == 1)
+        #         idx2 = (demo[:, n] == 1)
+        #         idx = [idx1[j] and idx2[j] for j in range(len(idx1))]
+        #         num = np.sum(idx)
+        #         print ("_____________________")
+        #         print (idx1)
+        #         print (idx2)
+        #         print (idx)
+        #         print (num)
+        #         print ("_____________________")
+        #         nbatch = int(np.ceil(num / self.BATCHSIZE_PER_GPU))
+        #         label_input = np.zeros((nbatch*self.BATCHSIZE_PER_GPU, self.NUM_LABELS))
+        #         label_input[:, n] = 1
+        #         label_input[:, m] = 1
+        #         output = []
+        #         for i in range(nbatch):
+        #             f = sess.run(fake,feed_dict={y: label_input[i* self.BATCHSIZE_PER_GPU:(i+1)* self.BATCHSIZE_PER_GPU]})
+        #             output.extend(np.round(f))
+        #         output = np.array(output)[:num]
+        #         print ([m,n,output])
+
+        #         np.save(self.out_dir + str(m) + str(n), output)
+
+
+if __name__ == '__main__' :
+    #
+    # Now we get things done ...
+    column = SYS_ARGS['column']
+    column_id = SYS_ARGS['id'] if 'id' in SYS_ARGS else 'person_id'
+    df = pd.read_csv(SYS_ARGS['raw-data'])
+    LABEL = pd.get_dummies(df[column_id]).astype(np.float32).values
+
+    context = SYS_ARGS['raw-data'].split(os.sep)[-1:][0][:-4] #-- csv file name without its extension
+    if set(['train','learn']) & set(SYS_ARGS.keys()):
+
+        df = pd.read_csv(SYS_ARGS['raw-data'])
+
+        # cols = SYS_ARGS['column']
+        # _map,_df = (Binary()).Export(df)
+        # i = np.arange(_map[column]['start'],_map[column]['end'])
+        max_epochs = np.int32(SYS_ARGS['max_epochs']) if 'max_epochs' in SYS_ARGS else 10
+        # REAL = _df[:,i]
+        REAL = pd.get_dummies(df[column]).astype(np.float32).values
+        LABEL = pd.get_dummies(df[column_id]).astype(np.float32).values
+        trainer = Train(context=context,max_epochs=max_epochs,real=REAL,label=LABEL,column=column,column_id=column_id)
+        trainer.apply()
+
+        #
+        # We should train upon this data
+        #
+        # -- we need to convert the data-frame to binary matrix, given a column
+        #
+        pass
+    elif 'generate' in SYS_ARGS:
+        values = df[column].unique().tolist()
+        values.sort()
+        p = Predict(context=context,label=LABEL,values=values)
+        p.load_meta(column)
+        r = p.apply()
+        print (df)
+        print ()
+        df[column] = r[column]
+        print (df)
+
+    else:
+        print (SYS_ARGS.keys())
+        print (__doc__)
+        pass
+
diff --git a/multi_GPU.py b/multi_GPU.py
new file mode 100644
index 0000000..0dfdff0
--- /dev/null
+++ b/multi_GPU.py
@@ -0,0 +1,286 @@
+import tensorflow as tf
+from tensorflow.contrib.layers import l2_regularizer
+import numpy as np
+import time
+import os
+
+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+
+#### id of gpu to use
+os.environ['CUDA_VISIBLE_DEVICES'] = "0"
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+#### training data
+#### shape=(n_sample, n_code=854)
+REAL = np.load('')
+
+#### demographic for training data
+#### shape=(n_sample, 6)
+#### if sample_x is male, then LABEL[x,0]=1, else LABEL[x,1]=1
+#### if sample_x's age is within 0-17, then LABEL[x,2]=1
+#### elif sample_x's age is within 18-44, then LABEL[x,3]=1
+#### elif sample_x's age is within 45-64, then LABEL[x,4]=1
+#### elif sample_x's age is 64 or over, then LABEL[x,5]=1
+LABEL = np.load('')
+
+#### training parameters
+NUM_GPUS = 1
+BATCHSIZE_PER_GPU = 2000
+TOTAL_BATCHSIZE = BATCHSIZE_PER_GPU * NUM_GPUS
+STEPS_PER_EPOCH = int(np.load('ICD9/train.npy').shape[0] / 2000)
+
+g_structure = [128, 128]
+d_structure = [854, 256, 128]
+z_dim = 128
+
+def
_variable_on_cpu(name, shape, initializer=None): + with tf.device('/cpu:0'): + var = tf.get_variable(name, shape, initializer=initializer) + return var + + +def batchnorm(inputs, name, labels=None, n_labels=None): + mean, var = tf.nn.moments(inputs, [0], keep_dims=True) + shape = mean.shape[1].value + offset_m = _variable_on_cpu(shape=[n_labels,shape], name='offset'+name, + initializer=tf.zeros_initializer) + scale_m = _variable_on_cpu(shape=[n_labels,shape], name='scale'+name, + initializer=tf.ones_initializer) + offset = tf.nn.embedding_lookup(offset_m, labels) + scale = tf.nn.embedding_lookup(scale_m, labels) + result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8) + return result + + +def layernorm(inputs, name, labels=None, n_labels=None): + mean, var = tf.nn.moments(inputs, [1], keep_dims=True) + shape = inputs.shape[1].value + offset_m = _variable_on_cpu(shape=[n_labels,shape], name='offset'+name, + initializer=tf.zeros_initializer) + scale_m = _variable_on_cpu(shape=[n_labels,shape], name='scale'+name, + initializer=tf.ones_initializer) + offset = tf.nn.embedding_lookup(offset_m, labels) + scale = tf.nn.embedding_lookup(scale_m, labels) + result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8) + return result + + +def input_fn(): + features_placeholder = tf.placeholder(shape=REAL.shape, dtype=tf.float32) + labels_placeholder = tf.placeholder(shape=LABEL.shape, dtype=tf.float32) + dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder)) + dataset = dataset.repeat(10000) + dataset = dataset.batch(batch_size=BATCHSIZE_PER_GPU) + dataset = dataset.prefetch(1) + iterator = dataset.make_initializable_iterator() + # next_element = iterator.get_next() + # init_op = iterator.initializer + return iterator, features_placeholder, labels_placeholder + + +def generator(z, label): + x = z + tmp_dim = z_dim + with tf.variable_scope('G', reuse=tf.AUTO_REUSE, regularizer=l2_regularizer(0.00001)): + for i, dim in enumerate(g_structure[:-1]): + kernel = _variable_on_cpu('W_' + str(i), shape=[tmp_dim, dim]) + h1 = batchnorm(tf.matmul(x, kernel), name='cbn' + str(i), labels=label, n_labels=8) + h2 = tf.nn.relu(h1) + x = x + h2 + tmp_dim = dim + i = len(g_structure) - 1 + kernel = _variable_on_cpu('W_' + str(i), shape=[tmp_dim, g_structure[-1]]) + h1 = batchnorm(tf.matmul(x, kernel), name='cbn' + str(i), + labels=label, n_labels=8) + h2 = tf.nn.tanh(h1) + x = x + h2 + + kernel = _variable_on_cpu('W_' + str(i+1), shape=[128, 854]) + bias = _variable_on_cpu('b_' + str(i+1), shape=[854]) + x = tf.nn.sigmoid(tf.add(tf.matmul(x, kernel), bias)) + return x + + +def discriminator(x, label): + with tf.variable_scope('D', reuse=tf.AUTO_REUSE, regularizer=l2_regularizer(0.00001)): + for i, dim in enumerate(d_structure[1:]): + kernel = _variable_on_cpu('W_' + str(i), shape=[d_structure[i], dim]) + bias = _variable_on_cpu('b_' + str(i), shape=[dim]) + x = tf.nn.relu(tf.add(tf.matmul(x, kernel), bias)) + x = layernorm(x, name='cln' + str(i), labels=label, n_labels=8) + i = len(d_structure) + kernel = _variable_on_cpu('W_' + str(i), shape=[d_structure[-1], 1]) + bias = _variable_on_cpu('b_' + str(i), shape=[1]) + y = tf.add(tf.matmul(x, kernel), bias) + return y + + +def compute_dloss(real, fake, label): + epsilon = tf.random_uniform( + shape=[BATCHSIZE_PER_GPU, 1], + minval=0., + maxval=1.) 
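+    # x_hat below interpolates each real sample toward its fake counterpart at a
+    # random point; the WGAN-GP gradient penalty is evaluated at these
+    # interpolates (same construction as Discriminator.loss in gan.py)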
+ x_hat = real + epsilon * (fake - real) + y_hat_fake = discriminator(fake, label) + y_hat_real = discriminator(real, label) + y_hat = discriminator(x_hat, label) + + grad = tf.gradients(y_hat, [x_hat])[0] + slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1)) + gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2) + all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + w_distance = -tf.reduce_mean(y_hat_real) + tf.reduce_mean(y_hat_fake) + loss = w_distance + 10 * gradient_penalty + sum(all_regs) + tf.add_to_collection('dlosses', loss) + + return w_distance, loss + + +def compute_gloss(fake, label): + y_hat_fake = discriminator(fake, label) + all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + loss = -tf.reduce_mean(y_hat_fake) + sum(all_regs) + tf.add_to_collection('glosses', loss) + return loss, loss + + +def tower_loss(scope, stage, real, label): + label = tf.cast(label, tf.int32) + label = label[:, 1] * 4 + tf.squeeze( + tf.matmul(label[:, 2:], tf.constant([[0], [1], [2], [3]], dtype=tf.int32))) + z = tf.random_normal(shape=[BATCHSIZE_PER_GPU, z_dim]) + fake = generator(z, label) + if stage == 'D': + w, loss = compute_dloss(real, fake, label) + losses = tf.get_collection('dlosses', scope) + else: + w, loss = compute_gloss(fake, label) + losses = tf.get_collection('glosses', scope) + + total_loss = tf.add_n(losses, name='total_loss') + + # loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') + # loss_averages_op = loss_averages.apply(losses + [total_loss]) + # + # with tf.control_dependencies([loss_averages_op]): + # total_loss = tf.identity(total_loss) + + return total_loss, w + + +def average_gradients(tower_grads): + average_grads = [] + for grad_and_vars in zip(*tower_grads): + grads = [] + for g, _ in grad_and_vars: + expanded_g = tf.expand_dims(g, 0) + grads.append(expanded_g) + + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + + +def graph(stage, opt): + # global_step = tf.get_variable(stage+'_step', [], initializer=tf.constant_initializer(0), trainable=False) + tower_grads = [] + per_gpu_w = [] + iterator, features_placeholder, labels_placeholder = input_fn() + with tf.variable_scope(tf.get_variable_scope()): + for i in range(NUM_GPUS): + with tf.device('/gpu:%d' % i): + with tf.name_scope('%s_%d' % ('TOWER', i)) as scope: + (real, label) = iterator.get_next() + loss, w = tower_loss(scope, stage, real, label) + tf.get_variable_scope().reuse_variables() + vars_ = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=stage) + grads = opt.compute_gradients(loss, vars_) + tower_grads.append(grads) + per_gpu_w.append(w) + + grads = average_gradients(tower_grads) + apply_gradient_op = opt.apply_gradients(grads) + + mean_w = tf.reduce_mean(per_gpu_w) + train_op = apply_gradient_op + return train_op, mean_w, iterator, features_placeholder, labels_placeholder + + +def train(max_epochs, train_dir): + with tf.device('/cpu:0'): + opt_d = tf.train.AdamOptimizer(1e-4) + opt_g = tf.train.AdamOptimizer(1e-4) + train_d, w_distance, iterator_d, features_placeholder_d, labels_placeholder_d = graph('D', opt_d) + train_g, _, iterator_g, features_placeholder_g, labels_placeholder_g = graph('G', opt_g) + saver = tf.train.Saver() + init = tf.global_variables_initializer() + + with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess: + sess.run(init) + 
sess.run(iterator_d.initializer, + feed_dict={features_placeholder_d: REAL, labels_placeholder_d: LABEL}) + sess.run(iterator_g.initializer, + feed_dict={features_placeholder_g: REAL, labels_placeholder_g: LABEL}) + + for epoch in range(1, max_epochs + 1): + start_time = time.time() + w_sum = 0 + for i in range(STEPS_PER_EPOCH): + for _ in range(2): + _, w = sess.run([train_d, w_distance]) + w_sum += w + sess.run(train_g) + duration = time.time() - start_time + + assert not np.isnan(w_sum), 'Model diverged with loss = NaN' + + format_str = 'epoch: %d, w_distance = %f (%.1f)' + print(format_str % (epoch, -w_sum/(STEPS_PER_EPOCH*2), duration)) + if epoch % 500 == 0: + # checkpoint_path = os.path.join(train_dir, 'multi') + saver.save(sess, train_dir, write_meta_graph=False, global_step=epoch) + # saver.save(sess, train_dir, global_step=epoch) + + +def generate(model_dir, synthetic_dir, demo): + tf.reset_default_graph() + z = tf.random_normal(shape=[BATCHSIZE_PER_GPU, z_dim]) + y = tf.placeholder(shape=[BATCHSIZE_PER_GPU, 6], dtype=tf.int32) + label = y[:, 1] * 4 + tf.squeeze(tf.matmul(y[:, 2:], tf.constant([[0], [1], [2], [3]], dtype=tf.int32))) + fake = generator(z, label) + saver = tf.train.Saver() + with tf.Session() as sess: + saver.restore(sess, model_dir) + for m in range(2): + for n in range(2, 6): + idx1 = (demo[:, m] == 1) + idx2 = (demo[:, n] == 1) + idx = [idx1[j] and idx2[j] for j in range(len(idx1))] + num = np.sum(idx) + nbatch = int(np.ceil(num / BATCHSIZE_PER_GPU)) + label_input = np.zeros((nbatch*BATCHSIZE_PER_GPU, 6)) + label_input[:, n] = 1 + label_input[:, m] = 1 + output = [] + for i in range(nbatch): + f = sess.run(fake,feed_dict={y: label_input[i*BATCHSIZE_PER_GPU:(i+1)*BATCHSIZE_PER_GPU]}) + output.extend(np.round(f)) + output = np.array(output)[:num] + np.save(synthetic_dir + str(m) + str(n), output) + + +if __name__ == '__main__': + #### args_1: number of training epochs + #### args_2: dir to save the trained model + train(500, '') + + #### args_1: dir of trained model + #### args_2: dir to save synthetic data + #### args_3, label of data-to-be-generated + generate('', '', demo=LABEL) + diff --git a/params.py b/params.py new file mode 100644 index 0000000..999b919 --- /dev/null +++ b/params.py @@ -0,0 +1,18 @@ +import sys + +SYS_ARGS = {'context':''} +if len(sys.argv) > 1: + + N = len(sys.argv) + for i in range(1,N): + value = None + if sys.argv[i].startswith('--'): + key = sys.argv[i][2:] #.replace('-','') + SYS_ARGS[key] = 1 + if i + 1 < N: + value = sys.argv[i + 1] = sys.argv[i+1].strip() + if key and value: + SYS_ARGS[key] = value + + + i += 2 diff --git a/params.pyc b/params.pyc new file mode 100644 index 0000000..a7beedb Binary files /dev/null and b/params.pyc differ diff --git a/test.py b/test.py new file mode 100644 index 0000000..717fc93 --- /dev/null +++ b/test.py @@ -0,0 +1,287 @@ +import tensorflow as tf +from tensorflow.contrib.layers import l2_regularizer +import numpy as np +import time +import os +# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +# os.environ['CUDA_VISIBLE_DEVICES'] = "4,5" +# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + + +FLAGS = tf.app.flags.FLAGS + +tf.app.flags.DEFINE_string('train_dir', 'google_cloud_test/', + """Directory where to store checkpoint. """) +tf.app.flags.DEFINE_string('save_dir', 'google_cloud_test/', + """Directory where to save generated data. 
""") +tf.app.flags.DEFINE_integer('max_steps', 100, + """Number of batches to run in each epoch.""") +tf.app.flags.DEFINE_integer('max_epochs', 100, + """Number of epochs to run.""") +tf.app.flags.DEFINE_integer('batchsize', 10, + """Batchsize.""") +tf.app.flags.DEFINE_integer('z_dim', 10, + """Dimensionality of random input.""") +tf.app.flags.DEFINE_integer('data_dim', 30, + """Dimensionality of data.""") +tf.app.flags.DEFINE_integer('demo_dim', 8, + """Dimensionality of demographics.""") +tf.app.flags.DEFINE_float('reg', 0.0001, + """L2 regularization.""") + +g_structure = [FLAGS.z_dim, FLAGS.z_dim] +d_structure = [FLAGS.data_dim, int(FLAGS.data_dim/2), FLAGS.z_dim] + + +def _variable_on_cpu(name, shape, initializer=None): + with tf.device('/cpu:0'): + var = tf.get_variable(name, shape, initializer=initializer) + return var + + +def batchnorm(inputs, name, labels=None, n_labels=None): + mean, var = tf.nn.moments(inputs, [0], keep_dims=True) + shape = mean.shape[1].value + offset_m = _variable_on_cpu(shape=[n_labels,shape], name='offset'+name, + initializer=tf.zeros_initializer) + scale_m = _variable_on_cpu(shape=[n_labels,shape], name='scale'+name, + initializer=tf.ones_initializer) + offset = tf.nn.embedding_lookup(offset_m, labels) + scale = tf.nn.embedding_lookup(scale_m, labels) + result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8) + return result + + +def layernorm(inputs, name, labels=None, n_labels=None): + mean, var = tf.nn.moments(inputs, [1], keep_dims=True) + shape = inputs.shape[1].value + offset_m = _variable_on_cpu(shape=[n_labels,shape], name='offset'+name, + initializer=tf.zeros_initializer) + scale_m = _variable_on_cpu(shape=[n_labels,shape], name='scale'+name, + initializer=tf.ones_initializer) + offset = tf.nn.embedding_lookup(offset_m, labels) + scale = tf.nn.embedding_lookup(scale_m, labels) + result = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-8) + return result + + +def input_fn(): + features_placeholder = tf.placeholder(shape=[None, FLAGS.data_dim], dtype=tf.float32) + labels_placeholder = tf.placeholder(shape=[None, 6], dtype=tf.float32) + dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder)) + dataset = dataset.repeat(10000) + dataset = dataset.batch(batch_size=FLAGS.batchsize) + dataset = dataset.prefetch(1) + iterator = dataset.make_initializable_iterator() + return iterator, features_placeholder, labels_placeholder + + +def generator(z, label): + x = z + tmp_dim = FLAGS.z_dim + with tf.variable_scope('G', reuse=tf.AUTO_REUSE, regularizer=l2_regularizer(FLAGS.reg)): + for i, dim in enumerate(g_structure[:-1]): + kernel = _variable_on_cpu('W_' + str(i), shape=[tmp_dim, dim]) + h1 = batchnorm(tf.matmul(x, kernel), name='cbn' + str(i), labels=label, n_labels=FLAGS.demo_dim) + h2 = tf.nn.relu(h1) + x = x + h2 + tmp_dim = dim + i = len(g_structure) - 1 + kernel = _variable_on_cpu('W_' + str(i), shape=[tmp_dim, g_structure[-1]]) + h1 = batchnorm(tf.matmul(x, kernel), name='cbn' + str(i), + labels=label, n_labels=FLAGS.demo_dim) + h2 = tf.nn.tanh(h1) + x = x + h2 + + kernel = _variable_on_cpu('W_' + str(i+1), shape=[FLAGS.z_dim, FLAGS.data_dim]) + bias = _variable_on_cpu('b_' + str(i+1), shape=[FLAGS.data_dim]) + x = tf.nn.sigmoid(tf.add(tf.matmul(x, kernel), bias)) + return x + + +def discriminator(x, label): + with tf.variable_scope('D', reuse=tf.AUTO_REUSE, regularizer=l2_regularizer(FLAGS.reg)): + for i, dim in enumerate(d_structure[1:]): + kernel = _variable_on_cpu('W_' + str(i), 
shape=[d_structure[i], dim]) + bias = _variable_on_cpu('b_' + str(i), shape=[dim]) + x = tf.nn.relu(tf.add(tf.matmul(x, kernel), bias)) + x = layernorm(x, name='cln' + str(i), labels=label, n_labels=FLAGS.demo_dim) + i = len(d_structure) + kernel = _variable_on_cpu('W_' + str(i), shape=[d_structure[-1], 1]) + bias = _variable_on_cpu('b_' + str(i), shape=[1]) + y = tf.add(tf.matmul(x, kernel), bias) + return y + + +def compute_dloss(real, fake, label): + epsilon = tf.random_uniform( + shape=[FLAGS.batchsize, 1], + minval=0., + maxval=1.) + x_hat = real + epsilon * (fake - real) + y_hat_fake = discriminator(fake, label) + y_hat_real = discriminator(real, label) + y_hat = discriminator(x_hat, label) + + grad = tf.gradients(y_hat, [x_hat])[0] + slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), 1)) + gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2) + all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + w_distance = -tf.reduce_mean(y_hat_real) + tf.reduce_mean(y_hat_fake)+sum(all_regs) + loss = w_distance + 10 * gradient_penalty + tf.add_to_collection('dlosses', loss) + + return w_distance, loss + + +def compute_gloss(fake, label): + y_hat_fake = discriminator(fake, label) + all_regs = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + loss = -tf.reduce_mean(y_hat_fake)+sum(all_regs) + tf.add_to_collection('glosses', loss) + return loss, loss + + +def tower_loss(scope, stage, real, label): + label = tf.cast(label, tf.int32) + print ([stage,label.shape]) + label = label[:, 1] * 4 + tf.squeeze( + tf.matmul(label[:, 2:], tf.constant([[0], [1], [2], [3]], dtype=tf.int32))) + z = tf.random_normal(shape=[FLAGS.batchsize, FLAGS.z_dim]) + fake = generator(z, label) + if stage == 'D': + w, loss = compute_dloss(real, fake, label) + losses = tf.get_collection('dlosses', scope) + else: + w, loss = compute_gloss(fake, label) + losses = tf.get_collection('glosses', scope) + + total_loss = tf.add_n(losses, name='total_loss') + return total_loss, w + + +def average_gradients(tower_grads): + average_grads = [] + for grad_and_vars in zip(*tower_grads): + grads = [] + for g, _ in grad_and_vars: + expanded_g = tf.expand_dims(g, 0) + grads.append(expanded_g) + + grad = tf.concat(axis=0, values=grads) + grad = tf.reduce_mean(grad, 0) + + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + + +def graph(stage, opt): + tower_grads = [] + per_gpu_w = [] + iterator, features_placeholder, labels_placeholder = input_fn() + with tf.variable_scope(tf.get_variable_scope()): + for i in range(1): + with tf.device('/cpu:0'): + with tf.name_scope('%s_%d' % ('TOWER', i)) as scope: + (real, label) = iterator.get_next() + + loss, w = tower_loss(scope, stage, real, label) + tf.get_variable_scope().reuse_variables() + vars_ = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=stage) + grads = opt.compute_gradients(loss, vars_) + tower_grads.append(grads) + per_gpu_w.append(w) + + grads = average_gradients(tower_grads) + apply_gradient_op = opt.apply_gradients(grads) + + mean_w = tf.reduce_mean(per_gpu_w) + train_op = apply_gradient_op + return train_op, mean_w, iterator, features_placeholder, labels_placeholder + + +def train(data, demo): + with tf.device('/cpu:0'): + opt_d = tf.train.AdamOptimizer(1e-4) + opt_g = tf.train.AdamOptimizer(1e-4) + train_d, w_distance, iterator_d, features_placeholder_d, labels_placeholder_d = graph('D', opt_d) + train_g, _, iterator_g, features_placeholder_g, labels_placeholder_g = graph('G', opt_g) + saver = 
tf.train.Saver() + init = tf.global_variables_initializer() + + with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) as sess: + sess.run(init) + sess.run(iterator_d.initializer, + feed_dict={features_placeholder_d: data, + labels_placeholder_d: demo}) + sess.run(iterator_g.initializer, + feed_dict={features_placeholder_g: data, + labels_placeholder_g: demo}) + + for epoch in range(1, FLAGS.max_epochs + 1): + start_time = time.time() + w_sum = 0 + for i in range(FLAGS.max_steps): + for _ in range(2): + _, w = sess.run([train_d, w_distance]) + w_sum += w + sess.run(train_g) + duration = time.time() - start_time + + assert not np.isnan(w_sum), 'Model diverged with loss = NaN' + + format_str = 'epoch: %d, w_distance = %f (%.1f)' + print(format_str % (epoch, -w_sum/(FLAGS.max_steps*2), duration)) + if epoch % FLAGS.max_epochs == 0: + # checkpoint_path = os.path.join(train_dir, 'multi') + saver.save(sess, FLAGS.train_dir + 'emr_wgan', write_meta_graph=False, global_step=epoch) + # saver.save(sess, train_dir, global_step=epoch) + + +def generate(demo): + z = tf.random_normal(shape=[FLAGS.batchsize, FLAGS.z_dim]) + y = tf.placeholder(shape=[FLAGS.batchsize, 6], dtype=tf.int32) + label = y[:, 1] * 4 + tf.squeeze(tf.matmul(y[:, 2:], tf.constant([[0], [1], [2], [3]], dtype=tf.int32))) + fake = generator(z, label) + saver = tf.train.Saver() + with tf.Session() as sess: + saver.restore(sess, FLAGS.train_dir + 'emr_wgan-' + str(FLAGS.max_epochs)) + for m in range(2): + for n in range(2, 6): + idx1 = (demo[:, m] == 1) + idx2 = (demo[:, n] == 1) + idx = [idx1[j] and idx2[j] for j in range(len(idx1))] + num = np.sum(idx) + nbatch = int(np.ceil(num / FLAGS.batchsize)) + label_input = np.zeros((nbatch*FLAGS.batchsize, 6)) + label_input[:, n] = 1 + label_input[:, m] = 1 + output = [] + for i in range(nbatch): + f = sess.run(fake,feed_dict={y: label_input[i*FLAGS.batchsize:(i+1)*FLAGS.batchsize]}) + output.extend(np.round(f)) + output = np.array(output)[:num] + np.save(FLAGS.save_dir + 'synthetic_' + str(m) + str(n), output) + + +def load_data(): + data = np.zeros(3000) + idx = np.random.choice(np.arange(3000),size=900) + data[idx] = 1 + data = np.reshape(data, (100,30)) + idx = np.random.randint(2,6,size=100) + idx2 = np.random.randint(2,size=100) + demo = np.zeros((100,6)) + demo[np.arange(100), idx] = 1 + demo[np.arange(100), idx2] = 1 + return data, demo + + +if __name__ == '__main__': + data, demo = load_data() + print ([data.shape,demo.shape]) + train(data, demo) + # generate(demo) + diff --git a/vumc-test.json b/vumc-test.json new file mode 100644 index 0000000..a525699 --- /dev/null +++ b/vumc-test.json @@ -0,0 +1,12 @@ +{ + "type": "service_account", + "project_id": "aou-res-deid-vumc-test", + "private_key_id": "8b7acef9a1f1137799011cf13cf0906e331c472e", + "private_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCYRPv0ZMGLXjva\nVZjJlcApDpXhJl2iDghhG0JqUH1PmuLjMtmhuMSgweq+M3KNF92Wft9Ree+fTN6m\nVtyqZMgz1qXi6I1WJHyT+ndtk4eWlE4O1AxE0QkfLqtj1kafU6Yu2tGpZ23jHFG9\nc7oq1tqPwC39pKE3ScShcpbZxFqvOFwW7ZSHEQ2Zk0/9lA0bfQH+Vaq1JqBbMkCO\nh1p1ptXPHyIoTjgbtQ/3N6JHA9XpqF1DHFQTe6H/4Zc+GUBV8kb/9pdeybcrhd1K\nVzuT6pAkOLQ7Wtq9Hwl3zAF3jyhlEpirYt4tjcw1pq0phhUuDGcLS37cTzWkqekr\nFEp8NkSnAgMBAAECggEAI16Kw+cPigb2ki2l0tVlEGRh7i2SPE1UJvJFCBrwMKiC\noVGzebxIeCrzEwEyT5HGl+mah/tx7KfXY/3zPeUxF9F5MO7hvau2AE2CpkJJkXGb\nfBhHTUjc/JBDoWopd2LfzCxp3Ra4ULPITOBv0vmbRR7Xz/4IsKYC9Zl/btAMXHy4\nJZZuifK8mCD4BDXxG6W2p+jqeKFjKYTuHyCKWy9u8NnnH6eoNMLvewr/P3pPZK9l\nSFQDV0nWU0yZoR4cccYHtq/9Uw1pY7A9iNYI4JnAnPam8Rka0OEgZbqMVsk3FUmA\nG+SOtiJ9iopQsW5g/HTG7Q420gijnfe5IWQK6yLBOQKBgQDNCuGexHMUGB+/bxFK\nnQ+AiktFib76PbMYFSGdsQQYHGcNHXmXRnJbpj/llO7tiWk/akOA0UrjtipXERTP\nYoXRDlghvnluxUYDm+mD94jSe7rE45b+sNH8FyqgrHWJVHSPBcIz0YXCUxRmE9eq\n4BcNfTqtjAl7hasWhGUVlXppawKBgQC+HJn1Lpvp89h+7ge09p6SU6RhAbOygrtA\nBD3Odr6WV6SGXEKyFHSHLkRVA1BFzzTXl3nEJvHFe7I5RNnVzWSqmf4LkBcIDqQO\nmiNb2TbA/h4utlMJvTrit03qdzngvgmoWyKqNpxmj6afNU/up4ck0hqBkJae/FBQ\nkoSwXcA0tQKBgDJzE/JZiasPCHi0nj+Kh27sF/sjGj8+ARvSzzOag1RfYKekce9b\noPWV4TDexS7i2WeGANfoJxICF0bW6BTiu+QlMGAVGpG7ri9jJECZHiwTz290RAmk\nffYVySJBbKX+hrNOCmtviQa4JFO9XBoqCuIBxvc+dnLS/7aJmsmFvtnDAoGAfQRf\n9gzdeN7i+q1bIhSfuIgKa8RrwDMaIgHoBxKtSD6AMd8P+P1cl9zEEMeqDQ4yqKey\n6lvV19D9JY3yVhfIYCv+FOp/Sswd9IBGSkswJ3+0p3E8cAYhaB+0vEAFLpap0S2F\nQTvCY+uJXd74Hm/KflswFQ3ZDtnLkwCXA0fTcpUCgYBMkcE6Bn0tIShaXsaaufIW\nXrJ6gtEUDtUXP85lNO7hUxBWTu2dF6OsgBniNfWypmRecaZsFl/sD6YKT0bV1vvv\nU0uhYTDx5z7o8ahvjBwOqF5sDDVX02umFBoG16zd3hpOJrGSh+ESpJhWw5dV6m5J\n530zPFObyt2kI9+E75+G/w==\n-----END PRIVATE KEY-----\n", + "client_email": "dev-deid-600@aou-res-deid-vumc-test.iam.gserviceaccount.com", + "client_id": "104228831510203920964", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/dev-deid-600%40aou-res-deid-vumc-test.iam.gserviceaccount.com" +}