@@ -32,10 +32,18 @@ Usage :
 from healthcareio.params import SYS_ARGS
 from transport import factory
 import requests
+from healthcareio import analytics
+from healthcareio import server
 from healthcareio.parser import get_content
 import os
 import json
 import sys
+import numpy as np
+from multiprocessing import Process
+import time
 
 PATH = os.sep.join([os.environ['HOME'],'.healthcareio'])
 OUTPUT_FOLDER = os.sep.join([os.environ['HOME'],'healthcare-io'])
 INFO = None
@@ -60,7 +68,8 @@ def register (**args) :
     #
     #
-    headers = {"email":email,"client":platform.node()}
+    store = args['store'] if 'store' in args else 'sqlite'
+    headers = {"email":email,"client":platform.node(),"store":store,"db":args['db']}
     http = requests.session()
     r = http.post(url,headers=headers)
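
Note on the hunk above: the registration handshake now carries the chosen store and database name as plain HTTP headers. A minimal sketch of the resulting request, using only the standard requests API; the email, store, and db values here are illustrative stand-ins, not taken from the patch:

    import platform
    import requests

    # Illustrative values; the real CLI derives these from SYS_ARGS.
    email, store, db = 'jane@example.com', 'mongo', 'healthcareio'
    headers = {"email": email, "client": platform.node(), "store": store, "db": db}

    http = requests.session()
    r = http.post('https://healthcareio.the-phi.com', headers=headers)
    print(r.status_code)
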
@@ -82,22 +91,6 @@ def register (**args) :
     # Create the sqlite3 database to
-def analytics(**args):
-    """
-    This fucntion will only compute basic distributions of a given feature for a given claim
-    @args
-        @param x: vector of features to process
-        @param apply: operation to be applied {dist}
-    """
-    if args['apply'] in ['dist','distribution'] :
-        """
-        This section of the code will return the distribution of a given space.
-        It is intended to be applied on several claims/remits
-        """
-        x = pd.DataFrame(args['x'],columns=['x'])
-        return x.groupby(['x']).size().to_frame().T.to_dict(orient='record')
 def log(**args):
     """
     This function will perform a log of anything provided to it
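
Note on the deletion above: the removed analytics helper computed a frequency distribution with pandas. A standalone sketch of the same computation; note that current pandas requires the exact orientation spelling 'records' (the 'record' shorthand in the removed line only worked on older releases):

    import pandas as pd

    x = pd.DataFrame(['P1', 'P2', 'P1', 'P3', 'P1'], columns=['x'])
    # One row mapping each distinct value to its count: [{'P1': 3, 'P2': 1, 'P3': 1}]
    dist = x.groupby(['x']).size().to_frame().T.to_dict(orient='records')
    print(dist)
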
@@ -152,7 +145,39 @@ def parse(**args):
     return get_content(args['filename'],CONFIG,SECTION)
+def apply(files,store_info,logger_info=None):
+    """
+    :files          list of files to be processed in this given thread/process
+    :store_info     information about the data-store; for now disk isn't thread safe
+    :logger_info    information about where to store the logs
+    """
+    if not logger_info :
+        logger = factory.instance(type='disk.DiskWriter',args={'path':os.sep.join([info['out-folder'],SYS_ARGS['parse']+'.log'])})
+    else:
+        logger = factory.instance(**logger_info)
+    writer = factory.instance(**store_info)
+    for filename in files :
+        if filename.strip() == '':
+            continue
+        # content,logs = get_content(filename,CONFIG,CONFIG['SECTION'])
+        #
+        try:
+            content,logs = parse(filename = filename,type=SYS_ARGS['parse'])
+            if content :
+                writer.write(content)
+            if logs :
+                [logger.write(dict(_row,**{"parse":SYS_ARGS['parse']})) for _row in logs]
+            else:
+                logger.write({"parse":SYS_ARGS['parse'],"name":filename,"completed":True,"rows":len(content)})
+        except Exception as e:
+            logger.write({"parse":SYS_ARGS['parse'],"filename":filename,"completed":False,"rows":-1,"msg":e.args[0]})
+        # print ([filename,len(content)])
+        #
+        # @TODO: forward this data to the writer and log engine
+        #
 def upgrade(**args):
     """
     :email  provide us with who you are
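
The new apply worker reads module globals (SYS_ARGS['parse'], and info when no logger_info is passed), so it is only meaningful after the CLI bootstrap has populated them. A hedged sketch of dispatching one batch; the descriptor shapes mirror the mongo.MongoWriter usage elsewhere in this patch, and the file paths are made up:

    # Illustrative transport descriptors; real ones come from the registered config.
    store_info  = {'type': 'mongo.MongoWriter', 'args': {'db': 'healthcareio', 'doc': 'claims'}}
    logger_info = {'type': 'mongo.MongoWriter', 'args': {'db': 'healthcareio', 'doc': 'logs'}}

    files = ['/data/claims/claim_001.x12', '/data/claims/claim_002.x12']
    apply(files, store_info, logger_info)   # processes one batch in the current process
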
@@ -175,8 +200,9 @@ if __name__ == '__main__' :
     email = SYS_ARGS['signup'].strip() if 'signup' in SYS_ARGS else SYS_ARGS['init']
     url = SYS_ARGS['url'] if 'url' in SYS_ARGS else 'https://healthcareio.the-phi.com'
-    register(email=email,url=url)
+    store = SYS_ARGS['store'] if 'store' in SYS_ARGS else 'sqlite'
+    db = 'healthcareio' if 'db' not in SYS_ARGS else SYS_ARGS['db']
+    register(email=email,url=url,store=store,db=db)
     # else:
     #     m = """
     # usage:
@@ -218,46 +244,95 @@ if __name__ == '__main__' :
         #     CONFIG = CONFIG[ int(SYS_ARGS['version'])]
         # else:
         #     CONFIG = CONFIG[-1]
-        logger = factory.instance(type='disk.DiskWriter',args={'path':os.sep.join([info['out-folder'],SYS_ARGS['parse']+'.log'])})
         if info['store']['type'] == 'disk.DiskWriter' :
             info['store']['args']['path'] += (os.sep + 'healthcare-io.json')
         elif info['store']['type'] == 'disk.SQLiteWriter' :
             # info['store']['args']['path'] += (os.sep + 'healthcare-io.db3')
             pass
 
         if info['store']['type'] == 'disk.SQLiteWriter' :
             info['store']['args']['table'] = SYS_ARGS['parse'].strip().lower()
         else:
+            #
+            # if we are working with no-sql we will put the logs in it (performance)?
             info['store']['args']['doc'] = SYS_ARGS['parse'].strip().lower()
+            _info = json.loads(json.dumps(info['store']))
+            _info['args']['doc'] = 'logs'
+            logger = factory.instance(**_info)
         writer = factory.instance(**info['store'])
-        logger = factory.instance(type='disk.DiskWriter',args={'path':os.sep.join([info['out-folder'],SYS_ARGS['parse']+'.log'])})
+        #
+        # we need to have batches ready for this in order to run some of these queries in parallel
+        # @TODO: Make sure it is with a persistence storage (not disk .. not thread/process safe yet)
+        #   - Make sure we can leverage this on n-cores later on, for now the assumption is a single core
+        #
+        BATCH_COUNT = 1 if 'batch' not in SYS_ARGS else int(SYS_ARGS['batch'])
         #logger = factory.instance(type='mongo.MongoWriter',args={'db':'healthcareio','doc':SYS_ARGS['parse']+'_logs'})
         # schema = info['schema']
         # for key in schema :
         #     sql = schema[key]['create']
         #     writer.write(sql)
-        for filename in files :
-            if filename.strip() == '':
-                continue
-            # content,logs = get_content(filename,CONFIG,CONFIG['SECTION'])
-            #
-            try:
-                content,logs = parse(filename = filename,type=SYS_ARGS['parse'])
-                if content :
-                    writer.write(content)
-                if logs :
-                    [logger.write(_row) for _row in logs]
-                else:
-                    logger.write({"name":filename,"completed":True,"rows":len(content)})
-            except Exception as e:
-                logger.write({"filename":filename,"completed":False,"rows":-1,"msg":e.args[0]})
-            # print ([filename,len(content)])
-            #
-            # @TODO: forward this data to the writer and log engine
-            #
+        files = np.array_split(files,BATCH_COUNT)
+        procs = []
+        index = 0
+        for row in files :
+            row = row.tolist()
+            logger.write({"process":index,"parse":SYS_ARGS['parse'],"file_count":len(row)})
+            proc = Process(target=apply,args=(row,info['store'],_info,))
+            proc.start()
+            procs.append(proc)
+            index = index + 1
+        while len(procs) > 0 :
+            procs = [proc for proc in procs if proc.is_alive()]
+            time.sleep(2)
+        # for filename in files :
+        #     if filename.strip() == '':
+        #         continue
+        #     # content,logs = get_content(filename,CONFIG,CONFIG['SECTION'])
+        #     #
+        #     try:
+        #         content,logs = parse(filename = filename,type=SYS_ARGS['parse'])
+        #         if content :
+        #             writer.write(content)
+        #         if logs :
+        #             [logger.write(dict(_row,**{"parse":SYS_ARGS['parse']})) for _row in logs]
+        #         else:
+        #             logger.write({"parse":SYS_ARGS['parse'],"name":filename,"completed":True,"rows":len(content)})
+        #     except Exception as e:
+        #         logger.write({"parse":SYS_ARGS['parse'],"filename":filename,"completed":False,"rows":-1,"msg":e.args[0]})
+        #     # print ([filename,len(content)])
+        #     #
+        #     # @TODO: forward this data to the writer and log engine
+        #     #
         pass
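
np.array_split is what makes --batch safe for arbitrary file counts: unlike np.split, it accepts a batch count that does not divide the file list evenly and returns BATCH_COUNT nearly equal chunks. A self-contained sketch of the split plus the polling join used in the driver above (work is a stand-in for apply):

    import time
    import numpy as np
    from multiprocessing import Process

    def work(batch):
        # Stand-in for apply(): each child parses its own slice of files.
        print('%d files' % len(batch))

    if __name__ == '__main__':
        files = ['f%02d.x12' % i for i in range(10)]
        batches = np.array_split(files, 3)      # sizes 4, 3, 3 -- uneven counts are fine
        procs = []
        for batch in batches:
            proc = Process(target=work, args=(batch.tolist(),))
            proc.start()
            procs.append(proc)
        while procs:                            # same polling join as the driver above
            procs = [p for p in procs if p.is_alive()]
            time.sleep(2)
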
+    elif 'analytics' in SYS_ARGS :
+        PORT = int(SYS_ARGS['port']) if 'port' in SYS_ARGS else 5500
+        DEBUG = int(SYS_ARGS['debug']) if 'debug' in SYS_ARGS else 0
+        SYS_ARGS['context'] = SYS_ARGS['context'] if 'context' in SYS_ARGS else ''
+        #
+        #
+        # PATH = SYS_ARGS['config'] if 'config' in SYS_ARGS else os.sep.join([os.environ['HOME'],'.healthcareio','config.json'])
+        e = analytics.engine(os.sep.join([PATH,'config.json'])) #--@TODO: make the configuration file globally accessible
+        e.apply(type='claims',serialize=True)
+        SYS_ARGS['engine'] = e
+        pointer = lambda : server.app.run(host='0.0.0.0',port=PORT,debug=DEBUG,threaded=False)
+        pthread = Process(target=pointer,args=())
+        pthread.start()
     elif 'export' in SYS_ARGS:
         #
         # this function is designed to export the data to csv
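
One caveat on the analytics branch just above: Process(target=pointer) with a lambda only works where the fork start method is available; under spawn (Windows, and the macOS default on recent Pythons) a lambda cannot be pickled. A module-level target is the portable shape; a minimal sketch with a stand-in Flask app in place of healthcareio's server.app:

    from multiprocessing import Process
    from flask import Flask

    app = Flask(__name__)   # stand-in for server.app

    def serve(port=5500, debug=0):
        # Module-level function: picklable under 'spawn', unlike a lambda.
        app.run(host='0.0.0.0', port=port, debug=debug, threaded=False)

    if __name__ == '__main__':
        pthread = Process(target=serve, args=(5500, 0))
        pthread.start()
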
@@ -267,7 +342,17 @@ if __name__ == '__main__' :
         if set([format]) not in ['xls','csv'] :
             format = 'csv'
+    else:
+        msg = """
+        CLI Usage
+            healthcare-io.py --<signup|init> <email> --store <sqlite|mongo>
+            healthcare-io.py --parse claims --folder <path> [--batch <value>]
+            healthcare-io.py --parse remits --folder <path> [--batch <value>]
+        parameters :
+            --<signup|init>     signup or get a configuration file from a parsing server
+            --store             data store (mongo or sqlite)
+        """
+        print(msg)
     pass
     # """
     # The program was called from the command line thus we are expecting
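
A pre-existing issue left as context in the last hunk: set([format]) not in ['xls','csv'] compares a one-element set against each string in the list, never matches, and therefore always resets format to 'csv', even when 'xls' was requested. The presumed intent is a plain membership test:

    format = 'xls'                      # whatever the user requested
    if format not in ['xls', 'csv']:    # not: set([format]) not in ['xls','csv']
        format = 'csv'
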