diff --git a/qcms-manifest.json b/qcms-manifest.json index cf5588b..7d58398 100644 --- a/qcms-manifest.json +++ b/qcms-manifest.json @@ -10,7 +10,14 @@ "logo": "www/html/_assets/images/logo.png", "source": { "id": "disk", - "key": "/home/steve/dev/data/qcms/data-transport.key" + "key": "/home/steve/dev/data/qcms/data-transport.key", + "llm": "/home/steve/dev/data/qcms/azure-openai.json", + "catalogs":{ + "duckdb":{"sql":"SELECT table_name, TO_JSON(ARRAY_AGG(column_name)) as columns FROM INFORMATION_SCHEMA.COLUMNS GROUP BY table_name"}, + "bigquery":{"sql":"SELECT table_name, column_name, data_type FROM aou-res-curation-output-prod.R2024Q3R4.INFORMATION_SCHEMA.COLUMNS WHERE REGEXP_CONTAINS(table_name,'^(cb_|_.+|.*_map.*)$') IS FALSE AND REGEXP_CONTAINS(column_name ,'^.*(_id|_value|_code)$') ORDER BY table_name","args":["dataset"]}, + "postgresql":{"sql":"select table_name, column_name,data_type from information_schema.columns where table_schema = 'public' order by table_name"}, + "sqlite":{"sql":"select tbl_name as table_name, json_group_array(y.name) as columns from sqlite_master x INNER JOIN PRAGMA_TABLE_INFO(tbl_name) y group by table_name"} + } }, "theme": "default", "version": "0.1" @@ -41,12 +48,13 @@ "root": "www/html" }, "plugins": { - "dbe": [ + "transport": [ "get", "providers", "apply", "version" ], + "agent":["apply","enabled"], "register":["get","add","technologies"], "io": [ "read", "open", "write" diff --git a/requirements.txt b/requirements.txt index af1be70..00a209d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ xlsxwriter openpyxl +smart-logger@git+https://dev.the-phi.com/git/library/smart-top diff --git a/www/html/_assets/css/studio.css b/www/html/_assets/css/studio.css index 7fd10e7..3612d1b 100644 --- a/www/html/_assets/css/studio.css +++ b/www/html/_assets/css/studio.css @@ -14,7 +14,9 @@ } - +.studio-dialog { + width:650px; +} .gradient {background-image: linear-gradient(to top, #cfd9df 0%, #e2ebf0 100%);} 
.studio-expand { display:grid; diff --git a/www/html/_assets/js/ai.js b/www/html/_assets/js/ai.js new file mode 100644 index 0000000..6442832 --- /dev/null +++ b/www/html/_assets/js/ai.js @@ -0,0 +1,35 @@ + var ai = {} + ai.toggle = function (){ + if($('.ai-button').html().match(/check/i)){ + ai.disable() + }else{ + ai.enable() + + } + sessionStorage.AI = _val + $('.ai-button').html (_icon) + $('.ai-status').html(_status).css({color:_color}) + } + ai.enable = function (){ + _val = 1 + _status = 'Enabled' + _color = '#4682B4' + _icon = '' + $('.ai-workbench-status').html('AI Enabled').css({'color':'#4682b4'}) + sessionStorage.AI = _val + $('.ai-button').html (_icon) + $('.ai-status').html(_status).css({color:_color}) + + + } + ai.disable = function (){ + _val = 0 + _status = 'Disabled' + _color = 'maroon' + _icon = '' + $('.ai-workbench-status').html('AI Disabled').css({'color':'maroon'}) + sessionStorage.AI = _val + $('.ai-button').html (_icon) + $('.ai-status').html(_status).css({color:_color}) + + } \ No newline at end of file diff --git a/www/html/_assets/js/studio.js b/www/html/_assets/js/studio.js index 27ece73..3406db4 100644 --- a/www/html/_assets/js/studio.js +++ b/www/html/_assets/js/studio.js @@ -16,19 +16,19 @@ studio.init = function (_context){ studio._context = _context } studio.dbe = {} -studio.dbe.get = function (){ - var uri = [studio._context,'api/dbe/get'] - uri = uri.join('/') - var http = HttpClient.instance() - http.get(uri,function(x){ +// studio.dbe.get = function (){ +// var uri = [studio._context,'api/dbe/get'] +// uri = uri.join('/') +// var http = HttpClient.instance() +// http.get(uri,function(x){ - }) +// }) -} +// } studio.dbe.providers = function (_render){ - var uri = [studio._context,'api/dbe/providers'] + var uri = [studio._context,'api/transport/providers'] uri = uri.join('/') var http = HttpClient.instance() http.get(uri,function(x){ @@ -41,23 +41,23 @@ studio.dbe.providers = function (_render){ } -studio.dbe.apply = function 
(label,_query){ - var uri = [studio._context,'api/dbe/apply'] - uri = uri.join('/') +// studio.dbe.apply = function (label,_query){ +// var uri = [studio._context,'api/transport/apply'] +// uri = uri.join('/') - var http = HttpClient.instance() - _data = {'label':label,'query':_query} - http.setHeader('Content-Type','application/json') - http.setData (JSON.stringify(_data)) - http.get(uri,function(x){ - if(x.status == 200 && x.readyState == 4){ - _r = JSON.stringify(x.responseText) - }else{ - alert(' error round '+x.responseText) - } - }) - -} +// var http = HttpClient.instance() +// _data = {'label':label,'query':_query} +// http.setHeader('Content-Type','application/json') +// http.setData (JSON.stringify(_data)) +// http.get(uri,function(x){ +// if(x.status == 200 && x.readyState == 4){ +// _r = JSON.stringify(x.responseText) +// }else{ +// alert(' error round '+x.responseText) +// } +// }) + +// } studio.grid = function (){ this.columns = function (_data){ var _columns = [] @@ -195,9 +195,14 @@ studio.frame = function (_args){ // // Render the template - this.read = function(_label,_query,_compute){ - var uri = [studio._context,'api/dbe/apply'] + this.read = function(_args,_query,_compute){ + var _label = _args.label + var uri = [studio._context,'api/transport/apply'] uri = uri.join('/') + if (_args.AI && sessionStorage.AI){ + uri = uri.replace(/transport/g,'agent') + } + // var _compute = this.compute // $('.'+_label+' .output').html(' Please wait ...') var http = HttpClient.instance() @@ -210,7 +215,14 @@ studio.frame = function (_args){ if(x.status == 200 && x.readyState == 4){ _r = JSON.parse(x.responseText) // sessionStorage._data = x.responseText + if (_r.query == null){ _compute(_label,_r) + }else{ + _compute(_label,_r.data) + var _id = '.code.'+_args.label.trim() + $(_id).val($(_id).val() + '\n'+_r['query']) + } + sessionStorage[_label] = _query }else{ const parser = new DOMParser(); @@ -288,13 +300,6 @@ studio.frame = function (_args){ 
_rows[sheet.name].values.push(rec) } }) - console.log(_rows[sheet.name].values) - // if (sheet.ranges.length > 0){ - // if(sheet.ranges[0].dataSource.length > 0){ - - // _data.push(sheet) //.ranges[0].dataSource) - // } - // } }) if (_rows ){ http.setHeader('Content-Type','application/json') @@ -332,29 +337,34 @@ studio.frame = function (_args){ }) } } - + this.render = function (){ var _args = this._args var _importSheet = this.open var _exportSheet = this.export var _icon = $('').attr('src',this._args.icon) - var _label = $('
').html(this._args.label+'
'+this._args.provider+'
') + _text = _args.provider + if (_args.AI){ + // + // The current technology supports AI + _text = (parseInt(sessionStorage.AI) == 1)?(_text+', AI Enabled ') : (_text+', AI Disabled ') + } + // _text = (_args.AI && sessionStorage.AI)? (_text+', AI Enabled ') : _text + var _label = $('
').html(this._args.label+'
'+_text+'
') read = this.read _compute = this.compute _xbutton = $('
').on('click',function (){ - - var _id = '.code.'+_args.label.trim() _query = $(_id).val() if (_query.length != ''){ - var _data = read(_args.label,_query,_compute) + var _data = read(_args,_query,_compute) } }) - + _pythonCode = $('
') _pythonCode.on('click',()=>{ @@ -409,7 +419,7 @@ studio.frame = function (_args){ spreadsheet.hideFileMenuItems(["File"], true); }) - var _buttons = $('
 
') + var _buttons = $('
 
') _buttons.append(_pythonCode,_openFile,_saveFile,_wizButton,_xbutton,_expandButton) // _frame = $('
'.replace(/:label/,this._args.label)) @@ -421,11 +431,15 @@ studio.frame = function (_args){ _textarea = $('').addClass(this._args.label).attr('label',_args.label) if (studio.defaults[_args.provider] != null){ _query = studio.defaults[_args.provider] + if (sessionStorage[_args.label]) { + + _query = ('-- '+_query.replace(/\n/g,' ')+'\n') + sessionStorage[_args.label] + } $(_textarea).val(_query) } $(_textarea).on('keydown',function (_e){ - if (_e.shiftKey && (_e.keyCode == 13 || _e.key == 'Enter')){ + if (/*_e.shiftKey*/ _e.ctrlKey && (_e.keyCode == 13 || _e.key == 'Enter'||_e.key == 'Return')){ var _id = $(this).attr('label') _id = `.${_id} .apply` $(_id).click() diff --git a/www/html/_plugins/agent.py b/www/html/_plugins/agent.py new file mode 100644 index 0000000..131b850 --- /dev/null +++ b/www/html/_plugins/agent.py @@ -0,0 +1,213 @@ +""" +This is code that will interact with an LLM (AzureOpenAI/Ollama) leveraging langchain +""" +# _CATALOG = { +# 'sqlite':{'sql':'select tbl_name as table_name, json_group_array(y.name) as columns from sqlite_master x INNER JOIN PRAGMA_TABLE_INFO(tbl_name) y group by table_name'} , +# 'postgresql':{'sql':"SELECT table_name, to_json(array_agg(column_name)) as columns FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = 'public' GROUP BY table_name"}, +# 'bigquery':{'sql':'SELECT table_name, TO_JSON(ARRAY_AGG(column_name)) as columns FROM :dataset.INFORMATION_SCHEMA.COLUMNS','args':['dataset']}, +# 'duckdb':{'sql' :'SELECT table_name, TO_JSON(ARRAY_AGG(column_name)) as columns FROM INFORMATION_SCHEMA.COLUMNS GROUP BY table_name'} +# } + +# _CATALOG['sqlite3'] = _CATALOG['sqlite'] +import transport +import json +import os +import cms +import pandas as pd +from langchain_openai import AzureOpenAI, AzureChatOpenAI +from langchain_ollama import OllamaEmbeddings, OllamaLLM +from langchain_core.messages import HumanMessage, AIMessage +from langchain.chains import LLMChain +from langchain_core.prompts import PromptTemplate +from 
langchain_core.runnables import RunnablePassthrough +from langchain_core.output_parsers import StrOutputParser + +class Agent : + def __init__(self,**_args) : + """ + :backend OpenAI or Ollama + :kwargs The initialization parameters associated with the backend + The arguments will contain temperature and model name to be used + """ + _instance = AzureChatOpenAI if _args['backend'].lower() in ['azureopenai', 'openai'] else OllamaLLM + self._llm = _instance(**_args['kwargs']) + def isSQL(self,_question): + _template = """Is the provided text a valid sql statmement. Yes or No? Your answer is a properly formatted JSON object with three attributes. + class (1 for valid sql statement and 0 for not a valid sql statement), explanation place a short explanation for the answer and original containing with the original text. + + text: + {input_text} + """ + _prompt = PromptTemplate(temperature=0.1,input_variables=['input_text'],template=_template) + + r = self.apply(_prompt,input_text=_question) + # + # @TODO: Make sure the response is properly formatted (output not to be trusted) + return json.loads(r) + def apply(self,_prompt,**_args): + + chain = ( + RunnablePassthrough.assign() + | _prompt + | self._llm + | StrOutputParser()) + _out = chain.invoke(_args) + + return _out #son.loads(_out) + def toSQL(self,_question,_catalog,_about): + _template="""Your task is to convert a question to an SQL query. The query will run on schema that will be provided in csv format. + +Output: +The expected output will be a JSON object with two attributes sql and tables: +- "sql": the SQL query to be executed. +- "tables": list of relevant tables used. + +Guidelines: +- If the question can not be answered with the provided schema return empty string in the sql attribute +- Parse the question word by word so as to be able to identify tables, fields and operations associated (Joins, filters ...) +- Under no circumstances will you provide an explanation of tables or reasoning detail. 
+ +question: +{question} + +Database schema: +{catalog} + +additional information: +{context} + """ + + _prompt = PromptTemplate(temperature=0.1,input_variables=['question','catalog','context'],template=_template) + r = self.apply(_prompt,question=_question,catalog=_catalog,context=_about) + # print (' ############### ------------- #####################') + # print (r) + # print (' #########################################') + # + # @TODO: Make sure the response is properly formatted (output not to be trusted) + if '```json' in r : + r = r.split('```json')[-1].replace('```','') + print (r) + return json.loads(r) + + + +# +# We are building an interface to execute an sql query +# + +def AIProxy (_label,_query,_path,_CATALOG) : + _qreader = transport.get.reader(label=_label) + _entry = transport.registry.get(_label) + _provider =_entry['provider'] + _about = _entry.get('about','') + _database = _entry['database'] if 'database' in _entry else '' + + if 'dataset' in _entry : + _database = _entry['dataset'] + _about = f'{_about}, with dataset name {_database}' + else: + _about = f'{_about}, with the database name {_database}' + _catalog = None + _kwargs = None + _data = pd.DataFrame() + r = None + try: + # + # we should run the command here, assuming a valid query + # + _data = _qreader.apply(_query) + except Exception as e: + # + # here we are assuming we are running text to be translated as a query + # we need to make sure that we have a JSON configurator + # + if _provider in _CATALOG and os.path.exists(_path): + # + # -- reading arguments from the LLM config file, {backend,kwargs} + _kwargs = json.loads((open(_path)).read()) + _agent = Agent(**_kwargs) + _qcat = _CATALOG[_provider] + # if 'args' in _CATALOG[_provider] : + # _entry = transport.registry.get(_label) + # for _var in _CATALOG[_provider]['args'] : + # if _var in _entry: + # _value = _entry[_var] + # _about = f'{_about}\n{_var} = {_value}' + # _qcat['sql'] = _qcat['sql'].replace(f':{_var}',_value) + + + 
_catalog = _qreader.read(**_qcat).to_csv(index=0) + _about = f"The queries will run on {_provider} database.\n{_about}" + r = _agent.toSQL(_query,_catalog, _about) + + _data = _qreader.apply(r['sql']) + # + # returning the data and the information needed + # + _data = _data.to_dict(orient='split') + del _data['index'] + if r : + return {'data':_data,'query':r['sql']} + return _data + + + # if not _path : + # # + # # exececute the query as is ! + # pass + # else: + + # if _provider in _CATALOG and os.path.exists(_path) : + + # # + # # As the agent if it is an SQL Query + # f = open(_path) + # _kwargs = json.loads( f.read() ) + # f.close() + # try: + + # print ([f"Running Model {_kwargs['kwargs']['model']}"]) + # _agent = Agent(**_kwargs) + # # r = _agent.isSQL(_query) + + # # print (f"****** {_query}\n{r['class']}") + # # if r and int(r['class']) == 0 : + # _catalog = _qreader.read(**_CATALOG[_provider]).to_csv(index=0) + # # print (['****** TABLES FOUND ', _catalog]) + # _about = _about if _about else '' + # r = _agent.toSQL(_query,_catalog,f"This is a {_provider} database, and queries generated should account for this. {_about}") + # _query = r['sql'] + # # else: + # # # + # # # provided an sql query + # # pass + # except Exception as e: + # # + # # AI Service is unavailable ... need to report this somehow + # print (e) + # else: + # # + # # Not in catalog ... 
+ # pass + # _data = _qreader.apply(_query) + # if _data.shape[0] : + # _data = _data.astype(str).to_dict(orient='split') + # if 'index' in _data : + # del _data['index'] + # return _data + + +@cms.Plugin(mimetype="application/json",method="POST") +def apply (**_args): + _request = _args['request'] + _label = _request.json['label'] + _query = _request.json['query'] + _source = _args['config']['system']['source'] + _path = _source.get('llm',None) + _CATALOGS = _source.get('catalogs',{}) + return AIProxy(_label,_query,_path,_CATALOGS) + +@cms.Plugin(mimetype="text/plain") +def enabled(**_args): + _config = _args['config'] + return str(int('llm' in _config['system']['source'] )) diff --git a/www/html/_plugins/register.py b/www/html/_plugins/register.py index 199172f..541887c 100644 --- a/www/html/_plugins/register.py +++ b/www/html/_plugins/register.py @@ -14,6 +14,10 @@ def get (**_args) : transport.registry.load() _data = copy.copy(transport.registry.DATA) _context = _args['config']['system']['context'] + print () + + _CATALOGS = _args['config']['system']['source'].get('catalogs',{}) + _HAS_LLM = 'llm' in _args['config']['system']['source'] _labels = [] for _key in _data : @@ -27,7 +31,7 @@ def get (**_args) : _table = 'NA' if not _name else _data[_key][_name] _plugins = [] if 'plugins' not in _data[_key]else _data[_key]['plugins'] _icon = f'{_context}/api/disk/read?uri=www/html/_assets/images/{_provider}.png' - _labels.append({"label":_key,"provider":_provider,'table':_table,'icon':_icon}) + _labels.append({"label":_key,"provider":_provider,'table':_table,'icon':_icon,'AI': (_provider in _CATALOGS and _HAS_LLM)}) else: continue return _labels diff --git a/www/html/_plugins/dbe.py b/www/html/_plugins/transport.py similarity index 100% rename from www/html/_plugins/dbe.py rename to www/html/_plugins/transport.py diff --git a/www/html/index.html b/www/html/index.html index e1023af..a75c21b 100644 --- a/www/html/index.html +++ b/www/html/index.html @@ -42,11 +42,12 @@ 
--> + +
+

+ Set up system-wide AI capability to translate a prompt to an SQL query. This feature is supported for the following technologies:

+ + +

+
\ No newline at end of file