adding support for llm

main
Steve Nyemba 2 weeks ago
parent 3d81a192b1
commit 7b8ed0bda0

@ -10,7 +10,14 @@
"logo": "www/html/_assets/images/logo.png",
"source": {
"id": "disk",
"key": "/home/steve/dev/data/qcms/data-transport.key"
"key": "/home/steve/dev/data/qcms/data-transport.key",
"llm": "/home/steve/dev/data/qcms/azure-openai.json",
"catalogs":{
"duckdb":{"sql":"SELECT table_name, TO_JSON(ARRAY_AGG(column_name)) as columns FROM INFORMATION_SCHEMA.COLUMNS GROUP BY table_name"},
"bigquery":{"sql":"SELECT table_name, column_name, data_type FROM aou-res-curation-output-prod.R2024Q3R4.INFORMATION_SCHEMA.COLUMNS WHERE REGEXP_CONTAINS(table_name,'^(cb_|_.+|.*_map.*)$') IS FALSE AND REGEXP_CONTAINS(column_name ,'^.*(_id|_value|_code)$') ORDER BY table_name","args":["dataset"]},
"postgresql":{"sql":"select table_name, column_name,data_type from information_schema.columns where table_schema = 'public' order by table_name"},
"sqlite":{"sql":"select tbl_name as table_name, json_group_array(y.name) as columns from sqlite_master x INNER JOIN PRAGMA_TABLE_INFO(tbl_name) y group by table_name'}"}
}
},
"theme": "default",
"version": "0.1"
@ -41,12 +48,13 @@
"root": "www/html"
},
"plugins": {
"dbe": [
"transport": [
"get",
"providers",
"apply",
"version"
],
"agent":["apply","enabled"],
"register":["get","add","technologies"],
"io": [
"read", "open", "write"

@ -1,2 +1,3 @@
xlsxwriter
openpyxl
smart-logger@git+https://dev.the-phi.com/git/library/smart-top

@ -14,7 +14,9 @@
}
.studio-dialog {
width:650px;
}
.gradient {background-image: linear-gradient(to top, #cfd9df 0%, #e2ebf0 100%);}
.studio-expand {
display:grid;

@ -0,0 +1,35 @@
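//
// ai.js (new file): client-side switch for the AI / text-to-SQL feature.
// The state is persisted in sessionStorage.AI ("1"/"0") and mirrored on the .ai-button and .ai-status elements.
//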
var ai = {}
// Toggle the AI feature based on the current button state (a check icon means AI is currently enabled);
// ai.enable/ai.disable already persist the state and refresh the button and status markup
ai.toggle = function (){
if($('.ai-button').html().match(/check/i)){
ai.disable()
}else{
ai.enable()
}
}
ai.enable = function (){
_val = 1
_status = 'Enabled'
_color = '#4682B4'
_icon = '<i class="fa-solid fa-check" style="color:green"></i>'
$('.ai-workbench-status').html('AI Enabled').css({'color':'#4682b4'})
sessionStorage.AI = _val
$('.ai-button').html (_icon)
$('.ai-status').html(_status).css({color:_color})
}
ai.disable = function (){
_val = 0
_status = 'Disabled'
_color = 'maroon'
_icon = '<i class="fa-solid fa-times" style="color:maroon"></i>'
$('.ai-workbench-status').html('AI Disabled').css({'color':'maroon'})
sessionStorage.AI = _val
$('.ai-button').html (_icon)
$('.ai-status').html(_status).css({color:_color})
}

@ -16,19 +16,19 @@ studio.init = function (_context){
studio._context = _context
}
studio.dbe = {}
studio.dbe.get = function (){
var uri = [studio._context,'api/dbe/get']
uri = uri.join('/')
var http = HttpClient.instance()
http.get(uri,function(x){
// studio.dbe.get = function (){
// var uri = [studio._context,'api/dbe/get']
// uri = uri.join('/')
// var http = HttpClient.instance()
// http.get(uri,function(x){
})
// })
}
// }
studio.dbe.providers = function (_render){
var uri = [studio._context,'api/dbe/providers']
var uri = [studio._context,'api/transport/providers']
uri = uri.join('/')
var http = HttpClient.instance()
http.get(uri,function(x){
@ -41,23 +41,23 @@ studio.dbe.providers = function (_render){
}
studio.dbe.apply = function (label,_query){
var uri = [studio._context,'api/dbe/apply']
uri = uri.join('/')
// studio.dbe.apply = function (label,_query){
// var uri = [studio._context,'api/transport/apply']
// uri = uri.join('/')
var http = HttpClient.instance()
_data = {'label':label,'query':_query}
http.setHeader('Content-Type','application/json')
http.setData (JSON.stringify(_data))
http.get(uri,function(x){
if(x.status == 200 && x.readyState == 4){
_r = JSON.stringify(x.responseText)
}else{
alert(' error round '+x.responseText)
}
})
}
// var http = HttpClient.instance()
// _data = {'label':label,'query':_query}
// http.setHeader('Content-Type','application/json')
// http.setData (JSON.stringify(_data))
// http.get(uri,function(x){
// if(x.status == 200 && x.readyState == 4){
// _r = JSON.stringify(x.responseText)
// }else{
// alert(' error round '+x.responseText)
// }
// })
// }
studio.grid = function (){
this.columns = function (_data){
var _columns = []
@ -195,9 +195,14 @@ studio.frame = function (_args){
//
// Render the template
this.read = function(_label,_query,_compute){
var uri = [studio._context,'api/dbe/apply']
this.read = function(_args,_query,_compute){
var _label = _args.label
var uri = [studio._context,'api/transport/apply']
uri = uri.join('/')
if (_args.AI && parseInt(sessionStorage.AI) == 1){
uri = uri.replace(/transport/g,'agent')
}
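//
// Requests routed to api/agent/apply may have their text translated into SQL server-side;
// the JSON reply then carries both the rows and the generated query (see the _r.query check below)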
// var _compute = this.compute
// $('.'+_label+' .output').html('<i class="fa-solid fa-cog fa-spin"></i> Please wait ...')
var http = HttpClient.instance()
@ -210,7 +215,14 @@ studio.frame = function (_args){
if(x.status == 200 && x.readyState == 4){
_r = JSON.parse(x.responseText)
// sessionStorage._data = x.responseText
if (_r.query == null){
_compute(_label,_r)
}else{
_compute(_label,_r.data)
var _id = '.code.'+_args.label.trim()
$(_id).val($(_id).val() + '\n'+_r['query'])
}
sessionStorage[_label] = _query
}else{
const parser = new DOMParser();
@ -288,13 +300,6 @@ studio.frame = function (_args){
_rows[sheet.name].values.push(rec)
}
})
console.log(_rows[sheet.name].values)
// if (sheet.ranges.length > 0){
// if(sheet.ranges[0].dataSource.length > 0){
// _data.push(sheet) //.ranges[0].dataSource)
// }
// }
})
if (_rows ){
http.setHeader('Content-Type','application/json')
@ -332,29 +337,34 @@ studio.frame = function (_args){
})
}
}
this.render = function (){
var _args = this._args
var _importSheet = this.open
var _exportSheet = this.export
var _icon = $('<img>').attr('src',this._args.icon)
var _label = $('<div class="bold"></div>').html(this._args.label+'<div style="font-weight:lighter; font-size:11px;">'+this._args.provider+'</div>')
_text = _args.provider
if (_args.AI){
//
// The current technology supports AI
_text = (parseInt(sessionStorage.AI) == 1)?(_text+', <b class="ai-workbench-status" style="color:#4682B4">AI Enabled </b>') : (_text+', <b class="ai-workbench-status" style="color:maroon">AI Disabled </b>')
}
// _text = (_args.AI && sessionStorage.AI)? (_text+', <b style="color:#4682B4">AI Enabled </b>') : _text
var _label = $('<div class="bold"></div>').html(this._args.label+'<div style="font-weight:lighter; font-size:11px;">'+_text+'</div>')
read = this.read
_compute = this.compute
_xbutton = $('<div class="border-left"><div class="active apply" align="center" title="Run Query"><i class="fa-solid fa-play"></i></div></div>').on('click',function (){
var _id = '.code.'+_args.label.trim()
_query = $(_id).val()
if (_query.length != 0){
var _data = read(_args.label,_query,_compute)
var _data = read(_args,_query,_compute)
}
})
_pythonCode = $('<div class="border-left" title="Python Code"><div class="active" align="center"><img src="api/disk/read?uri=www/html/_assets/images/python.png" style="width:20px" /></div></div>')
_pythonCode.on('click',()=>{
@ -409,7 +419,7 @@ studio.frame = function (_args){
spreadsheet.hideFileMenuItems(["File"], true);
})
var _buttons = $('<div style="display:grid; grid-template-columns: auto repeat(6,64px); gap:4px;" align="center"><div>&nbsp;</div></div>')
var _buttons = $('<div style="display:grid; grid-template-columns: auto repeat(6,64px); gap:4px; align-items:center; align-content:center;" align="center"><div>&nbsp;</div></div>')
_buttons.append(_pythonCode,_openFile,_saveFile,_wizButton,_xbutton,_expandButton)
// _frame = $('<div class="studio :label"></div>'.replace(/:label/,this._args.label))
@ -421,11 +431,15 @@ studio.frame = function (_args){
_textarea = $('<textarea class="code"></textarea>').addClass(this._args.label).attr('label',_args.label)
if (studio.defaults[_args.provider] != null){
_query = studio.defaults[_args.provider]
if (sessionStorage[_args.label]) {
_query = ('-- '+_query.replace(/\n/g,' ')+'\n') + sessionStorage[_args.label]
}
$(_textarea).val(_query)
}
$(_textarea).on('keydown',function (_e){
if (_e.shiftKey && (_e.keyCode == 13 || _e.key == 'Enter')){
if (/*_e.shiftKey*/ _e.ctrlKey && (_e.keyCode == 13 || _e.key == 'Enter'||_e.key == 'Return')){
var _id = $(this).attr('label')
_id = `.${_id} .apply`
$(_id).click()

@ -0,0 +1,213 @@
"""
This is code that will interact with an LLM (AzureOpenAI/Ollama) leveraging langchain
"""
# _CATALOG = {
# 'sqlite':{'sql':'select tbl_name as table_name, json_group_array(y.name) as columns from sqlite_master x INNER JOIN PRAGMA_TABLE_INFO(tbl_name) y group by table_name'} ,
# 'postgresql':{'sql':"SELECT table_name, to_json(array_agg(column_name)) as columns FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema = 'public' GROUP BY table_name"},
# 'bigquery':{'sql':'SELECT table_name, TO_JSON(ARRAY_AGG(column_name)) as columns FROM :dataset.INFORMATION_SCHEMA.COLUMNS','args':['dataset']},
# 'duckdb':{'sql' :'SELECT table_name, TO_JSON(ARRAY_AGG(column_name)) as columns FROM INFORMATION_SCHEMA.COLUMNS GROUP BY table_name'}
# }
# _CATALOG['sqlite3'] = _CATALOG['sqlite']
import transport
import json
import os
import cms
import pandas as pd
from langchain_openai import AzureOpenAI, AzureChatOpenAI
from langchain_ollama import OllamaEmbeddings, OllamaLLM
from langchain_core.messages import HumanMessage, AIMessage
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
class Agent :
def __init__(self,**_args) :
"""
:backend OpenAI or Ollama
:kwargs The initialization parameters associated with the backend
The arguments will contain temperature and model name to be used
"""
_instance = AzureChatOpenAI if _args['backend'].lower() in ['azureopenai', 'openai'] else OllamaLLM
self._llm = _instance(**_args['kwargs'])
def isSQL(self,_question):
_template = """Is the provided text a valid sql statmement. Yes or No? Your answer is a properly formatted JSON object with three attributes.
class (1 for valid sql statement and 0 for not a valid sql statement), explanation place a short explanation for the answer and original containing with the original text.
text:
{input_text}
"""
# NOTE: temperature is an LLM setting (supplied through the llm config file), not a prompt attribute
_prompt = PromptTemplate(input_variables=['input_text'],template=_template)
r = self.apply(_prompt,input_text=_question)
#
# @TODO: Make sure the response is properly formatted (output not to be trusted)
return json.loads(r)
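#
# Illustrative shape of the reply isSQL() expects from the model (example only):
# {"class": 1, "explanation": "well formed SELECT statement", "original": "SELECT 1"}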
def apply(self,_prompt,**_args):
chain = (
RunnablePassthrough.assign()
| _prompt
| self._llm
| StrOutputParser())
_out = chain.invoke(_args)
return _out # callers (isSQL/toSQL) parse the output with json.loads where needed
def toSQL(self,_question,_catalog,_about):
_template="""Your task is to convert a question to an SQL query. The query will run on schema that will be provided in csv format.
Output:
The expected output will be a JSON object with two attributes sql and tables:
- "sql": the SQL query to be executed.
- "tables": list of relevant tables used.
Guidelines:
- If the question can not be answered with the provided schema return empty string in the sql attribute
- Parse the question word by word so as to be able to identify tables, fields and operations associated (Joins, filters ...)
- Under no circumstances will you provide an explanation of tables or reasoning detail.
question:
{question}
Database schema:
{catalog}
additional information:
{context}
"""
_prompt = PromptTemplate(input_variables=['question','catalog','context'],template=_template)
r = self.apply(_prompt,question=_question,catalog=_catalog,context=_about)
# print (' ############### ------------- #####################')
# print (r)
# print (' #########################################')
#
# @TODO: Make sure the response is properly formatted (output not to be trusted)
if '```json' in r :
r = r.split('```json')[-1].replace('```','')
print (r)
return json.loads(r)
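#
# Illustrative shape of the reply toSQL() expects from the model (example only):
# {"sql": "SELECT COUNT(*) FROM person", "tables": ["person"]}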
#
# We are building an interface to execute an sql query
#
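# Flow of AIProxy below: first try to run the text as-is through the data-transport reader;
# if that raises and the provider has a catalog query plus an LLM config file, build a CSV
# schema catalog, ask the Agent to generate SQL, run the generated query and return both
# the rows and the query that produced them.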
def AIProxy (_label,_query,_path,_CATALOG) :
_qreader = transport.get.reader(label=_label)
_entry = transport.registry.get(_label)
_provider =_entry['provider']
_about = _entry.get('about','')
_database = _entry['database'] if 'database' in _entry else ''
if 'dataset' in _entry :
_database = _entry['dataset']
_about = f'{_about}, with dataset name {_database}'
else:
_about = f'{_about}, with the database name {_database}'
_catalog = None
_kwargs = None
_data = pd.DataFrame()
r = None
try:
#
# we should run the command here, assuming a valid query
#
_data = _qreader.apply(_query)
except Exception as e:
#
# here we assume the text is a question that needs to be translated into a query,
# which requires an LLM configuration file (JSON) and a catalog entry for the provider
#
if _provider in _CATALOG and os.path.exists(_path):
#
# -- reading arguments from the LLM config file, {backend,kwargs}
_kwargs = json.loads((open(_path)).read())
_agent = Agent(**_kwargs)
_qcat = _CATALOG[_provider]
# if 'args' in _CATALOG[_provider] :
# _entry = transport.registry.get(_label)
# for _var in _CATALOG[_provider]['args'] :
# if _var in _entry:
# _value = _entry[_var]
# _about = f'{_about}\n{_var} = {_value}'
# _qcat['sql'] = _qcat['sql'].replace(f':{_var}',_value)
_catalog = _qreader.read(**_qcat).to_csv(index=0)
_about = f"The queries will run on {_provider} database.\n{_about}"
r = _agent.toSQL(_query,_catalog, _about)
_data = _qreader.apply(r['sql'])
#
# returning the data and the information needed
#
_data = _data.to_dict(orient='split')
del _data['index']
if r :
return {'data':_data,'query':r['sql']}
return _data
# if not _path :
# #
# # exececute the query as is !
# pass
# else:
# if _provider in _CATALOG and os.path.exists(_path) :
# #
# # As the agent if it is an SQL Query
# f = open(_path)
# _kwargs = json.loads( f.read() )
# f.close()
# try:
# print ([f"Running Model {_kwargs['kwargs']['model']}"])
# _agent = Agent(**_kwargs)
# # r = _agent.isSQL(_query)
# # print (f"****** {_query}\n{r['class']}")
# # if r and int(r['class']) == 0 :
# _catalog = _qreader.read(**_CATALOG[_provider]).to_csv(index=0)
# # print (['****** TABLES FOUND ', _catalog])
# _about = _about if _about else ''
# r = _agent.toSQL(_query,_catalog,f"This is a {_provider} database, and queries generated should account for this. {_about}")
# _query = r['sql']
# # else:
# # #
# # # provided an sql query
# # pass
# except Exception as e:
# #
# # AI Service is unavailable ... need to report this somehow
# print (e)
# else:
# #
# # Not in catalog ...
# pass
# _data = _qreader.apply(_query)
# if _data.shape[0] :
# _data = _data.astype(str).to_dict(orient='split')
# if 'index' in _data :
# del _data['index']
# return _data
@cms.Plugin(mimetype="application/json",method="POST")
def apply (**_args):
_request = _args['request']
_label = _request.json['label']
_query = _request.json['query']
_source = _args['config']['system']['source']
_path = _source.get('llm',None)
_CATALOGS = _source.get('catalogs',{})
return AIProxy(_label,_query,_path,_CATALOGS)
@cms.Plugin(mimetype="text/plain")
def enabled(**_args):
_config = _args['config']
return str(int('llm' in _config['system']['source'] ))
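A minimal sketch of how the new endpoint could be exercised once the site is up; the host/port and the 'mydb' label are assumptions, and requests is just one convenient HTTP client:

import requests  # assumption: any HTTP client works, requests is used here for brevity

# api/agent/apply expects a JSON body with a registered label and either raw SQL or a natural-language question
# (depending on deployment, a {{system.context}} prefix may be needed in front of the path)
payload = {'label': 'mydb', 'query': 'how many rows are in the person table'}
r = requests.post('http://localhost:8084/api/agent/apply', json=payload)  # hypothetical host/port
_out = r.json()
# When the LLM translated the question, the reply holds the rows under 'data' and the generated SQL under 'query'
print(_out.get('query'))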

@ -14,6 +14,10 @@ def get (**_args) :
transport.registry.load()
_data = copy.copy(transport.registry.DATA)
_context = _args['config']['system']['context']
_CATALOGS = _args['config']['system']['source'].get('catalogs',{})
_HAS_LLM = 'llm' in _args['config']['system']['source']
_labels = []
for _key in _data :
@ -27,7 +31,7 @@ def get (**_args) :
_table = 'NA' if not _name else _data[_key][_name]
_plugins = [] if 'plugins' not in _data[_key]else _data[_key]['plugins']
_icon = f'{_context}/api/disk/read?uri=www/html/_assets/images/{_provider}.png'
_labels.append({"label":_key,"provider":_provider,'table':_table,'icon':_icon})
_labels.append({"label":_key,"provider":_provider,'table':_table,'icon':_icon,'AI': (_provider in _CATALOGS and _HAS_LLM)})
else:
continue
return _labels

@ -42,11 +42,12 @@
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Material+Icons" />
-->
<script src="www/html/_assets/js/studio.js"></script>
<script src="www/html/_assets/js/ai.js"></script>
<script>
var Initialize =function(){
var http = HttpClient.instance()
uri = 'api/dbe/version'
uri = 'api/transport/version'
http.get(uri,(x)=>{
var _html = ('<div>Powered By <span style="text-transform:lowercase; color:#4682B4;">data-transport</span> '+x.responseText+'</div>')
$

@ -35,6 +35,24 @@
</style>
<script>
var _aiInit = function (){
var http = HttpClient.instance()
uri = '{{system.context}}/api/agent/enabled'
http.get(uri,function(x){
if(x.responseText == 0){
$('.ai-button').remove()
sessionStorage.AI = "0"
}
if (sessionStorage.AI == "1"){
ai.enable()
}else{
ai.disable()
}
})
}
var _init = function (_term){
var http = HttpClient.instance()
var uri = (['{{system.context}}','api/register/get']).join('/')
@ -49,6 +67,7 @@
labels.forEach(_data => {
_icon = $('<img/>').attr('src',_data.icon) //'www/html/_assets/images/'+_data.provider+'.png')
_label = $('<div></div>').html(_data.label)
_div = $('<div class="active button"></div>').append(_icon,_label).attr('label',_data.label).attr('provider',_data.provider)
@ -60,6 +79,9 @@
// })
$('.studio-pane').css({display:'grid'})
$('.studio-intro').slideUp(()=>{
this.work_bench.show()
})
if ($(_id).length == 0){
// _object = new studio.frame(this._data)
// _object.render()
@ -69,9 +91,7 @@
}
// var _label = this._data.label
$('.studio-intro').slideUp(()=>{
this.work_bench.show()
})
}
@ -83,6 +103,7 @@
_find()
}
})
}
var _find = function (){
@ -94,7 +115,7 @@
_item = _nodes[_index]
p = $(_item).attr('provider')
q = $(_item).attr('label')
rgx = new RegExp(_val)
rgx = new RegExp(_val,'i')
if (p.match(rgx) || q.match(rgx)){
$(_item).slideDown()
}else{
@ -117,6 +138,7 @@
$(document).ready(function (){
_init()
_aiInit()
var _layout = {on:{load:{}}}
// $('.pane .tabs label')[0].click()
@ -158,6 +180,16 @@
<i class="fa-solid fa-plug-circle-plus" style="color:#4682b4;"></i> New connection
</div>
</div>
<div class="border" style="margin-top:4px;padding:4px; background-color: #f3f3f3;">
<div class="active" style="margin-top:4px" onclick="dialog.show({uri:'www/html/setup-ai.html',title:'Setup AI',context:'{{system.context}}'})">
<i class="fa-solid fa-microchip"></i> <span>Setup AI</span>
<span class="ai-status " style="margin-left:8px">Enabled</span>
</div>
</div>
<div class="_border-round border" style="margin-top:4px;padding:4px; background-color: #f3f3f3;">
<div class="active" style="margin:4px" onclick='window.open("https://dev.the-phi.com/git/data-transport")'>
<i class="fa-brands fa-git-alt"></i> <b>github</b>
@ -169,6 +201,7 @@
<i class="fa-brands fa-docker"></i> Docker | Podman container install
</div>
</div>
</div>

@ -35,11 +35,11 @@
<div style="width:650px;">
<div class="generated-code">
<div class="generated-code border-round" style="padding:8px; background-color: #f3f3f3;">
<span class="active copy" style="float:right; padding:4px" >
<i class="fa-solid fa-copy" ></i>
</span>
<pre style="margin:4px; padding:4px; background-color: transparent;">
<pre style="margin:4px; padding:4px;">
<code class="code language-python" style="line-height:1.5;text-wrap: wrap;">
"""
install data-transport

@ -0,0 +1,47 @@
<script>
$(document).ready(()=>{
var _aiTech = []
$('.labels').children().each((_index)=>{
var _data = $('.labels').children()[_index]._data
if(_data.AI){
_aiTech.push(_data.provider)
}
})
_aiTech = jx.utils.unique(_aiTech)
$('.ai-tech').html( _aiTech.join(' , '))
$('.ai-setup .ai-button').on('click',ai.toggle)
if (sessionStorage.AI == "0"){
ai.disable()
}else{
ai.enable()
}
})
</script>
<div class="studio-dialog ai-setup">
<p style="line-height:1.5; font-weight: lighter;">
Setup system-wide AI capability to translate a prompt into an SQL query. This feature is supported for the following technologies:
<ul>
<div style="margin:4px;"><i class="fa-solid fa-minus"></i> <span class="ai-tech bold"></span></div>
<div style="margin:4px;"><i class="fa-solid fa-minus"></i> <span> Ollama & AzureOpenAI</span></div>
</ul>
<ul>
<br>
<p>
<div class="border" style="align-items:center; width:34%; margin-left:33%; padding:8px; display:grid; grid-template-columns: auto 64px; gap:8px; background-color: #f3f3f3;">
<div>
<i class="fa-solid fa-microchip"></i>
<span class="bold">AI</span>
<span class="ai-status bold">Enabled</span>
</div>
<div align="center" class="active ai-button" >
<span class="fa-solid fa-check" style="color:green"></span>
</div>
</div>
</p>
</ul>
</p>
</div>