If you're going to publish an application for others to use beside yourself, you'll want to harden the code to protect against things like malicious queries, add more elegant error handling and explanatory labels, improve the style, and otherwise scale it for enterprise use.
Meanwhile, though, this code should get you started creating an interactive application for querying a data set with natural language:
library(shiny)
library(openai)
library(dplyr)
library(sqldf)
# Load hard-coded dataset
states <- read.csv("states.csv") |>
dplyr::filter(!is.na(Region) & Region != "")
states_schema <- sqldf::sqldf("PRAGMA table_info(states)")
states_schema_string <- paste(apply(states_schema, 1, paste, collapse = "\t"), collapse = "\n")
states_sample <- dplyr::sample_n(states, 3)
states_sample_string <- paste(apply(states_sample, 1, paste, collapse = "\t"), collapse = "\n")
# Function to process user input
get_prompt <- function(query, schema = states_schema_string, rows_sample = states_sample_string, table_name = "states") {
my_prompt <- glue::glue("Act as if you're a data scientist. You have a SQLite table named {table_name} with the following schema:
```
{schema}
```
The first rows look like this:
```{rows_sample}```
Based on this data, write a SQL query to answer the following question: {query} Return the SQL query ONLY. Do not include any additional explanation.")
print(my_prompt)
return(my_prompt)
}
ui <- fluidPage(
titlePanel("Query state database"),
sidebarLayout(
sidebarPanel(
textInput("query", "Enter your query", placeholder = "e.g., What is the total 2020 population by Region?"),
actionButton("submit_btn", "Submit")
),
mainPanel(
uiOutput("the_sql"),
br(),
br(),
verbatimTextOutput("results")
)
)
)
server <- function(input, output) {
# Create the prompt from the user query to send to GPT
the_prompt <- eventReactive(input$submit_btn, {
req(input$query, states_schema_string, states_sample_string)
my_prompt <- get_prompt(query = input$query)
})
# send prompt to GPT, get SQL, run SQL, print results
observeEvent(input$submit_btn, {
req(the_prompt()) # text to send to GPT
# Send results to GPT and get response
# withProgress adds a Shiny progress bar. Commas now needed after each statement
withProgress(message = 'Getting results from GPT', value = 0, { # Add Shiny progress message
my_results <- openai::create_chat_completion(model = "gpt-3.5-turbo", temperature = 0, messages = list(
list(role = "user", content = the_prompt())
))
the_gpt_sql <- my_results$choices$message.content
# print the SQL
sql_html <- gsub("\n", "<br />", the_gpt_sql)
sql_html <- paste0("<p>", sql_html, "</p>")
# Run SQL on data to get results
gpt_answer <- sqldf(the_gpt_sql)
setProgress(value = 1, message = 'GPT results received') # Send msg to user that
})
# Print SQL and results
output$the_sql <- renderUI(HTML(sql_html))
if (is.vector(gpt_answer) ) {
output$results <- renderPrint(gpt_answer)
} else {
output$results <- renderPrint({ print(gpt_answer) })
}
})
}
shinyApp(ui = ui, server = server)