How to use GPT as a natural language to SQL query engine

Generative AI output isn’t always reliable, but here’s how to improve your code and queries created by the technology behind ChatGPT, and prevent sending out sensitive data.

1 2 Page 2
Page 2 of 2

If you're going to publish an application for others to use beside yourself, you'll want to harden the code to protect against things like malicious queries, add more elegant error handling and explanatory labels, improve the style, and otherwise scale it for enterprise use.

Meanwhile, though, this code should get you started creating an interactive application for querying a data set with natural language:


library(shiny)
library(openai)
library(dplyr)
library(sqldf)

# Load hard-coded dataset
states <- read.csv("states.csv") |>
  dplyr::filter(!is.na(Region) & Region != "")

states_schema <- sqldf::sqldf("PRAGMA table_info(states)")
states_schema_string <- paste(apply(states_schema, 1, paste, collapse = "\t"), collapse = "\n")

states_sample <- dplyr::sample_n(states, 3)
states_sample_string <- paste(apply(states_sample, 1, paste, collapse = "\t"), collapse = "\n")

# Function to process user input
get_prompt <- function(query, schema = states_schema_string, rows_sample = states_sample_string, table_name = "states") {
  my_prompt <- glue::glue("Act as if you're a data scientist. You have a SQLite table named {table_name} with the following schema:
  
             ```
             {schema}
             ```
             
             The first rows look like this: 
             
             ```{rows_sample}```
             
             Based on this data, write a SQL query to answer the following question: {query}  Return the SQL query ONLY. Do not include any additional explanation.")
  print(my_prompt)
  return(my_prompt)
  
}

ui <- fluidPage(
  titlePanel("Query state database"),
  sidebarLayout(
    sidebarPanel(
      textInput("query", "Enter your query", placeholder = "e.g., What is the total 2020 population by Region?"),
      actionButton("submit_btn", "Submit")
    ),
    mainPanel(
      uiOutput("the_sql"),
      br(),
      br(),
        verbatimTextOutput("results")
    )
  )
)

server <- function(input, output) {

# Create the prompt from the user query to send to GPT
  the_prompt <- eventReactive(input$submit_btn, {
    req(input$query, states_schema_string, states_sample_string)
    my_prompt <- get_prompt(query = input$query)
  })    
  
# send prompt to GPT, get SQL, run SQL, print results
observeEvent(input$submit_btn, {
  req(the_prompt()) # text to send to GPT
  
  # Send results to GPT and get response
  # withProgress adds a Shiny progress bar. Commas now needed after each statement
  withProgress(message = 'Getting results from GPT', value = 0, {  # Add Shiny progress message
  my_results <- openai::create_chat_completion(model =  "gpt-3.5-turbo", temperature = 0, messages =  list(
    list(role = "user", content = the_prompt())
  )) 
  the_gpt_sql <- my_results$choices$message.content 
  
  # print the SQL
  sql_html <- gsub("\n", "<br />", the_gpt_sql) 
  sql_html <- paste0("<p>", sql_html, "</p>") 
  
  # Run SQL on data to get results
  gpt_answer <- sqldf(the_gpt_sql) 
  setProgress(value = 1, message = 'GPT results received') # Send msg to user that 
  })
  # Print SQL and results
  output$the_sql <- renderUI(HTML(sql_html)) 
  
  if (is.vector(gpt_answer) ) {
    output$results <- renderPrint(gpt_answer) 
  } else {
    output$results <- renderPrint({ print(gpt_answer) }) 
  } 
})  
}
shinyApp(ui = ui, server = server)

Copyright © 2023 IDG Communications, Inc.

1 2 Page 2
Page 2 of 2