Connect LLM to SQL database with LangChain SQLChain
How to Tutorial for using LangChain SQLChain
Beginning the Process
First, ensure that PostgreSQL is installed on your machine and that you have an OpenAI account. If necessary, establish a new Python virtual environment before proceeding.
make sure to install requirements.txt from :
you will find the .json file on the repo as well
run :
!pip install -r requirements.txt
if you wanna create some data on your postgres server
import psycopg2import jsonimport psycopg2import json# Load the JSON data from the filewithopen("./data/my_database.json", "r")as f: data = json.load(f)# Connect to the PostgreSQL databaseconn = psycopg2.connect( host="localhost", database="postgres", user="user", password="password", port="5432")# Create a cursor objectcur = conn.cursor()# Create the tablecur.execute(""" CREATE TABLE IF NOT EXISTS person ( id SERIAL PRIMARY KEY, name VARCHAR(100), address TEXT, email VARCHAR(100), phone_number VARCHAR(30), birthdate DATE, job VARCHAR(100) )""")# Insert the data into the tablefor person_id, person_data in data["_default"].items(): name = person_data.get("name", "") address = person_data.get("address", "") email = person_data.get("email", "") phone_number = person_data.get("phone_number", "") birthdate = person_data.get("birthdate", None) job = person_data.get("job", "") cur.execute("INSERT INTO person (name, address, email, phone_number, birthdate, job) VALUES (%s, %s, %s, %s, %s, %s)", (name, address, email, phone_number, birthdate, job) )cur.execute("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'")table_names = [row[0]for row in cur.fetchall()]# Print the table namesprint("Table names:")for table_name in table_names:print(table_name)# Commit the changes and close the connectionconn.commit()
and then run :
from dotenv import load_dotenvenv_content ="""GROQ_API_KEY=your groq api key without ''"""# Write the content to a file named .envwithopen('.env', 'w')as f: f.write(env_content)# Load environment variablesload_dotenv(".env")
if you do not have groq api just make one here:
Set OpenAI API environment variables
import os# Set OpenAI API environment variables (if needed)os.environ["OPENAI_API_BASE"]="https://api.groq.com/openai/v1"os.environ["OPENAI_MODEL_NAME"]="llama3-8b-8192"os.environ["OPENAI_API_KEY"]=os.getenv("GROQ_API_KEY")
initialise your server again
from langchain_community.utilities import SQLDatabaseusername="postgres"password="password"# plain (unescaped) texthost="localhost"# Hostname or IP address of the PostgreSQL serverport="5432"# Port numbermydatabase="postgres"pg_uri =f"postgresql+psycopg2://{username}:{password}@{host}:{port}/{mydatabase}"db = SQLDatabase.from_uri(pg_uri)
from langchain_community.llms import OpenAIfrom langchain_experimental.sql import SQLDatabaseChain,SQLDatabaseSequentialChainfrom langchain.chains.llm import LLMChainfrom langchain.prompts import PromptTemplate from langchain.chains.sql_database.prompt import DECIDER_PROMPT, PROMPT, SQL_PROMPTS
create a prompt
sql_chain = SQLDatabaseChain.from_llm(llm, db)PROMPT =""" Given an input question, first create a syntactically correct postgresql query to run, then look at the results of the query and return the answer. The question: {question}"""
Run a query
decider_chain =LLMChain(llm=llm, prompt=DECIDER_PROMPT, output_key="table_names")classCustomSQLDatabaseSequentialChain(SQLDatabaseSequentialChain):defrun(self,question): result =super().run(question)print(f"Query result: {result}")return resultdb_chain =CustomSQLDatabaseSequentialChain(llm=llm, database=db, verbose=True, top_k=3, decider_chain=decider_chain, sql_chain=sql_chain, prompt=PROMPT)question ="Whom the person has a name starting with 'S'?"db_chain.run(question)
you will get result like
>Entering new CustomSQLDatabaseSequentialChain chain...
Table names to use: ['employees', 'person']
Finished chain.
Query result: Question: Whom the person has a name starting with 'S'? SQLQuery:SELECT "name" FROM employees WHERE "name" LIKE 'S%';
run this as well if you wanna display the results :
from sqlalchemy import create_engine, text# Assuming db_chain is your CustomSQLDatabaseSequentialChain instancequery = db_chain.run(question)# Extract the SQL query from the combined stringsql_query = query.split('SQLQuery:')[1].strip()# Establish a connection to your databaseengine =create_engine(pg_uri)# Execute the SQL querywith engine.connect()as connection: result = connection.execute(text(sql_query))# Print the resultsfor row in result:print(row)