Switch text-to-SQL model to defog/sqlcoder-7b
Files changed:
- Dockerfile +7 -3
- app.py +55 -9
- pipeline.py +116 -35
- requirements.txt +4 -4
Dockerfile
CHANGED
@@ -17,14 +17,18 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 COPY app.py pipeline.py db_utils.py ./
 
-# Set up cache directory
+# Set up cache directory with proper permissions
 RUN mkdir -p /tmp/cache/huggingface && \
     chmod -R 777 /tmp/cache/huggingface
 
+# Environment variables
 ENV HF_HOME=/tmp/cache/huggingface
+ENV TRANSFORMERS_CACHE=/tmp/cache/huggingface
+ENV HF_DATASETS_CACHE=/tmp/cache/huggingface
 ENV PORT=8501
-ENV OMP_NUM_THREADS=
+ENV OMP_NUM_THREADS=4
+ENV TOKENIZERS_PARALLELISM=false
 
 EXPOSE 8501
 
-CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
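The added ENV lines route every Hugging Face cache location to /tmp/cache/huggingface, the directory the RUN step creates and opens up, so model downloads can succeed in the Space's non-root container. A minimal sketch (not part of the commit) for checking at runtime that the container resolves and can write to that directory:

import os

# Read the cache path the Dockerfile configures; fall back to the same default.
cache_dir = os.environ.get("HF_HOME", "/tmp/cache/huggingface")
os.makedirs(cache_dir, exist_ok=True)  # mirrors the Dockerfile's mkdir -p
print(f"HF cache: {cache_dir}, writable: {os.access(cache_dir, os.W_OK)}")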
app.py
CHANGED
@@ -1,13 +1,59 @@
 import streamlit as st
 from pipeline import text_to_sql
 
-st.title("
-[old lines 5-11 truncated in this view]
+st.title("SQLCoder Text-to-SQL App")
+st.write("Powered by defog/sqlcoder-7b-2 🚀")
+
+# Sample queries for user guidance
+st.sidebar.header("Sample Queries")
+sample_queries = [
+    "List 11 names of ships type schooner",
+    "Show me the 5 oldest ships",
+    "What are the different types of vessels?",
+    "Count the number of ships by type",
+    "Show ships built after 1900"
+]
+
+selected_sample = st.sidebar.selectbox("Choose a sample query:", [""] + sample_queries)
+
+# Main input
+nl_query = st.text_input(
+    "Enter your natural language query:",
+    value=selected_sample if selected_sample else "List 11 names of ships type schooner",
+    help="Ask questions about your database in plain English"
+)
+
+if st.button("🔄 Generate & Execute SQL"):
+    if nl_query.strip():
+        with st.spinner("Generating SQL and executing query..."):
+            try:
+                sql, results = text_to_sql(nl_query)
+
+                # Display results
+                st.success("Query executed successfully!")
+
+                # Show generated SQL
+                st.subheader("Generated SQL:")
+                st.code(sql, language="sql")
+
+                # Show results
+                st.subheader("Results:")
+                if results:
+                    # Convert results to a more readable format
+                    if isinstance(results[0], tuple):
+                        # If results are tuples, display as table
+                        st.write(f"Found {len(results)} rows:")
+                        for i, row in enumerate(results[:50]):  # Show first 50 rows
+                            st.write(f"Row {i+1}: {row}")
+                        if len(results) > 50:
+                            st.info(f"Showing first 50 rows out of {len(results)} total results.")
+                    else:
+                        st.write(results)
+                else:
+                    st.info("Query executed successfully but returned no results.")
+
+            except Exception as e:
+                st.error(f"Error: {str(e)}")
+                st.write("Please try rephrasing your query or check if the requested data exists in the database.")
     else:
-st.
+        st.warning("Please enter a query to proceed.")
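The UI is a thin wrapper around pipeline.text_to_sql, so the pipeline can also be exercised without Streamlit during development. A minimal sketch (not part of the commit), assuming the database behind db_utils is reachable and the model weights are already cached:

from pipeline import text_to_sql

# Reuse one of the sample queries from the sidebar list above.
sql, results = text_to_sql("List 11 names of ships type schooner")
print("Generated SQL:", sql)
print("First rows:", results[:5])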
pipeline.py
CHANGED
@@ -1,51 +1,132 @@
 import os
-[old lines 2-3 truncated in this view]
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from db_utils import get_schema, execute_sql
 
-# Initialize model
+# Initialize model and tokenizer as global variables
 model = None
 tokenizer = None
-try:
-    tokenizer = AutoTokenizer.from_pretrained(
-        "Snowflake/Arctic-Text2SQL-R1-7B",
-        cache_dir="/tmp/cache/huggingface",
-        trust_remote_code=True
-    )
-    model = LLM(
-        model="Snowflake/Arctic-Text2SQL-R1-7B",
-        dtype="float16",
-        gpu_memory_utilization=0.75,
-        max_model_len=1024,
-        max_num_seqs=1,
-        enforce_eager=True,
-        trust_remote_code=True
-    )
-except Exception as e:
-    print(f"Error loading model at startup: {e}")
-    raise
 
-[old line 28 truncated in this view]
+def load_model():
+    """Load SQLCoder model with quantization for memory efficiency"""
+    global model, tokenizer
+
+    if model is not None and tokenizer is not None:
+        return model, tokenizer
+
     try:
-[old lines 30-32 truncated in this view]
+        # Configure quantization to reduce memory usage
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4"
+        )
+
+        # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(
+            "defog/sqlcoder-7b-2",
+            trust_remote_code=True,
+            cache_dir="/tmp/cache/huggingface"
+        )
+
+        # Load model with quantization
+        model = AutoModelForCausalLM.from_pretrained(
+            "defog/sqlcoder-7b-2",
+            quantization_config=quantization_config,
+            device_map="auto",
+            trust_remote_code=True,
+            torch_dtype=torch.float16,
+            cache_dir="/tmp/cache/huggingface"
+        )
+
+        print("SQLCoder model loaded successfully!")
+        return model, tokenizer
+
+    except Exception as e:
+        print(f"Error loading SQLCoder model: {e}")
+        raise e
+
+def generate_sql(nl_query, schema):
+    """Generate SQL using SQLCoder with proper prompting"""
+    prompt = f"""### Task
+Generate a PostgreSQL query to answer this question: {nl_query}
 
 ### Database Schema
+The query will run on a database with the following schema:
 {schema}
 
-[old lines 37-38 truncated in this view]
+### Instructions
+- Return only the SQL query, no explanation
+- Use proper PostgreSQL syntax
+- Include appropriate LIMIT clauses if the question asks for a specific number of results
+
+### SQL Query:
 """
-[old lines 40-46 truncated in this view]
+    return prompt
+
+def text_to_sql(nl_query):
+    """Main function to convert natural language to SQL and execute it"""
+    try:
+        # Load model if not already loaded
+        model, tokenizer = load_model()
+
+        # Get database schema
+        schema = get_schema()
+
+        # Create the prompt
+        prompt = generate_sql(nl_query, schema)
+
+        # Tokenize input
+        inputs = tokenizer.encode(prompt, return_tensors="pt")
+
+        # Move to appropriate device
+        device = next(model.parameters()).device
+        inputs = inputs.to(device)
+
+        # Generate SQL
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs,
+                max_new_tokens=200,
+                num_beams=4,
+                temperature=0.1,
+                do_sample=False,
+                pad_token_id=tokenizer.eos_token_id,
+                eos_token_id=tokenizer.eos_token_id
+            )
+
+        # Decode the output
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Extract just the SQL part (after the prompt)
+        sql_start = generated_text.find("### SQL Query:") + len("### SQL Query:")
+        sql = generated_text[sql_start:].strip()
+
+        # Clean up the SQL (remove any extra text after the query)
+        sql_lines = sql.split('\n')
+        sql = sql_lines[0].strip() if sql_lines else sql.strip()
+
+        # Remove any trailing semicolon if present and clean
+        sql = sql.rstrip(';').strip()
+
+        # Basic validation
+        if not sql or not sql.lower().startswith('select'):
+            raise ValueError(f"Generated invalid SQL: {sql}")
+
+        print(f"Generated SQL: {sql}")
+
+        # Execute the SQL
         results = execute_sql(sql)
+
         return sql, results
+
     except Exception as e:
         print(f"Error in text_to_sql: {e}")
-[old line 51 truncated in this view]
+        return f"Error: {str(e)}", []
+
+# Initialize model on import (optional - can be lazy loaded)
+try:
+    load_model()
+except Exception as e:
+    print(f"Model will be loaded on first use due to: {e}")
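pipeline.py imports get_schema and execute_sql from db_utils.py, which this commit does not touch, so that file is not shown here. A hypothetical sketch of the interface the pipeline relies on, assuming a PostgreSQL database reached through a DATABASE_URL environment variable and the SQLAlchemy version pinned in requirements.txt:

# Hypothetical db_utils.py sketch (not part of the commit); only the two
# functions pipeline.py imports are assumed: get_schema() and execute_sql().
import os
from sqlalchemy import create_engine, text

engine = create_engine(os.environ.get("DATABASE_URL", "postgresql://localhost/postgres"))

def get_schema():
    """Return a plain-text description of the public schema for the prompt."""
    query = """
        SELECT table_name, column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = 'public'
        ORDER BY table_name, ordinal_position
    """
    with engine.connect() as conn:
        rows = conn.execute(text(query)).fetchall()
    return "\n".join(f"{t}.{c}: {d}" for t, c, d in rows)

def execute_sql(sql):
    """Run the generated query and return all rows as tuples."""
    with engine.connect() as conn:
        return [tuple(row) for row in conn.execute(text(sql)).fetchall()]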
requirements.txt
CHANGED
@@ -1,8 +1,8 @@
-transformers==4.
-accelerate==
+transformers==4.45.2
+accelerate==0.34.2
 psycopg2-binary==2.9.10
 sqlalchemy==2.0.43
 python-dotenv==1.1.1
-[old line 6 truncated in this view]
+torch==2.4.1
 streamlit==1.39.0
-[old line 8 truncated in this view]
+bitsandbytes==0.43.3