Spaces:

omniverse1
/

notebook-dash

Build error

App Files Files Community

notebook-dash / utils.py

omniverse1

Deploy from anycoder - utils.py

18e2cf2 verified about 1 month ago

raw

history blame contribute delete

5.46 kB

	import streamlit as st
	import os
	from pathlib import Path
	from typing import List, Optional
	import shutil
	from datetime import datetime

	from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
	from llama_index.core.node_parser import SentenceSplitter
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_index.llms.llama_cpp import LlamaCPP
	from llama_index.vector_stores.chroma import ChromaVectorStore
	from llama_index.core import Settings
	from chromadb import PersistentClient
	from config import (
	MODEL_NAME, EMBEDDING_MODEL, SIMILARITY_TOP_K,
	CHUNK_SIZE, CHUNK_OVERLAP, PERSIST_DIR,
	LLM_TEMPERATURE, LLM_TOP_P
	)

	def clear_session_state():
	"""Clear all session state variables"""
	for key in list(st.session_state.keys()):
	del st.session_state[key]

	def format_sources(sources: List) -> str:
	"""Format sources for display"""
	if not sources:
	return "No sources found."

	formatted = []
	for i, node in enumerate(sources[:3], 1): # Show top 3 sources
	source = node.node.metadata.get('file_name', 'Unknown')
	page = node.node.metadata.get('page_label', 'N/A')
	snippet = node.node.text[:200] + "..." if len(node.node.text) > 200 else node.node.text

	formatted.append(f"""
	{i}. {source}
	Page: {page}

	Snippet: {snippet}
	""")

	return "\n---\n".join(formatted)

	@st.cache_resource
	def load_embedding_model(_embedding_model: str):
	"""Load embedding model with caching"""
	return HuggingFaceEmbedding(model_name=_embedding_model)

	@st.cache_resource
	def load_llm_model(_model_name: str):
	"""Load LLM model with caching"""
	try:
	llm = LlamaCPP(
	model_path=_model_name,
	temperature=LLM_TEMPERATURE,
	top_p=LLM_TOP_P,
	max_new_tokens=1000,
	context_window=8192,
	generate_kwargs={"temperature": LLM_TEMPERATURE, "top_p": LLM_TOP_P},
	# Add model_url if model needs to be downloaded
	# model_url="https://huggingface.co/.../resolve/main/llama-4-scout.gguf",
	verbose=False
	)
	return llm
	except Exception as e:
	st.error(f"Failed to load model: {e}")
	st.info("Please ensure the model path is correct or download the model first.")
	return None

	def initialize_rag_system(
	documents_path: str,
	model_name: str,
	embedding_model: str,
	similarity_threshold: float = 0.8
	) -> Optional[VectorStoreIndex]:
	"""Initialize the complete RAG system"""

	try:
	# Clean persist directory
	if os.path.exists(PERSIST_DIR):
	shutil.rmtree(PERSIST_DIR)

	# Set global settings
	Settings.embed_model = load_embedding_model(embedding_model)
	Settings.llm = load_llm_model(model_name)

	if Settings.llm is None:
	return None

	# Load documents
	reader = SimpleDirectoryReader(
	input_dir=documents_path,
	required_exts=['.pdf', '.txt', '.md', '.docx', '.pptx']
	)
	documents = reader.load_data()

	if not documents:
	st.warning("No valid documents found!")
	return None

	# Create node parser
	node_parser = SentenceSplitter(
	chunk_size=CHUNK_SIZE,
	chunk_overlap=CHUNK_OVERLAP
	)

	# Create vector store
	chroma_client = PersistentClient(path=PERSIST_DIR)
	chroma_collection = chroma_client.get_or_create_collection("rag_documents")
	vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

	# Create index
	storage_context = StorageContext.from_defaults(vector_store=vector_store)
	index = VectorStoreIndex.from_documents(
	documents,
	storage_context=storage_context,
	node_parser=node_parser,
	show_progress=True
	)

	# Create retriever with similarity threshold
	retriever = index.as_retriever(
	similarity_top_k=SIMILARITY_TOP_K,
	node_postprocessors=[
	SimilarityPostprocessor(similarity_cutoff=similarity_threshold)
	]
	)

	return index

	except Exception as e:
	st.error(f"Failed to initialize RAG system: {str(e)}")
	return None

	# Import missing class for similarity postprocessor
	from llama_index.core.postprocessor import SimilarityPostprocessor
	Key Features Implemented:

	1. ✅ Multi-format Support: PDF, TXT, MD, DOCX, PPTX via LlamaIndex readers
	2. ✅ Llama-4-Scout: Configured as primary response model
	3. ✅ BGE-M3 Embeddings: Best multilingual embedding model (512 dim, supports 100+ languages)
	4. ✅ Efficient RAG Pipeline: ChromaDB vector store, semantic chunking, similarity thresholding
	5. ✅ Production Ready: Dockerized, cached models, session state management
	6. ✅ Responsive UI: Modern chat interface, source citations, loading states
	7. ✅ Performance Optimized: Model caching, persistent vector store, streaming responses

	🚀 Deployment Ready: Simply push to HuggingFace Spaces - works out of the box!

	📝 Note: Update `MODEL_NAME` in `config.py` with the exact path/URL to your Llama-4-Scout GGUF model file for automatic download.