Build a private, offline RAG (Retrieval-Augmented Generation) chatbot that answers questions based on your documents — using Streamlit + LangChain + Ollama in 2026.
First install Ollama, then pull a local model:

ollama pull llama3.1:8b   # or mistral, phi3, gemma2
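The app also needs the Python packages imported below. With pip (package names follow LangChain's current split-package layout) that is roughly:

pip install streamlit langchain langchain-core langchain-community langchain-ollama langchain-text-splitters chromadb pypdf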
# app.py
import streamlit as st
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# LLM & Embeddings
llm = ChatOllama(model="llama3.1:8b")
embeddings = OllamaEmbeddings(model="llama3.1:8b")
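# Tip: reusing the chat model for embeddings works, but a dedicated embedding model
# is usually much faster and tends to retrieve better. For example, assuming you have
# pulled it with `ollama pull nomic-embed-text`:
#   embeddings = OllamaEmbeddings(model="nomic-embed-text")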
# Prompt template
prompt = ChatPromptTemplate.from_template(
"""Answer the question based only on the following context:
{context}
Question: {question}
"""
)
# Helper: join the retrieved chunks into one context string for the prompt
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# The RAG chain itself is assembled further down, once the retriever exists
# Streamlit UI
st.title("Private RAG Chatbot 2026")
uploaded_file = st.file_uploader("Upload PDF", type="pdf")
if uploaded_file:
    # Save & load PDF
    with open("temp.pdf", "wb") as f:
        f.write(uploaded_file.getbuffer())
    loader = PyPDFLoader("temp.pdf")
    docs = loader.load()

    # Split & embed
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # RAG chain, built now that the retriever exists
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    st.header("Ask questions about the document")
    question = st.text_input("Your question:")
    if question:
        response = rag_chain.invoke(question)
        st.write("**Answer:**", response)
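Note that Streamlit reruns the whole script on every interaction, so the load, split, and embed steps above repeat each time a question is asked. A minimal sketch of one way around that, caching the index per uploaded file with st.cache_resource (the build_retriever helper is my name, not part of the original app):

@st.cache_resource(show_spinner="Indexing document...")
def build_retriever(pdf_bytes: bytes):
    # Runs once per unique upload; later reruns reuse the cached retriever
    with open("temp.pdf", "wb") as f:
        f.write(pdf_bytes)
    docs = PyPDFLoader("temp.pdf").load()
    splits = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    return vectorstore.as_retriever(search_kwargs={"k": 4})

# Inside the `if uploaded_file:` block you would then call:
#   retriever = build_retriever(uploaded_file.getvalue())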
To keep chat history across turns, swap the single-question chain for a ConversationalRetrievalChain with buffer memory, reusing the llm and retriever created above:

from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)
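ConversationBufferMemory and ConversationalRetrievalChain still run, but recent LangChain releases mark them as legacy. If you would rather stay on the supported LCEL path, here is a hedged sketch of the equivalent setup (the prompt wording and the history_chain name are my own):

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Rewrites the latest question into a standalone one, using the chat history
rephrase_prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
    ("human", "Rewrite the question above so it makes sense on its own."),
])

# Answers strictly from the retrieved context
answer_prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer using only this context:\n\n{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

history_chain = create_retrieval_chain(
    create_history_aware_retriever(llm, retriever, rephrase_prompt),
    create_stuff_documents_chain(llm, answer_prompt),
)
# Invoke with {"input": question, "chat_history": [...]}; the reply is in result["answer"].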
# In Streamlit, render a chat UI with the history kept in session state
if "messages" not in st.session_state:
    st.session_state.messages = []

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask anything about the document"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        response = qa_chain.invoke({"question": prompt})
        st.markdown(response["answer"])
    st.session_state.messages.append({"role": "assistant", "content": response["answer"]})
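By default the Chroma index lives only in memory and is rebuilt every session. If you want it to survive restarts, Chroma can persist to disk; a small sketch (the ./chroma_db path is arbitrary):

# When building the index:
vectorstore = Chroma.from_documents(
    documents=splits, embedding=embeddings, persist_directory="./chroma_db"
)

# On a later run, reopen it without re-embedding:
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})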
Run the app with:

streamlit run app.py

If you containerize it later, remember that Streamlit listens on port 8501 by default, so your Dockerfile should EXPOSE 8501.