Chat with PDF using Pinecone

SUTRA by TWO Platforms
SUTRA is a family of large multi-lingual language models (LMLMs) pioneered by TWO Platforms. SUTRA's dual-transformer approach extends the power of both MoE and Dense AI language model architectures, delivering cost-efficient multilingual capabilities for 50+ languages. It powers scalable AI applications for conversation, search, and advanced reasoning, ensuring high performance across diverse languages, domains, and applications.
Get Your API Keys
Before you begin, make sure you have:
- A SUTRA API key (Get yours at TWO AI's SUTRA API page)
- Basic familiarity with Python and Jupyter notebooks
This notebook is designed to run in Google Colab, so no local Python installation is required.
📌 STEP 1 : Install Required Packages
!pip install -q langchain langchain_openai langchain-community requests pypdf langchain-pinecone ipywidgets
📌 STEP 2 : Set Up API Keys
import os
from google.colab import userdata
# Set the API key from Colab secrets
os.environ["SUTRA_API_KEY"] = userdata.get("SUTRA_API_KEY")
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
os.environ["PINECONE_API_KEY"] = userdata.get("PINECONE_API_KEY")
📌 STEP 3 : Load Your PDF Document
from langchain_community.document_loaders import PyPDFLoader
# Load PDF using PyPDFLoader
loader = PyPDFLoader("/content/NIPS-2017-attention-is-all-you-need-Paper.pdf") # Replace with your actual PDF path
documents = loader.load()
print(f"Loaded {len(documents)} pages.")
📌 STEP 4 : Split Document into Chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)
chunks = text_splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks.")
📌 STEP 5 : Set Up Pinecone Vector Database
import os
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "sutra-pdf-indexs1"
# Create index if it doesn't exist
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # must match the embedding model (OpenAI ada-002 produces 1536-dim vectors)
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
index = pc.Index(index_name)
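Before writing any vectors, you can confirm the index is reachable with the Pinecone client's describe_index_stats; the vector count will be 0 on a fresh index:
# Check index health; total_vector_count stays 0 until documents are added
print(index.describe_index_stats())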
📌 STEP 6 : Set Up Pinecone Vector Store
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
# Create vector store using Pinecone
embeddings = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))
vectorstore = PineconeVectorStore(index=index, embedding=embeddings)
# Add documents to Pinecone vector store
vectorstore.add_documents(chunks)
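Before wiring up the full chain, it is worth sanity-checking retrieval with a direct similarity search (the query below is just an example):
# Fetch the 3 chunks most similar to a test query
results = vectorstore.similarity_search("What is self-attention?", k=3)
for doc in results:
    print(doc.page_content[:150], "...\n")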
📌 STEP 7 : Set Up Conversation Memory and RAG Chain
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI
# Set up conversation memory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)
# RAG Chain with Sutra LLM
rag_chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(
        api_key=os.getenv("SUTRA_API_KEY"),
        base_url="https://api.two.ai/v2",
        model="sutra-v2",
        temperature=0.7
    ),
    retriever=vectorstore.as_retriever(),
    memory=memory
)
📌 STEP 8 : Ask Question with Language Specification
from langchain.schema import HumanMessage
# Desired language for the response
language = "Hindi" # change to any supported language
# User question
question = "What is Transformer ?"
# Get RAG answer from chain
context_result = rag_chain.invoke({"question": question})
rag_context = context_result['answer']
# Format prompt for multilingual Sutra response
system_prompt = f"""
You are a helpful assistant answering based on a document.
Use this context: {rag_context}
Always reply in: {language}
Question: {question}
"""
# Invoke Sutra LLM directly for language-controlled response
llm = ChatOpenAI(
    api_key=os.getenv("SUTRA_API_KEY"),
    base_url="https://api.two.ai/v2",
    model="sutra-v2",
    temperature=0.5,
)
response = llm.invoke([HumanMessage(content=system_prompt)])
print("User Question:", question)
print(f"Assistant ({language}):", response.content.strip())
✅ Final Integrated UI
# 1. Imports
import os
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from tempfile import NamedTemporaryFile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import HumanMessage
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
# 2. Get Sutra Chat Model
def get_sutra_model():
    return ChatOpenAI(
        api_key=os.getenv("SUTRA_API_KEY"),
        base_url="https://api.two.ai/v2",
        model="sutra-v2",
        temperature=0.7
    )
# 3. Load & Index PDF using Pinecone
def load_and_index_pdf(pdf_path):
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_documents(docs)
    # Embeddings
    embeddings = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))
    # Pinecone setup
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    index_name = "sutra-pdf-indexs2"
    # Create index if it doesn't exist
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=1536,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1")
        )
    index = pc.Index(index_name)
    vectorstore = PineconeVectorStore(index=index, embedding=embeddings)
    # Add to index
    vectorstore.add_documents(chunks)
    # RAG Chain
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=get_sutra_model(),
        retriever=vectorstore.as_retriever(),
        memory=memory
    )
    return chain
# 4. UI Components
pdf_file_upload = widgets.FileUpload(
    accept='.pdf',
    multiple=False,
    description='📁 Upload PDF',
    layout=widgets.Layout(width='300px')
)
load_pdf_button = widgets.Button(
    description="🔄 Load PDF",
    button_style='info',
    layout=widgets.Layout(width='150px')
)
status_output = widgets.Output()
languages = [
    "English", "Hindi", "Gujarati", "Bengali", "Tamil", "Telugu", "Kannada",
    "Malayalam", "Punjabi", "Marathi", "Urdu", "Assamese", "Odia", "Sanskrit",
    "Korean", "Japanese", "Arabic", "French", "German", "Spanish", "Portuguese",
    "Russian", "Chinese", "Vietnamese", "Thai", "Indonesian", "Turkish", "Polish",
    "Ukrainian", "Dutch", "Italian", "Greek", "Hebrew", "Persian", "Swedish",
    "Norwegian", "Danish", "Finnish", "Czech", "Hungarian", "Romanian", "Bulgarian",
    "Croatian", "Serbian", "Slovak", "Slovenian", "Estonian", "Latvian", "Lithuanian",
    "Malay", "Tagalog", "Swahili"
]
lang_dropdown = widgets.Dropdown(
    options=languages,
    value="English",
    description='🌐 Language:',
    layout=widgets.Layout(width='300px')
)
chat_output = widgets.HTML(
    value="<div style='padding:10px; font-family:Arial; font-size:14px; height:300px; overflow-y:auto; border:1px solid #ccc; border-radius:5px;'>Chat history will appear here...</div>"
)
user_input = widgets.Text(
    placeholder='Type your message...',
    layout=widgets.Layout(flex='4', width='auto')
)
send_button = widgets.Button(
    description="📤 Send",
    button_style='primary',
    layout=widgets.Layout(flex='1', width='auto')
)
messages = []
conversation_chain = None
# 5. Load PDF Logic
def on_load_pdf(b):
    global conversation_chain
    uploaded_files = pdf_file_upload.value
    with status_output:
        clear_output()
        if not uploaded_files:
            print("❌ Please upload a PDF file first.")
            return
        try:
            print("⏳ Processing uploaded PDF...")
            # ipywidgets 7.x exposes uploads as a dict; 8.x uses a tuple of dicts
            if isinstance(uploaded_files, dict):
                uploaded_file = list(uploaded_files.values())[0]
            else:
                uploaded_file = uploaded_files[0]
            with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(bytes(uploaded_file['content']))  # bytes() also handles 8.x memoryview
                tmp_path = tmp.name
            conversation_chain = load_and_index_pdf(tmp_path)
            print("✅ PDF loaded and indexed successfully!")
        except Exception as e:
            print("❌ Error:", e)
load_pdf_button.on_click(on_load_pdf)
# 6. Chat Interaction Logic
def on_send_click(b):
    global conversation_chain
    if conversation_chain is None:
        with status_output:
            clear_output()
            print("❌ Load a PDF first.")
        return
    user_text = user_input.value.strip()
    if not user_text:
        return
    lang = lang_dropdown.value
    messages.append(f"<b style='color:#13f22d;'>You:</b> {user_text}")
    # Retrieve context via the RAG chain (the chain expects a "question" key)
    context_response = conversation_chain.invoke({"question": user_text})
    rag_context = context_response['answer']
    system_msg = f"""
You are a helpful assistant answering based on a document.
Use this context: {rag_context}
Always reply in: {lang}
Question: {user_text}
"""
    chat_model = get_sutra_model()
    sutra_response = chat_model.invoke([HumanMessage(content=system_msg)])
    assistant_reply = sutra_response.content.strip()
    messages.append(f"<b style='color:#007acc;'>Assistant ({lang}):</b> {assistant_reply}")
    chat_html = "<br>".join(messages)
    chat_output.value = f"<div style='padding:10px; font-family:Arial; font-size:14px; height:300px; overflow-y:auto; border:1px solid #ccc; border-radius:5px;'>{chat_html}</div>"
    user_input.value = ""
send_button.on_click(on_send_click)
# 7. Final UI Layout
input_row = widgets.HBox([user_input, send_button])
pdf_row = widgets.HBox([pdf_file_upload, load_pdf_button])
ui = widgets.VBox([
    widgets.HTML("<h3 style='font-family:Arial;'>📚 Multilingual Chat with PDF (Sutra + Pinecone)</h3>"),
    pdf_row,
    lang_dropdown,
    chat_output,
    input_row,
    status_output
])
# 8. Display App
display(ui)
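When you are done experimenting, you may want to delete the serverless indexes created above so stale vectors do not linger. A sketch using the Pinecone client instantiated in STEP 5:
# Optional cleanup: remove the notebook's Pinecone indexes
for name in ("sutra-pdf-indexs1", "sutra-pdf-indexs2"):
    if name in pc.list_indexes().names():
        pc.delete_index(name)
        print(f"Deleted index: {name}")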