MongoDB Vector Store Example
Setup MongoDB instance on Atlas (or wherever is convenient)
Download state_of_the_union.txt
Add the index. On the database cluster page, go to the Search tab > Create Index, and paste the following into the JSON editor:
{
"mappings": {
"dynamic": true,
"fields": {
"embedding": {
"dimensions": 1536,
"similarity": "cosine",
"type": "knnVector"
}
}
}
}
Get your mongo uri, and save the following example langchain template to
mongodbvector_chain.json.
TODO: Avoid putting the password directly in the schema (a Mongo URI pasted into "host" typically embeds the credentials).
{
"name": "MongoDB Chain",
"description": "An example chain using MongoDB as a retriever.",
"template_version": "0.0.7",
"chain": {
"type": "RetrievalQA",
"chain_type": "stuff",
"llm": {
"type": "openai",
"args": {
"temperature": 0
}
},
"retriever": {
"type": "vectorstore",
"vectorstore": {
"type": "MongoDB",
"host": "<mongo uri here>",
"port": ""
}
}
}
}
Seed your database:
import os

from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import MongoDBAtlasVectorSearch
from pymongo import MongoClient

# Pull settings from a local .env file into the process environment.
load_dotenv()
MONGODB_ATLAS_CLUSTER_URI = os.environ["MONGO_URI"]
# Looked up eagerly so a missing key fails right here with a KeyError. The
# value is not referenced again in this snippet; presumably OpenAIEmbeddings
# picks the key up from the environment -- confirm against the LangChain docs.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Load the source text and split it into chunks of 1000 characters, no overlap.
loader = TextLoader("state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# One embeddings client, shared with the vector store below.
embeddings = OpenAIEmbeddings()

# Collection that will hold the embedded chunks.
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)
db_name = "langchain_db"
collection_name = "langchain_col"
collection = client[db_name][collection_name]

# Embed the chunks and insert them into the collection.
docsearch = MongoDBAtlasVectorSearch(collection=collection, embedding=embeddings)
docsearch.add_documents(docs)
Make sure similarity search is working:
# Sanity check: perform a similarity search between the embedding of the query
# and the embeddings of the documents.
query = "What did the president say about Ketanji Brown Jackson"
# k=10 asks for up to 10 results. The return value is not assigned, so it is
# only shown when this runs as the last expression in a notebook/REPL cell.
docsearch.similarity_search(query, k=10)
Run the chain
from langchain_interpreter import chain_from_file
# Build the chain from the JSON template file mongodbvector_chain.json.
chain = chain_from_file("mongodbvector_chain.json")
# Ask the same question that was used for the similarity-search check.
chain.run("What did the president say about Ketanji Brown Jackson")