MongoDB Vector Store Example

Set up a MongoDB instance on Atlas (or wherever is convenient)
Download state_of_the_union.txt
Add the index. On the database cluster page, go to the Search tab > Create Index, and paste the following into the JSON editor:

{
  "mappings": {
    "dynamic": true,
    "fields": {
      "embedding": {
        "dimensions": 1536,
        "similarity": "cosine",
        "type": "knnVector"
      }
    }
  }
}

Get your MongoDB URI, and save the following example LangChain template to mongodbvector_chain.json.

TODO: Avoid putting the password directly in the schema.

{
  "name": "MongoDB Chain",
  "description": "An example chain using MongoDB as a retriever.",
  "template_version": "0.0.7",
  "chain": {
    "type": "RetrievalQA",
    "chain_type": "stuff",
    "llm": {
      "type": "openai",
      "args": {
        "temperature": 0
      }
    },
    "retriever": {
      "type": "vectorstore",
      "vectorstore": {
        "type": "MongoDB",
        "host": "<mongo uri here>",
        "port": ""
      }

    }
  }
}

Seed your database:

# Seed script: split state_of_the_union.txt into chunks, embed them with
# OpenAI, and store the chunks in a MongoDB Atlas collection.
import os

from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import MongoDBAtlasVectorSearch
from pymongo import MongoClient

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Indexing os.environ raises KeyError immediately if a setting is missing.
MONGODB_ATLAS_CLUSTER_URI = os.environ["MONGO_URI"]
# Not passed explicitly below; reading it here fails fast when it is unset.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Load the source text and split it into chunks of up to 1000 characters
# with no overlap between consecutive chunks.
loader = TextLoader("state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Single embeddings instance, reused by the vector store below.
embeddings = OpenAIEmbeddings()

client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)

db_name = "langchain_db"
collection_name = "langchain_col"
collection = client[db_name][collection_name]
docsearch = MongoDBAtlasVectorSearch(collection=collection, embedding=embeddings)

# Embed every chunk and insert it into the collection.
docsearch.add_documents(docs)

Make sure similarity search is working:

# Perform a similarity search between the embedding of the query and the
# embeddings of the stored documents, asking for the 10 closest matches.
query = "What did the president say about Ketanji Brown Jackson"
results = docsearch.similarity_search(query, k=10)

# Print each match so the check produces visible output when run as a
# script, not only when evaluated in a REPL or notebook cell.
for doc in results:
    print(doc.page_content)

Run the chain

from langchain_interpreter import chain_from_file

# Build the chain from the JSON template saved as mongodbvector_chain.json.
chain = chain_from_file("mongodbvector_chain.json")

# Keep and print the answer; a bare chain.run(...) call only shows its
# result in a REPL or notebook cell.
answer = chain.run("What did the president say about Ketanji Brown Jackson")
print(answer)