Skip to content

Chroma store

ChromaStore

Bases: Store

Object encapsulating the ChromaDB store that has vector search enabled.

The ChromaStore object provides insert and retrieval capabilities.

Attributes:

Name Type Description
collection_name str

The name of the chroma collection to use.

collection Collection

A chroma collection object.

persist_path Optional[str]

Path to save the chroma database.

Source code in agentuniverse/agent/action/knowledge/store/chroma_store.py
Python
class ChromaStore(Store):
    """Object encapsulating the ChromaDB store that has vector search enabled.

    The ChromaStore object provides insert and retrieval capabilities.

    Attributes:
        collection_name (str): The name of the chroma collection to use.
        collection (Collection): A chroma collection object.
        persist_path (Optional[str]): Path to save the chroma database.
    """

    collection_name: Optional[str] = 'chroma_db'
    collection: Collection = None
    persist_path: Optional[str] = None

    def __init__(self, **kwargs):
        """Initialize the chroma store class."""
        super().__init__(**kwargs)
        if self.collection is None:
            # default to create a new collection or get an existed collection.
            self.collection = self.client.get_or_create_collection(name=self.collection_name)

    def _new_client(self) -> Any:
        """Initialize the chroma client."""
        if self.persist_path is None:
            return chromadb.PersistentClient()
        else:
            return chromadb.PersistentClient(path=self.persist_path)

    def query(self, query: Query, **kwargs) -> List[Document]:
        """Query the chroma collection with the given query and return the top k results.

        Args:
            query (Query): The query object.
            **kwargs: Arbitrary keyword arguments.

        Note:
            If there is no embedding in the specific query, but the embedding model is configured in the store,
            the embedding data of the query is automatically obtained by the embedding model.

        Returns:
            List[Document]: List of documents retrieved by the query.
        """

        embedding = query.embedding
        if self.embedding_model is not None and len(embedding) == 0:
            embedding = self.embedding_model.get_embeddings([query.query_str])[0]
        if len(embedding) > 0:
            query_result = self.collection.query(
                n_results=query.similarity_top_k,
                query_embeddings=[embedding]
            )
        else:
            query_result = self.collection.query(
                n_results=query.similarity_top_k,
                query_texts=[query.query_str]
            )
        # convert to the agentUniverse(aU) document format
        return self.to_documents(query_result)

    def insert_documents(self, documents: List[Document], **kwargs: Any):
        """Insert documents to the chroma collection.

        Args:
            documents (List[Document]): The documents to be inserted.
            **kwargs: Arbitrary keyword arguments.

        Note:
            If there is no embedding in the specific document, but the embedding model is configured in the store,
            the embedding data of the document is automatically obtained by the embedding model.
        """

        for document in documents:
            embedding = document.embedding
            if self.embedding_model is not None and len(embedding) == 0:
                embedding = self.embedding_model.get_embeddings([document.text])[0]
            self.collection.add(
                documents=[document.text],
                metadatas=[document.metadata],
                embeddings=[embedding] if embedding is not None else None,
                ids=[document.id]
            )

    def upsert_document(self, documents: List[Document], **kwargs):
        """Upsert document into the store."""
        for document in documents:
            embedding = document.embedding
            if self.embedding_model is not None and len(embedding) == 0:
                embedding = self.embedding_model.get_embeddings([document.text])[0]
            self.collection.upsert(
                documents=[document.text],
                metadatas=[document.metadata],
                embeddings=[embedding] if embedding is not None else None,
                ids=[document.id]
            )

    def update_document(self, documents: List[Document], **kwargs):
        """Update document into the store."""
        for document in documents:
            embedding = document.embedding
            if self.embedding_model is not None and len(embedding) == 0:
                embedding = self.embedding_model.get_embeddings([document.text])[0]
            self.collection.update(
                documents=[document.text],
                metadatas=[document.metadata],
                embeddings=[embedding] if embedding is not None else None,
                ids=[document.id]
            )

    @staticmethod
    def to_documents(query_result: QueryResult) -> List[Document]:
        """Convert the query results of ChromaDB to the agentUniverse(aU) document format."""

        if query_result is None:
            return []
        documents = []
        for i in range(len(query_result['ids'][0])):
            documents.append(Document(id=query_result['ids'][0][i],
                                      text=query_result['documents'][0][i],
                                      embedding=query_result['embeddings'][0][i]
                                      if query_result['embeddings'] is not None else [],
                                      metadata=query_result['metadatas'][0][i]
                                      if query_result['metadatas'] is not None else None))
        return documents

__init__(**kwargs)

Initialize the chroma store class.

Source code in agentuniverse/agent/action/knowledge/store/chroma_store.py
Python
def __init__(self, **kwargs):
    """Initialize the chroma store class."""
    super().__init__(**kwargs)
    if self.collection is None:
        # default to create a new collection or get an existed collection.
        self.collection = self.client.get_or_create_collection(name=self.collection_name)

insert_documents(documents, **kwargs)

Insert documents to the chroma collection.

Parameters:

Name Type Description Default
documents List[Document]

The documents to be inserted.

required
**kwargs Any

Arbitrary keyword arguments.

{}
Note

If there is no embedding in the specific document, but the embedding model is configured in the store, the embedding data of the document is automatically obtained by the embedding model.

Source code in agentuniverse/agent/action/knowledge/store/chroma_store.py
Python
def insert_documents(self, documents: List[Document], **kwargs: Any):
    """Insert documents to the chroma collection.

    Args:
        documents (List[Document]): The documents to be inserted.
        **kwargs: Arbitrary keyword arguments.

    Note:
        If there is no embedding in the specific document, but the embedding model is configured in the store,
        the embedding data of the document is automatically obtained by the embedding model.
    """

    for document in documents:
        embedding = document.embedding
        if self.embedding_model is not None and len(embedding) == 0:
            embedding = self.embedding_model.get_embeddings([document.text])[0]
        self.collection.add(
            documents=[document.text],
            metadatas=[document.metadata],
            embeddings=[embedding] if embedding is not None else None,
            ids=[document.id]
        )

query(query, **kwargs)

Query the chroma collection with the given query and return the top k results.

Parameters:

Name Type Description Default
query Query

The query object.

required
**kwargs

Arbitrary keyword arguments.

{}
Note

If there is no embedding in the specific query, but the embedding model is configured in the store, the embedding data of the query is automatically obtained by the embedding model.

Returns:

Type Description
List[Document]

List[Document]: List of documents retrieved by the query.

Source code in agentuniverse/agent/action/knowledge/store/chroma_store.py
Python
def query(self, query: Query, **kwargs) -> List[Document]:
    """Query the chroma collection with the given query and return the top k results.

    Args:
        query (Query): The query object.
        **kwargs: Arbitrary keyword arguments.

    Note:
        If there is no embedding in the specific query, but the embedding model is configured in the store,
        the embedding data of the query is automatically obtained by the embedding model.

    Returns:
        List[Document]: List of documents retrieved by the query.
    """

    embedding = query.embedding
    if self.embedding_model is not None and len(embedding) == 0:
        embedding = self.embedding_model.get_embeddings([query.query_str])[0]
    if len(embedding) > 0:
        query_result = self.collection.query(
            n_results=query.similarity_top_k,
            query_embeddings=[embedding]
        )
    else:
        query_result = self.collection.query(
            n_results=query.similarity_top_k,
            query_texts=[query.query_str]
        )
    # convert to the agentUniverse(aU) document format
    return self.to_documents(query_result)

to_documents(query_result) staticmethod

Convert the query results of ChromaDB to the agentUniverse(aU) document format.

Source code in agentuniverse/agent/action/knowledge/store/chroma_store.py
Python
@staticmethod
def to_documents(query_result: QueryResult) -> List[Document]:
    """Convert the query results of ChromaDB to the agentUniverse(aU) document format."""

    if query_result is None:
        return []
    documents = []
    for i in range(len(query_result['ids'][0])):
        documents.append(Document(id=query_result['ids'][0][i],
                                  text=query_result['documents'][0][i],
                                  embedding=query_result['embeddings'][0][i]
                                  if query_result['embeddings'] is not None else [],
                                  metadata=query_result['metadatas'][0][i]
                                  if query_result['metadatas'] is not None else None))
    return documents

update_document(documents, **kwargs)

Update document into the store.

Source code in agentuniverse/agent/action/knowledge/store/chroma_store.py
Python
def update_document(self, documents: List[Document], **kwargs):
    """Update document into the store."""
    for document in documents:
        embedding = document.embedding
        if self.embedding_model is not None and len(embedding) == 0:
            embedding = self.embedding_model.get_embeddings([document.text])[0]
        self.collection.update(
            documents=[document.text],
            metadatas=[document.metadata],
            embeddings=[embedding] if embedding is not None else None,
            ids=[document.id]
        )

upsert_document(documents, **kwargs)

Upsert document into the store.

Source code in agentuniverse/agent/action/knowledge/store/chroma_store.py
Python
def upsert_document(self, documents: List[Document], **kwargs):
    """Upsert document into the store."""
    for document in documents:
        embedding = document.embedding
        if self.embedding_model is not None and len(embedding) == 0:
            embedding = self.embedding_model.get_embeddings([document.text])[0]
        self.collection.upsert(
            documents=[document.text],
            metadatas=[document.metadata],
            embeddings=[embedding] if embedding is not None else None,
            ids=[document.id]
        )