Skip to content

OpenAI embedding

OpenAIEmbedding

Bases: Embedding

The openai embedding class.

Source code in agentuniverse/agent/action/knowledge/embedding/openai_embedding.py
Python
class OpenAIEmbedding(Embedding):
    """Embedding implementation backed by the OpenAI embeddings endpoint.

    Provides a synchronous and an asynchronous way to embed a list of texts,
    plus a converter to the langchain ``OpenAIEmbeddings`` class.
    """

    # Extra keyword arguments forwarded to the OpenAI / AsyncOpenAI constructors.
    openai_client_args: Optional[dict] = None
    # API key; defaults to the value returned by get_from_env("OPENAI_API_KEY").
    openai_api_key: Optional[str] = Field(default_factory=lambda: get_from_env("OPENAI_API_KEY"))
    # Synchronous client; (re)created by get_embeddings and, if missing, by as_langchain.
    client: Any = None
    # Asynchronous client; (re)created by async_get_embeddings and, if missing, by as_langchain.
    async_client: Any = None
    # Optional output dimensionality; only sent to the API when truthy.
    dimensions: Optional[int] = None

    def _build_request(self, texts: List[str]) -> dict:
        """Validate the configuration and build the kwargs for `embeddings.create`.

        Args:
            texts (List[str]): A list of texts that need to be embedded.

        Returns:
            dict: Keyword arguments for the `embeddings.create` call.

        Raises:
            ValueError: If `embedding_model_name` is not set.
        """
        if self.embedding_model_name is None:
            raise ValueError("Must provide `embedding_model_name`")
        request = {"input": texts, "model": self.embedding_model_name}
        # `dimensions` is optional and only forwarded when a truthy value is configured.
        if self.dimensions:
            request["dimensions"] = self.dimensions
        return request

    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get the OpenAI embeddings.

        Note:
            The `embedding_model_name` parameter of the openai embedding class must be provided.
            The `dimensions` parameter of the openai embedding class is optional.

        Args:
            texts (List[str]): A list of texts that need to be embedded.

        Returns:
            List[List[float]]: One list of floats per entry in the API response data.

        Raises:
            ValueError: If `embedding_model_name` is not set, or if the OpenAI API
                rejects the request with a `BadRequestError` (for example when the
                texts exceed the embedding model token limit).
        """
        # Validate first so a misconfigured instance fails before any client is built.
        request = self._build_request(texts)
        self.client = OpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
        try:
            response = self.client.embeddings.create(**request)
        except BadRequestError as e:
            # Keep the original error attached as the cause for easier debugging.
            raise ValueError(e.message) from e
        return [item.embedding for item in response.data]

    async def async_get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Asynchronously get the OpenAI embeddings.

        Note:
            The `embedding_model_name` parameter of the openai embedding class must be provided.
            The `dimensions` parameter of the openai embedding class is optional.

        Args:
            texts (List[str]): A list of texts that need to be embedded.

        Returns:
            List[List[float]]: One list of floats per entry in the API response data.

        Raises:
            ValueError: If `embedding_model_name` is not set, or if the OpenAI API
                rejects the request with a `BadRequestError` (for example when the
                texts exceed the embedding model token limit).
        """
        # Validate first so a misconfigured instance fails before any client is built.
        request = self._build_request(texts)
        self.async_client = AsyncOpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
        try:
            response = await self.async_client.embeddings.create(**request)
        except BadRequestError as e:
            # Keep the original error attached as the cause for easier debugging.
            raise ValueError(e.message) from e
        return [item.embedding for item in response.data]

    def as_langchain(self) -> OpenAIEmbeddings:
        """Convert the agentUniverse(aU) openai embedding class to the langchain openai embedding class.

        The clients are created here when they do not exist yet, so this method
        can be called before `get_embeddings` / `async_get_embeddings`.

        Returns:
            OpenAIEmbeddings: A langchain embedding object that uses this
                instance's API key and the `embeddings` resources of its clients.
        """
        # Both attributes default to None; build them on demand instead of
        # failing with AttributeError on `None.embeddings`.
        if self.client is None:
            self.client = OpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
        if self.async_client is None:
            self.async_client = AsyncOpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
        return OpenAIEmbeddings(openai_api_key=self.openai_api_key,
                                client=self.client.embeddings, async_client=self.async_client.embeddings)

as_langchain()

Convert the agentUniverse(aU) openai embedding class to the langchain openai embedding class.

Source code in agentuniverse/agent/action/knowledge/embedding/openai_embedding.py
Python
def as_langchain(self) -> OpenAIEmbeddings:
    """Convert the agentUniverse(aU) openai embedding class to the langchain openai embedding class.

    The clients are created here when they do not exist yet, so this method
    can be called before any embedding request has been made.

    Returns:
        OpenAIEmbeddings: A langchain embedding object that uses this
            instance's API key and the `embeddings` resources of its clients.
    """
    # `client` / `async_client` default to None; build them on demand instead
    # of failing with AttributeError on `None.embeddings`.
    if self.client is None:
        self.client = OpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
    if self.async_client is None:
        self.async_client = AsyncOpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
    return OpenAIEmbeddings(openai_api_key=self.openai_api_key,
                            client=self.client.embeddings, async_client=self.async_client.embeddings)

async_get_embeddings(texts) async

Asynchronously get the OpenAI embeddings.

Note

The embedding_model_name parameter of the openai embedding class must be provided. The dimensions parameter of the openai embedding class is optional.

Returns: List[List[float]]: Each text gets a list of floats; the result is the list of those lists, one per text.

Raises: ValueError: If the texts exceed the embedding model's token limit or a required parameter is missing.

Source code in agentuniverse/agent/action/knowledge/embedding/openai_embedding.py
Python
async def async_get_embeddings(self, texts: List[str]) -> List[List[float]]:
    """Asynchronously get the OpenAI embeddings.

    Note:
        The `embedding_model_name` parameter of the openai embedding class must be provided.
        The `dimensions` parameter of the openai embedding class is optional.

    Args:
        texts (List[str]): A list of texts that need to be embedded.

    Returns:
        List[List[float]]: One list of floats per entry in the API response data.

    Raises:
        ValueError: If `embedding_model_name` is not set, or if the OpenAI API
            rejects the request with a `BadRequestError` (for example when the
            texts exceed the embedding model token limit).
    """
    # Validate first so a misconfigured instance fails before any client is built.
    if self.embedding_model_name is None:
        raise ValueError("Must provide `embedding_model_name`")

    request = {"input": texts, "model": self.embedding_model_name}
    # `dimensions` is optional and only forwarded when a truthy value is configured.
    if self.dimensions:
        request["dimensions"] = self.dimensions

    self.async_client = AsyncOpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
    try:
        response = await self.async_client.embeddings.create(**request)
    except BadRequestError as e:
        # Keep the original error attached as the cause for easier debugging.
        raise ValueError(e.message) from e

    # Return the embeddings as a list of lists of floats
    return [item.embedding for item in response.data]

get_embeddings(texts)

Get the OpenAI embeddings.

Note

The embedding_model_name parameter of the openai embedding class must be provided. The dimensions parameter of the openai embedding class is optional.

Returns: List[List[float]]: Each text gets a float list, and the result is a list of the results for each text.

Raises: ValueError: If the texts exceed the embedding model's token limit or a required parameter is missing.

Source code in agentuniverse/agent/action/knowledge/embedding/openai_embedding.py
Python
def get_embeddings(self, texts: List[str]) -> List[List[float]]:
    """Get the OpenAI embeddings.

    Note:
        The `embedding_model_name` parameter of the openai embedding class must be provided.
        The `dimensions` parameter of the openai embedding class is optional.

    Args:
        texts (List[str]): A list of texts that need to be embedded.

    Returns:
        List[List[float]]: One list of floats per entry in the API response data.

    Raises:
        ValueError: If `embedding_model_name` is not set, or if the OpenAI API
            rejects the request with a `BadRequestError` (for example when the
            texts exceed the embedding model token limit).
    """
    # Validate first so a misconfigured instance fails before any client is built.
    if self.embedding_model_name is None:
        raise ValueError("Must provide `embedding_model_name`")

    request = {"input": texts, "model": self.embedding_model_name}
    # `dimensions` is optional and only forwarded when a truthy value is configured.
    if self.dimensions:
        request["dimensions"] = self.dimensions

    self.client = OpenAI(api_key=self.openai_api_key, **(self.openai_client_args or {}))
    try:
        response = self.client.embeddings.create(**request)
    except BadRequestError as e:
        # Keep the original error attached as the cause for easier debugging.
        raise ValueError(e.message) from e

    # Return the embeddings as a list of lists of floats
    return [item.embedding for item in response.data]