Skip to content

Pptx reader

PptxReader

Bases: Reader

Pptx reader.

Source code in agentuniverse/agent/action/knowledge/reader/file/pptx_reader.py
Python
class PptxReader(Reader):
    """Pptx reader.

    Produces one document per shape that exposes a ``text`` attribute,
    tagged with the number of the slide it was found on.
    """

    def load_data(self, file: Path, ext_info: Optional[Dict] = None) -> List[Document]:
        """Parse the pptx file.

        Args:
            file: Path of the pptx file; its ``name`` is stored in each
                document's metadata under ``file_name``.
            ext_info: Optional extra metadata merged into every document's
                metadata. On key collisions its values replace the default
                ``slide_number`` / ``file_name`` entries.

        Returns:
            One document per shape that has a ``text`` attribute, in the
            order slides and shapes are iterated. Slide numbers start at 1.

        Raises:
            ImportError: If ``python-pptx`` is not installed.

        Note:
            `python-pptx` is required to read PPTX files: `pip install python-pptx`
        """
        try:
            from pptx import Presentation
        except ImportError as err:
            # Chain the original failure so the root cause stays visible
            # in the traceback.
            raise ImportError(
                "python-pptx is required to read pptx files: `pip install python-pptx`"
            ) from err
        presentation = Presentation(file)
        document_list = []
        for slide_number, slide in enumerate(presentation.slides, start=1):
            for shape in slide.shapes:
                # Shapes without a `text` attribute are skipped.
                if not hasattr(shape, "text"):
                    continue
                # Build a fresh dict per shape so documents never share metadata.
                metadata = {"slide_number": slide_number, "file_name": file.name}
                if ext_info is not None:
                    metadata.update(ext_info)
                document_list.append(Document(text=shape.text, metadata=metadata))
        return document_list

load_data(file, ext_info=None)

Parse the pptx file.

Note

python-pptx is required to read PPTX files: pip install python-pptx

Source code in agentuniverse/agent/action/knowledge/reader/file/pptx_reader.py
Python
def load_data(self, file: Path, ext_info: Optional[Dict] = None) -> List[Document]:
    """Parse the pptx file.

    Args:
        file: Path of the pptx file; its ``name`` is stored in each
            document's metadata under ``file_name``.
        ext_info: Optional extra metadata merged into every document's
            metadata. On key collisions its values replace the default
            ``slide_number`` / ``file_name`` entries.

    Returns:
        One document per shape that has a ``text`` attribute, in the
        order slides and shapes are iterated. Slide numbers start at 1.

    Raises:
        ImportError: If ``python-pptx`` is not installed.

    Note:
        `python-pptx` is required to read PPTX files: `pip install python-pptx`
    """
    try:
        from pptx import Presentation
    except ImportError as err:
        # Chain the original failure so the root cause stays visible
        # in the traceback.
        raise ImportError(
            "python-pptx is required to read pptx files: `pip install python-pptx`"
        ) from err
    presentation = Presentation(file)
    document_list = []
    for slide_number, slide in enumerate(presentation.slides, start=1):
        for shape in slide.shapes:
            # Shapes without a `text` attribute are skipped.
            if not hasattr(shape, "text"):
                continue
            # Build a fresh dict per shape so documents never share metadata.
            metadata = {"slide_number": slide_number, "file_name": file.name}
            if ext_info is not None:
                metadata.update(ext_info)
            document_list.append(Document(text=shape.text, metadata=metadata))
    return document_list