Pptx reader
PptxReader
¶
Bases: Reader
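Reader for PPTX files: it produces one document for every slide shape that exposes text, tagged with the slide number and the file name.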
Source code in agentuniverse/agent/action/knowledge/reader/file/pptx_reader.py
Python
class PptxReader(Reader):
    """Reader that converts the text-bearing shapes of a PPTX file into documents."""

    def load_data(self, file: Path, ext_info: Optional[Dict] = None) -> List[Document]:
        """Parse the pptx file into one document per shape that exposes ``text``.

        Note:
            `python-pptx` is required to read PPTX files: `pip install python-pptx`

        Args:
            file: Path of the presentation to open; its ``name`` is recorded in
                each document's metadata under ``file_name``.
            ext_info: Optional extra metadata merged into every document's
                metadata. On a key collision these entries replace the
                default ``slide_number`` / ``file_name`` values.

        Returns:
            The documents in slide order (slides numbered from 1), then in the
            order the shapes are yielded by each slide.

        Raises:
            ImportError: If `python-pptx` cannot be imported.
        """
        # Imported lazily so the dependency is only needed when PPTX files are read.
        try:
            from pptx import Presentation
        except ImportError:
            raise ImportError(
                "python-pptx is required to read pptx files: `pip install python-pptx`"
            )

        deck = Presentation(file)
        docs: List[Document] = []
        for slide_idx, slide in enumerate(deck.slides, start=1):
            for shape in slide.shapes:
                # Shapes without a ``text`` attribute contribute nothing.
                if not hasattr(shape, "text"):
                    continue
                # A fresh dict per shape keeps documents from sharing metadata.
                meta = {"slide_number": slide_idx, "file_name": file.name}
                if ext_info is not None:
                    meta.update(ext_info)
                docs.append(Document(text=shape.text, metadata=meta))
        return docs
load_data(file, ext_info=None)
¶
Parse the pptx file.
Note
`python-pptx` is required to read PPTX files: `pip install python-pptx`
Source code in agentuniverse/agent/action/knowledge/reader/file/pptx_reader.py
Python
def load_data(self, file: Path, ext_info: Optional[Dict] = None) -> List[Document]:
    """Parse the pptx file into one document per shape that exposes ``text``.

    Note:
        `python-pptx` is required to read PPTX files: `pip install python-pptx`

    Args:
        file: Path of the presentation to open; its ``name`` is recorded in
            each document's metadata under ``file_name``.
        ext_info: Optional extra metadata merged into every document's
            metadata. On a key collision these entries replace the
            default ``slide_number`` / ``file_name`` values.

    Returns:
        The documents in slide order (slides numbered from 1), then in the
        order the shapes are yielded by each slide.

    Raises:
        ImportError: If `python-pptx` cannot be imported.
    """
    # Imported lazily so the dependency is only needed when PPTX files are read.
    try:
        from pptx import Presentation
    except ImportError:
        raise ImportError(
            "python-pptx is required to read pptx files: `pip install python-pptx`"
        )

    deck = Presentation(file)
    docs: List[Document] = []
    for slide_idx, slide in enumerate(deck.slides, start=1):
        for shape in slide.shapes:
            # Shapes without a ``text`` attribute contribute nothing.
            if not hasattr(shape, "text"):
                continue
            # A fresh dict per shape keeps documents from sharing metadata.
            meta = {"slide_number": slide_idx, "file_name": file.name}
            if ext_info is not None:
                meta.update(ext_info)
            docs.append(Document(text=shape.text, metadata=meta))
    return docs