Docx reader
DocxReader
¶
Bases: Reader
Docx reader.
Source code in agentuniverse/agent/action/knowledge/reader/file/docx_reader.py
Python
class DocxReader(Reader):
    """Reader that extracts the plain text of a docx file using `docx2txt`."""

    def load_data(self, file: Path, ext_info: Optional[Dict] = None) -> List[Document]:
        """Parse the docx file into a single document.

        Note:
            The docx file cannot be processed page by page; the whole text
            is returned as one `Document`.
            `docx2txt` is required to read DOCX files: `pip install docx2txt`

        Args:
            file: Path of the docx file to read.
            ext_info: Optional extra metadata merged over the default
                `file_name` entry; its keys win on conflict.

        Returns:
            A one-element list holding a `Document` with the extracted text
            and the assembled metadata.

        Raises:
            ImportError: If `docx2txt` is not installed.
        """
        # Imported lazily so the dependency is only needed when docx files are read.
        try:
            import docx2txt
        except ImportError as err:
            # Chain the original failure so the root cause stays in the traceback.
            raise ImportError(
                "docx2txt is required to read Microsoft Word files: "
                "`pip install docx2txt`"
            ) from err

        text = docx2txt.process(file)
        metadata = {"file_name": file.name}
        if ext_info is not None:
            metadata.update(ext_info)
        # `metadata` always contains at least `file_name`, so no empty-dict
        # fallback is required here.
        return [Document(text=text, metadata=metadata)]
load_data(file, ext_info=None)
¶
Parse the docx file.
Note
The docx file cannot be processed page by page.
docx2txt is required to read DOCX files: pip install docx2txt
Source code in agentuniverse/agent/action/knowledge/reader/file/docx_reader.py
Python
def load_data(self, file: Path, ext_info: Optional[Dict] = None) -> List[Document]:
    """Parse the docx file into a single document.

    Note:
        The docx file cannot be processed page by page; the whole text
        is returned as one `Document`.
        `docx2txt` is required to read DOCX files: `pip install docx2txt`

    Args:
        file: Path of the docx file to read.
        ext_info: Optional extra metadata merged over the default
            `file_name` entry; its keys win on conflict.

    Returns:
        A one-element list holding a `Document` with the extracted text
        and the assembled metadata.

    Raises:
        ImportError: If `docx2txt` is not installed.
    """
    # Imported lazily so the dependency is only needed when docx files are read.
    try:
        import docx2txt
    except ImportError as err:
        # Chain the original failure so the root cause stays in the traceback.
        raise ImportError(
            "docx2txt is required to read Microsoft Word files: "
            "`pip install docx2txt`"
        ) from err

    text = docx2txt.process(file)
    metadata = {"file_name": file.name}
    if ext_info is not None:
        metadata.update(ext_info)
    # `metadata` always contains at least `file_name`, so no empty-dict
    # fallback is required here.
    return [Document(text=text, metadata=metadata)]