def docx_get_keys(doc: Any) -> list:
    """
    Search for all keys in the Word document and return a list of unique keys

    ATTENTION: The required format for the keys inside the Word document is: ${key}

    For a document with the following content: "Hello ${name}, is your phone ${phone}?"
    Result example: ["name", "phone"]

    Each key is listed once, in the order it is first found while walking the
    paragraphs yielded by `Paragraph.get_all(doc)`, so the result is stable
    between runs for the same document.
    """
    # Compile once, outside the paragraph loop. The pattern captures the text
    # between "${" and "}" and refuses nested braces inside a key.
    key_pattern = re.compile(r"\$\{([^{}]+)\}")

    # A dict is used as an insertion-ordered set: it drops duplicates like a
    # set would, but keeps first-seen order instead of hash order.
    found = {}
    for p in Paragraph.get_all(doc):
        text = Paragraph(p).get_text()
        for match in key_pattern.finditer(text):
            found.setdefault(match.group(1), None)
    return list(found)