diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 0000000000..1b0e85e60a --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,31 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [published] + workflow_dispatch: + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.8' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel build twine + pip install -r requirements.txt + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: make pypi diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ec12c8934..c3ad521248 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,88 @@ Versioned according to [Semantic Versioning](http://semver.org/). 
## Unreleased +## [3.0.0] - 2025-01-09 + +Changed: + + - Merge v2 master into new-procesor-api + - PAGE API: Update to latest generateDS 2.44.1, bertsky/core#21 + +Fixed: + + - `ocrd --help` output was broken for multiline config options, bertsky/core#25 + - Call `initLogging` before instantiating processors in `ocrd_cli_wrap_processor`, bertsky/core#24, #1296 + - PAGE API: Fully reversible mapping from/to XML element/generateDS instances, bertsky/core#21 + +Added: + + - `ocrd-filter` processor to remove segments based on XPath expressions, bertsky/core#21 + - XPath function `pc:pixelarea` for the number of pixels of the bounding box (or sum area on node sets), bertsky/core#21 + - XPath function `pc:textequiv` for the first TextEquiv unicode string (or concatenated string on node sets), bertsky/core#21 + +## [3.0.0b7] - 2024-11-12 + +Fixed: + - `initLogging`: only add root handler instead of multiple redundant handlers with `propagate=false` + - `setOverrideLogLevel`: override all currently active loggers' level + +Changed: + - :fire: logging: increase default root (not `ocrd`) level from `INFO` to `WARNING` + - :fire: `initLogging`: do not remove any previous handlers/levels, unless `force_reinit` + - :fire: `disableLogging`: remove all handlers, reset all levels - instead of being selective + - :fire: Processor: replace `weakref` with `__del__` to trigger `shutdown` + - :fire: `OCRD_MAX_PARALLEL_PAGES>1`: log via `QueueHandler` in subprocess, `QueueListener` in main + +## [3.0.0b6] - 2024-10-30 + +Fixed: + - `OcrdMets.get_physical_pages`: cover `return_divs` w/o `for_fileIds` and `for_pageIds` + +Changed: + - :fire: `ocrd_utils.initLogging`: also add handler to root logger (as in file config), + but disable message propagation to avoid duplication + - only import `ocrd_network` in `src/ocrd/decorators/__init__.py` once needed + - `Processor.process_page_file`: skip computing `process_page_pcgts` if output already exists, + but `OCRD_EXISTING_OUTPUT!=OVERWRITE` + 
- :fire: `OCRD_MAX_PARALLEL_PAGES>1`: switch from multithreading to multiprocessing, depend on + `loky` instead of stdlib `concurrent.futures` + - `OCRD_PROCESSING_PAGE_TIMEOUT>0`: actually enforce timeout within worker + - `OCRD_MAX_MISSING_OUTPUTS>0`: abort early if too many failures already, prospectively + - `Processor.process_workspace`: split up into overridable sub-methods: + - `process_workspace_submit_tasks` (iterate input file group and schedule page tasks) + - `process_workspace_submit_page_task` (download input files and submit single page task) + - `process_workspace_handle_tasks` (monitor page tasks and aggregate results) + - `process_workspace_handle_page_task` (await single page task and handle errors) + + +## [3.0.0b5] - 2024-09-16 + +Fixed: + - tests: ensure `ocrd_utils.config` gets reset whenever changing it globally + - `OcrdMetsServer.add_file`: pass on `force` kwarg + - `ocrd.cli.workspace`: consistently pass on `--mets-server-url` and `--backup` + - `ocrd.cli.validate "tasks"`: pass on `--mets-server-url` + - `ocrd.cli.bashlib "input-files"`: pass on `--mets-server-url` + - `lib.bash input-files`: pass on `--mets-server-url`, `--overwrite`, and parameters + - `lib.bash`: fix `errexit` handling + - `ocrd.cli.ocrd-tool "resolve-resource"`: forgot to actually print result + +Changed: + - :fire: `Processor` / `Workspace.add_file`: always `force` if `OCRD_EXISTING_OUTPUT==OVERWRITE` + - :fire: `Processor.verify`: revert 3.0.0b1 enforcing cardinality checks (stay backwards compatible) + - :fire: `Processor.verify`: check output fileGrps, too + (must not exist unless `OCRD_EXISTING_OUTPUT=OVERWRITE|SKIP` or disjoint `--page-id` range) + - lib.bash `input-files`: do not try to validate tasks here (now covered by `Processor.verify()`) + - `run_processor`: be robust if `ocrd_tool` is missing `steps` + - `PcGtsType.PageType.id` via `make_xml_id`: replace `/` with `_` + +Added: + - `OcrdPage`: new `PageType.get_ReadingOrderGroups()` to retrieve recursive 
RO as dict + - ocrd.cli.workspace `server`: add subcommands `reload` and `save` + - METS Server: export and delegate `physical_pages` + - processor CLI: delegate `--resolve-resource`, too + - `Processor.process_page_file` / `OcrdPageResultImage`: allow `None` besides `AlternativeImageType` + ## [3.0.0b4] - 2024-09-02 Fixed: @@ -100,6 +182,79 @@ Added: - `Processor.verify`: handle fileGrp cardinality verification, with default implementation - `Processor.setup`: to set up processor before processing, optional +## [2.71.1] - 2025-01-06 + +Changed: + + * Do `initLogging` before calling processors in `ocrd_cli_wrap_processor`, #1232, #1296 + +## [2.71.0] - 2024-11-20 + +Changed: + + * Rewrite `ocrd_utils.logging`, #1288 + * Handle only `''` as the root logger + * `disableLogging`: Remove handlers from root and all configured loggers + * Do not do any module-level modification of the log config + +Fixed: + + * Typo in processing_worker log message, #1293 + * Call `initLogging` at the right time in `ocrd_network`, #1292 + * `make docs` fixed with absolute path to location, #1273 + +## [2.70.0] - 2024-10-10 + +Added: + + - `ocrd network client workflow run`: Add `--print-status` flag to periodically print the job status, #1277 + - Processing Server: `DELETE /mets_server_zombies` to kill any renegade METS servers, #1277 + - No more zombie METS Server by properly shutting them down, #1284 + - `OCRD_NETWORK_RABBITMQ_HEARTBEAT` to allow overriding the [heartbeat](https://pika.readthedocs.io/en/stable/examples/heartbeat_and_blocked_timeouts.html) behavior of RabbitMQ, #1285 + +Changed: + + - significantly more detailed logging for the METS Server and Processing Server, #1284 + - Only import `ocrd_network` in src/ocrd/decorators/__init__.py once needed, #1289 + - Automate release via GitHub Actions, #1290 + +Fixed: + + - `ocrd/core-cuda-torch`: Install torchvision as well, #1286 + - Processing Server: remove shut down METS servers from deployer's cache, #1287 + - typos, #1274 + 
+## [2.69.0] - 2024-09-30 + +Fixed: + - tests: ensure `ocrd_utils.config` gets reset whenever changing it globally + - `ocrd.cli.workspace`: consistently pass on `--mets-server-url` and `--backup` + - `ocrd.cli.workspace`: make `list-page` work w/ METS Server + - `ocrd.cli.validate "tasks"`: pass on `--mets-server-url` + - `lib.bash`: fix `errexit` handling + - actually apply CLI `--log-filename`, and show in `--help` + - adapt to Pillow changes + - `ocrd workspace clone`: do pass on `--file-grp` (for download filtering) + - `OcrdMetsServer.add_file`: pass on `force` kwarg + - `Workspace.reload_mets`: handle ClientSideOcrdMets as well + - `OcrdMets.get_physical_pages`: cover `return_divs` w/o `for_fileIds` and `for_pageIds` + - `disableLogging`: also re-instate root logger to Python defaults + - `OcrdExif`: handle multi-frame TIFFs gracefully in `identify` callout, #1276 + +Changed: + - `run_processor`: be robust if `ocrd_tool` is missing `steps` + - `PcGtsType.PageType.id` via `make_xml_id`: replace `/` with `_` + - `ClientSideOcrdMets`: use same logger name prefix as METS Server + - `Processor.zip_input_files`: when `--page-id` yields empty list, just log instead of raise + +Added: + - `OcrdPage`: new `PageType.get_ReadingOrderGroups()` to retrieve recursive RO as dict + - METS Server: export and delegate `physical_pages` + - ocrd.cli.workspace `server`: add subcommands `reload` and `save` + - processor CLI: delegate `--resolve-resource`, too + - `OcrdConfig.reset_defaults` to reset config variables to their defaults + - `ocrd_utils.scale_coordinates` for resizing images + ## [2.68.0] - 2024-08-23 Changed: @@ -2259,11 +2414,20 @@ Fixed Initial Release +[3.0.0]: ../../compare/v3.0.0..v3.0.0b7 +[3.0.0b7]: ../../compare/v3.0.0b7..v3.0.0b6 +[3.0.0b6]: ../../compare/v3.0.0b6..v3.0.0b5 +[3.0.0b5]: ../../compare/v3.0.0b5..v3.0.0b4 +[3.0.0b4]: ../../compare/v3.0.0b4..v3.0.0b3 [3.0.0b3]: ../../compare/v3.0.0b3..v3.0.0b2 [3.0.0b2]: ../../compare/v3.0.0b2..v3.0.0b1 
[3.0.0b1]: ../../compare/v3.0.0b1..v3.0.0a2 [3.0.0a2]: ../../compare/v3.0.0a2..v3.0.0a1 [3.0.0a1]: ../../compare/v3.0.0a1..v2.67.2 +[2.71.1]: ../../compare/v2.71.1..v2.71.0 +[2.71.0]: ../../compare/v2.71.0..v2.70.0 +[2.70.0]: ../../compare/v2.70.0..v2.69.0 +[2.69.0]: ../../compare/v2.69.0..v2.68.0 [2.68.0]: ../../compare/v2.68.0..v2.67.2 [2.67.2]: ../../compare/v2.67.2..v2.67.1 [2.67.1]: ../../compare/v2.67.1..v2.67.0 diff --git a/Dockerfile.cuda-torch b/Dockerfile.cuda-torch index 8d6c3aa624..59ce1144be 100644 --- a/Dockerfile.cuda-torch +++ b/Dockerfile.cuda-torch @@ -9,7 +9,5 @@ RUN make deps-torch WORKDIR /data -RUN rm -fr /build - CMD ["/usr/local/bin/ocrd", "--help"] diff --git a/Makefile b/Makefile index 1a4a6bbdb8..bb51269558 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ deps-cuda: CONDA_EXE ?= /usr/local/bin/conda deps-cuda: export CONDA_PREFIX ?= /conda deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])' deps-cuda: - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba + curl --retry 6 -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba mv bin/micromamba $(CONDA_EXE) # Install Conda system-wide (for interactive / login shells) echo 'export MAMBA_EXE=$(CONDA_EXE) MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) CONDA_PREFIX=$(CONDA_PREFIX) PATH=$(CONDA_PREFIX)/bin:$$PATH' >> /etc/profile.d/98-conda.sh @@ -97,7 +97,7 @@ deps-cuda: # works, too: shopt -s nullglob; \ $(PIP) install nvidia-pyindex \ - && $(PIP) install nvidia-cudnn-cu11==8.7.* \ + && $(PIP) install nvidia-cudnn-cu11~=8.7 \ nvidia-cublas-cu11~=11.11 \ nvidia-cusparse-cu11~=11.7 \ nvidia-cusolver-cu11~=11.4 \ @@ -158,7 +158,7 @@ deps-tf2: fi deps-torch: - $(PIP) install -i https://download.pytorch.org/whl/cu118 torch + $(PIP) install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118 # Dependencies for deployment in an ubuntu/debian linux deps-ubuntu: 
@@ -178,7 +178,7 @@ build: # (Re)install the tool install: #build - # not stricttly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166 + # not strictly necessary but a precaution against outdated python build tools, https://github.com/OCR-D/core/pull/1166 $(PIP) install -U pip wheel $(PIP_INSTALL) . $(PIP_INSTALL_CONFIG_OPTION) @# workaround for shapely#1598 diff --git a/VERSION b/VERSION index 9414e12700..4a36342fca 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0b4 +3.0.0 diff --git a/docs/api/ocrd_network/ocrd_network.client_utils.rst b/docs/api/ocrd_network/ocrd_network.client_utils.rst new file mode 100644 index 0000000000..973e27cdb5 --- /dev/null +++ b/docs/api/ocrd_network/ocrd_network.client_utils.rst @@ -0,0 +1,7 @@ +ocrd\_network.client\_utils module +================================== + +.. automodule:: ocrd_network.client_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/ocrd_network/ocrd_network.rst b/docs/api/ocrd_network/ocrd_network.rst index 4497702751..d61da39313 100644 --- a/docs/api/ocrd_network/ocrd_network.rst +++ b/docs/api/ocrd_network/ocrd_network.rst @@ -24,6 +24,7 @@ Submodules :maxdepth: 4 ocrd_network.client + ocrd_network.client_utils ocrd_network.constants ocrd_network.database ocrd_network.logging_utils diff --git a/docs/conf.py b/docs/conf.py index 917c5c62ca..939277ad5f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,7 +15,8 @@ # import os # import sys # # sys.path.insert(0, os.path.abspath('..')) -with open('../VERSION', encoding='utf-8') as f: +from pathlib import Path +with open(Path(__file__).parent.parent / 'VERSION', encoding='utf-8') as f: VERSION = f.read() diff --git a/pyproject.toml b/pyproject.toml index 5a081bb91e..0e643c23ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ Issues = "https://github.com/OCR-D/core/issues" [project.scripts] ocrd = "ocrd.cli:cli" ocrd-dummy = 
"ocrd.processor.builtin.dummy_processor:cli" +ocrd-filter = "ocrd.processor.builtin.filter_processor:cli" [tool.setuptools] include-package-data = true diff --git a/repo/assets b/repo/assets index 05568aaa2d..ca108faf0e 160000 --- a/repo/assets +++ b/repo/assets @@ -1 +1 @@ -Subproject commit 05568aaa2dc20678bf87ffec77f3baf2924d7c24 +Subproject commit ca108faf0e95cc823a9e84cd0a1602282ae006b1 diff --git a/repo/spec b/repo/spec index df2a07e3fd..506b33936d 160000 --- a/repo/spec +++ b/repo/spec @@ -1 +1 @@ -Subproject commit df2a07e3fda634b2eda5785afe67399b61a81173 +Subproject commit 506b33936d89080a683fa8a26837f2a23b23e5e2 diff --git a/requirements.txt b/requirements.txt index e78c186618..3e2dc689b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ click >=7 cryptography < 43.0.0 Deprecated == 1.2.0 docker +elementpath fastapi>=0.78.0 filetype Flask @@ -13,6 +14,7 @@ httpx>=0.22.0 importlib_metadata ; python_version < '3.8' importlib_resources ; python_version < '3.10' jsonschema>=4 +loky lxml memory-profiler >= 0.58.0 # XXX explicitly do not restrict the numpy version because different diff --git a/requirements_test.txt b/requirements_test.txt index a6a87918fc..585bb53954 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,7 +1,7 @@ autopep8 cryptography < 43.0.0 pytest >= 4.0.0 -generateDS == 2.35.20 +generateDS == 2.44.1 pytest-benchmark >= 3.2.3 pytest-timeout coverage >= 4.5.2 diff --git a/src/ocrd/cli/__init__.py b/src/ocrd/cli/__init__.py index 9e8a37b8bf..794538752d 100644 --- a/src/ocrd/cli/__init__.py +++ b/src/ocrd/cli/__init__.py @@ -16,7 +16,7 @@ def command_with_replaced_help(*replacements): class CommandWithReplacedHelp(click.Command): def get_help(self, ctx): - newhelp = super().get_help(ctx) + newhelp : str = super().get_help(ctx) for replacement in replacements: newhelp = re.sub(*replacement, newhelp) # print(newhelp) @@ -83,6 +83,8 @@ def get_help(self, ctx): \b 
{config.describe('OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS')} \b +{config.describe('OCRD_NETWORK_RABBITMQ_HEARTBEAT')} +\b {config.describe('OCRD_PROFILE_FILE')} \b {config.describe('OCRD_PROFILE', wrap_text=False)} diff --git a/src/ocrd/cli/workspace.py b/src/ocrd/cli/workspace.py index ca4e8629db..77797b3037 100644 --- a/src/ocrd/cli/workspace.py +++ b/src/ocrd/cli/workspace.py @@ -149,7 +149,8 @@ def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mim LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of argument 'WORKSPACE_DIR' ('%s')" % workspace_dir)) ctx.directory = workspace_dir - assert not ctx.mets_server_url + assert not ctx.mets_server_url, \ + f"clone cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.resolver.workspace_from_url( mets_url, dst_dir=ctx.directory, @@ -185,7 +186,8 @@ def workspace_init(ctx, clobber_mets, directory): if directory: LOG.warning(DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of argument 'DIRECTORY' ('%s')" % directory)) ctx.directory = directory - assert not ctx.mets_server_url + assert not ctx.mets_server_url, \ + f"init cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.resolver.workspace_from_nothing( directory=ctx.directory, mets_basename=ctx.mets_basename, @@ -506,6 +508,8 @@ def workspace_remove_file(ctx, id, force, keep_file): # pylint: disable=redefin (If any ``ID`` starts with ``//``, then its remainder will be interpreted as a regular expression.) """ + assert not ctx.mets_server_url, \ + f"remove cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.workspace() for i in id: workspace.remove_file(i, force=force, keep_file=keep_file) @@ -524,6 +528,8 @@ def rename_group(ctx, old, new): """ Rename fileGrp (USE attribute ``NEW`` to ``OLD``). 
""" + assert not ctx.mets_server_url, \ + f"rename-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.workspace() workspace.rename_file_group(old, new) workspace.save_mets() @@ -545,6 +551,8 @@ def remove_group(ctx, group, recursive, force, keep_files): (If any ``GROUP`` starts with ``//``, then its remainder will be interpreted as a regular expression.) """ + assert not ctx.mets_server_url, \ + f"remove-group cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.workspace() for g in group: workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files) @@ -567,6 +575,8 @@ def prune_files(ctx, file_grp, mimetype, page_id, file_id): (If any ``FILTER`` starts with ``//``, then its remainder will be interpreted as a regular expression.) """ + assert not ctx.mets_server_url, \ + f"prune-files cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.workspace() with pushd_popd(workspace.directory): for f in workspace.find_files( @@ -673,19 +683,15 @@ def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page will be interpreted as a regular expression.) 
""" workspace = ctx.workspace() - find_kwargs = {} - if page_id_range and 'ID' in output_field: - find_kwargs['pageId'] = page_id_range - page_ids = sorted({x.pageId for x in workspace.mets.find_files(**find_kwargs) if x.pageId}) ret = [] - - if output_field == ['ID']: - ret = [[x] for x in page_ids] - else: - for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=','.join(page_ids), return_divs=True)): + if page_id_range or list(output_field) != ['ID']: + for i, page_div in enumerate(workspace.mets.get_physical_pages(for_pageIds=page_id_range, return_divs=True)): ret.append([]) for k in output_field: ret[i].append(page_div.get(k, 'None')) + else: + for page_id in workspace.mets.physical_pages: + ret.append([page_id]) if numeric_range: start, end = map(int, numeric_range.split('..')) @@ -762,6 +768,8 @@ def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id): if contentids: update_kwargs['CONTENTIDS'] = contentids try: + assert not ctx.mets_server_url, \ + f"update-page cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.workspace() workspace.mets.update_physical_page_attributes(page_id, **update_kwargs) workspace.save_mets() @@ -800,6 +808,8 @@ def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pa mets_path = Path(mets_path) if filegrp_mapping: filegrp_mapping = loads(filegrp_mapping) + assert not ctx.mets_server_url, \ + f"merge cannot be performed with METS Server - stop server, rerun without -U {ctx.mets_server_url}" workspace = ctx.workspace() other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name)) workspace.merge( diff --git a/src/ocrd/decorators/__init__.py b/src/ocrd/decorators/__init__.py index f52a13575b..6e0ceb1f1c 100644 --- a/src/ocrd/decorators/__init__.py +++ b/src/ocrd/decorators/__init__.py @@ -13,7 +13,6 @@ redirect_stderr_and_stdout_to_file, ) from ocrd_validators 
import WorkspaceValidator -from ocrd_network import ProcessingWorker, ProcessorServer, AgentType from ..resolver import Resolver from ..processor.base import ResourceNotFoundError, run_processor @@ -23,8 +22,6 @@ from .ocrd_cli_options import ocrd_cli_options from .mets_find_options import mets_find_options -SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER] - def ocrd_cli_wrap_processor( processorClass, @@ -51,6 +48,9 @@ def ocrd_cli_wrap_processor( # ocrd_network params end # **kwargs ): + # init logging handlers so no imported libs can preempt ours + initLogging() + # FIXME: remove workspace arg entirely processor = processorClass(None) if not sys.argv[1:]: @@ -88,14 +88,10 @@ def ocrd_cli_wrap_processor( if list_resources: processor.list_resources() sys.exit() - if subcommand: + if subcommand or address or queue or database: # Used for checking/starting network agents for the WebAPI architecture check_and_run_network_agent(processorClass, subcommand, address, database, queue) - elif address or queue or database: - raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}") - # from here: single-run processing context - initLogging() if 'parameter' in kwargs: # Disambiguate parameter file/literal, and resolve file def resolve(name): @@ -162,6 +158,11 @@ def goexit(): def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str): """ """ + from ocrd_network import ProcessingWorker, ProcessorServer, AgentType + SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER] + + if not subcommand: + raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}") if subcommand not in SUBCOMMANDS: raise ValueError(f"SUBCOMMAND can only be one of {SUBCOMMANDS}") diff --git a/src/ocrd/mets_server.py b/src/ocrd/mets_server.py index 101727e064..e0f0029570 100644 --- 
a/src/ocrd/mets_server.py +++ b/src/ocrd/mets_server.py @@ -1,8 +1,10 @@ """ # METS server functionality """ +import os import re from os import _exit, chmod +import signal from typing import Dict, Optional, Union, List, Tuple from time import sleep from pathlib import Path @@ -155,13 +157,13 @@ def save(self): Request writing the changes to the file system """ if not self.multiplexing_mode: - self.session.request("PUT", url=self.url) + return self.session.request("PUT", url=self.url).text else: - self.session.request( + return self.session.request( "POST", self.url, json=MpxReq.save(self.ws_dir_path) - ) + ).json()["text"] def stop(self): """ @@ -169,14 +171,13 @@ def stop(self): """ try: if not self.multiplexing_mode: - self.session.request("DELETE", self.url) - return + return self.session.request("DELETE", self.url).text else: - self.session.request( + return self.session.request( "POST", self.url, json=MpxReq.stop(self.ws_dir_path) - ) + ).json()["text"] except ConnectionError: # Expected because we exit the process without returning pass @@ -323,7 +324,7 @@ def add_file( class MpxReq: - """This class wrapps the request bodies needed for the tcp forwarding + """This class wraps the request bodies needed for the tcp forwarding For every mets-server-call like find_files or workspace_path a special request_body is needed to call `MetsServerProxy.forward_tcp_request`. These are created by this functions. 
@@ -346,12 +347,12 @@ def __args_wrapper( @staticmethod def save(ws_dir_path: str) -> Dict: return MpxReq.__args_wrapper( - ws_dir_path, method_type="PUT", response_type="empty", request_url="", request_data={}) + ws_dir_path, method_type="PUT", response_type="text", request_url="", request_data={}) @staticmethod def stop(ws_dir_path: str) -> Dict: return MpxReq.__args_wrapper( - ws_dir_path, method_type="DELETE", response_type="empty", request_url="", request_data={}) + ws_dir_path, method_type="DELETE", response_type="text", request_url="", request_data={}) @staticmethod def reload(ws_dir_path: str) -> Dict: @@ -428,18 +429,24 @@ def create_process(mets_server_url: str, ws_dir_path: str, log_file: str) -> int @staticmethod def kill_process(mets_server_pid: int): - subprocess_run(args=["kill", "-s", "SIGINT", f"{mets_server_pid}"], shell=False, universal_newlines=True) + os.kill(mets_server_pid, signal.SIGINT) + sleep(3) + try: + os.kill(mets_server_pid, signal.SIGKILL) + except ProcessLookupError as e: + pass def shutdown(self): + pid = os.getpid() + self.log.info(f"Shutdown method of mets server[{pid}] invoked, sending SIGTERM signal.") + os.kill(pid, signal.SIGTERM) if self.is_uds: if Path(self.url).exists(): - self.log.debug(f'UDS socket {self.url} still exists, removing it') + self.log.warning(f"Due to a server shutdown, removing the existing UDS socket file: {self.url}") Path(self.url).unlink() - # os._exit because uvicorn catches SystemExit raised by sys.exit - _exit(0) def startup(self): - self.log.info("Starting up METS server") + self.log.info(f"Configuring the Mets Server") workspace = self.workspace @@ -465,32 +472,49 @@ def save(): """ Write current changes to the file system """ - return workspace.save_mets() + workspace.save_mets() + response = Response(content="The Mets Server is writing changes to disk.", media_type='text/plain') + self.log.info(f"PUT / -> {response.__dict__}") + return response @app.delete(path='/') - async def stop(): + def 
stop(): """ Stop the mets server """ - getLogger('ocrd.models.ocrd_mets').info(f'Shutting down METS Server {self.url}') workspace.save_mets() + response = Response(content="The Mets Server will shut down soon...", media_type='text/plain') self.shutdown() + self.log.info(f"DELETE / -> {response.__dict__}") + return response @app.post(path='/reload') - async def workspace_reload_mets(): + def workspace_reload_mets(): """ Reload mets file from the file system """ workspace.reload_mets() - return Response(content=f'Reloaded from {workspace.directory}', media_type="text/plain") + response = Response(content=f"Reloaded from {workspace.directory}", media_type='text/plain') + self.log.info(f"POST /reload -> {response.__dict__}") + return response @app.get(path='/unique_identifier', response_model=str) async def unique_identifier(): - return Response(content=workspace.mets.unique_identifier, media_type='text/plain') + response = Response(content=workspace.mets.unique_identifier, media_type='text/plain') + self.log.info(f"GET /unique_identifier -> {response.__dict__}") + return response @app.get(path='/workspace_path', response_model=str) async def workspace_path(): - return Response(content=workspace.directory, media_type="text/plain") + response = Response(content=workspace.directory, media_type="text/plain") + self.log.info(f"GET /workspace_path -> {response.__dict__}") + return response + + @app.get(path='/physical_pages', response_model=OcrdPageListModel) + async def physical_pages(): + response = {'physical_pages': workspace.mets.physical_pages} + self.log.info(f"GET /physical_pages -> {response}") + return response @app.get(path='/physical_pages', response_model=OcrdPageListModel) async def physical_pages(): @@ -498,18 +522,24 @@ async def physical_pages(): @app.get(path='/file_groups', response_model=OcrdFileGroupListModel) async def file_groups(): - return {'file_groups': workspace.mets.file_groups} + response = {'file_groups': workspace.mets.file_groups} + 
self.log.info(f"GET /file_groups -> {response}") + return response @app.get(path='/agent', response_model=OcrdAgentListModel) async def agents(): - return OcrdAgentListModel.create(workspace.mets.agents) + response = OcrdAgentListModel.create(workspace.mets.agents) + self.log.info(f"GET /agent -> {response.__dict__}") + return response @app.post(path='/agent', response_model=OcrdAgentModel) async def add_agent(agent: OcrdAgentModel): kwargs = agent.dict() kwargs['_type'] = kwargs.pop('type') workspace.mets.add_agent(**kwargs) - return agent + response = agent + self.log.info(f"POST /agent -> {response.__dict__}") + return response @app.get(path="/file", response_model=OcrdFileListModel) async def find_files( @@ -526,7 +556,9 @@ async def find_files( found = workspace.mets.find_all_files( fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype, local_filename=local_filename, url=url ) - return OcrdFileListModel.create(found) + response = OcrdFileListModel.create(found) + self.log.info(f"GET /file -> {response.__dict__}") + return response @app.post(path='/file', response_model=OcrdFileModel) async def add_file( @@ -549,7 +581,9 @@ async def add_file( # Add to workspace kwargs = file_resource.dict() workspace.add_file(**kwargs, force=force) - return file_resource + response = file_resource + self.log.info(f"POST /file -> {response.__dict__}") + return response # ------------- # @@ -557,9 +591,6 @@ async def add_file( # Create socket and change to world-readable and -writable to avoid permission errors self.log.debug(f"chmod 0o677 {self.url}") server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - if Path(self.url).exists() and not is_socket_in_use(self.url): - # remove leftover unused socket which blocks startup - Path(self.url).unlink() server.bind(self.url) # creates the socket file atexit.register(self.shutdown) server.close() @@ -571,16 +602,5 @@ async def add_file( uvicorn_kwargs['log_config'] = None uvicorn_kwargs['access_log'] = False - 
self.log.debug("Starting uvicorn") + self.log.info("Starting the uvicorn Mets Server") uvicorn.run(app, **uvicorn_kwargs) - - -def is_socket_in_use(socket_path): - if Path(socket_path).exists(): - client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - try: - client.connect(socket_path) - except OSError: - return False - client.close() - return True diff --git a/src/ocrd/processor/base.py b/src/ocrd/processor/base.py index 26ea532d16..d6348b40e1 100644 --- a/src/ocrd/processor/base.py +++ b/src/ocrd/processor/base.py @@ -16,14 +16,21 @@ import os from os import getcwd from pathlib import Path -from typing import Any, List, Optional, Union, get_args +from typing import Any, Dict, List, Optional, Tuple, Union, get_args import sys +import logging +import logging.handlers import inspect import tarfile import io -import weakref +from collections import defaultdict from frozendict import frozendict -from concurrent.futures import ThreadPoolExecutor, TimeoutError +# concurrent.futures is buggy in py38, +# this is where the fixes came from: +from loky import Future, ProcessPoolExecutor +import multiprocessing as mp +from threading import Timer +from _thread import interrupt_main from click import wrap_text from deprecated import deprecated @@ -105,6 +112,31 @@ def __init__(self, fileGrp, pageId, mimetype): f"and pageId {pageId} under mimetype {mimetype or 'PAGE+image(s)'}") super().__init__(self.message) +class DummyFuture: + """ + Mimics some of `concurrent.futures.Future` but runs immediately. + """ + def __init__(self, fn, *args, **kwargs): + self.fn = fn + self.args = args + self.kwargs = kwargs + def result(self): + return self.fn(*self.args, **self.kwargs) +class DummyExecutor: + """ + Mimics some of `concurrent.futures.ProcessPoolExecutor` but runs + everything immediately in this process. 
+ """ + def __init__(self, initializer=None, initargs=(), **kwargs): + initializer(*initargs) + def shutdown(self, **kwargs): + pass + def submit(self, fn, *args, **kwargs) -> DummyFuture: + return DummyFuture(fn, *args, **kwargs) + +TFuture = Union[DummyFuture, Future] +TExecutor = Union[DummyExecutor, ProcessPoolExecutor] + class Processor(): """ A processor is a tool that implements the uniform OCR-D @@ -127,12 +159,12 @@ class Processor(): max_workers : int = -1 """ - maximum number of processor threads for page-parallel processing (ignored if negative), + maximum number of processor forks for page-parallel processing (ignored if negative), to be applied on top of :py:data:`~ocrd_utils.config.OCRD_MAX_PARALLEL_PAGES` (i.e. whatever is smaller). (Override this if you know how many pages fit into processing units - GPU shaders / CPU cores - - at once, or if your class is not thread-safe.) + - at once, or if your class already creates threads prior to forking, e.g. during ``setup``.) """ max_page_seconds : int = -1 @@ -335,12 +367,14 @@ def __init__( self._base_logger = getLogger('ocrd.processor.base') if parameter is not None: self.parameter = parameter - # ensure that shutdown gets called at destruction - self._finalizer = weakref.finalize(self, self.shutdown) # workaround for deprecated#72 (@deprecated decorator does not work for subclasses): setattr(self, 'process', deprecated(version='3.0', reason='process() should be replaced with process_page_pcgts() or process_page_file() or process_workspace()')(getattr(self, 'process'))) + def __del__(self): + self._base_logger.debug("shutting down") + self.shutdown() + def show_help(self, subcommand=None): """ Print a usage description including the standard CLI and all of this processor's ocrd-tool @@ -456,6 +490,9 @@ def process_workspace(self, workspace: Workspace) -> None: for the given :py:data:`page_id` (or all pages) under the given :py:data:`parameter`. 
+ Delegates to :py:meth:`.process_workspace_submit_tasks` + and :py:meth:`.process_workspace_handle_tasks`. + (This will iterate over pages and files, calling :py:meth:`.process_page_file` and handling exceptions. It should be overridden by subclasses to handle cases @@ -465,11 +502,7 @@ def process_workspace(self, workspace: Workspace) -> None: self.workspace = workspace self.verify() try: - nr_succeeded = 0 - nr_skipped = 0 - nr_copied = 0 - - # set up multithreading + # set up multitasking max_workers = max(0, config.OCRD_MAX_PARALLEL_PAGES) if self.max_workers > 0 and self.max_workers < config.OCRD_MAX_PARALLEL_PAGES: self._base_logger.info("limiting number of threads from %d to %d", max_workers, self.max_workers) @@ -481,80 +514,34 @@ def process_workspace(self, workspace: Workspace) -> None: if self.max_page_seconds > 0 and self.max_page_seconds < config.OCRD_PROCESSING_PAGE_TIMEOUT: self._base_logger.info("limiting page timeout from %d to %d sec", max_seconds, self.max_page_seconds) max_seconds = self.max_page_seconds - executor = ThreadPoolExecutor( + + if max_workers > 1: + executor_cls = ProcessPoolExecutor + log_queue = mp.Queue() + # forward messages from log queue (in subprocesses) to all root handlers + log_listener = logging.handlers.QueueListener(log_queue, *logging.root.handlers, respect_handler_level=True) + else: + executor_cls = DummyExecutor + log_queue = None + log_listener = None + executor = executor_cls( max_workers=max_workers or 1, - thread_name_prefix=f"pagetask.{workspace.mets.unique_identifier}" + # only forking method avoids pickling + context=mp.get_context('fork'), + # share processor instance as global to avoid pickling + initializer=_page_worker_set_ctxt, + initargs=(self, log_queue), ) - self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1) - tasks = {} - - for input_file_tuple in self.zip_input_files(on_error='abort', require_first=False): - input_files : List[Optional[OcrdFileType]] = 
[None] * len(input_file_tuple) - page_id = next(input_file.pageId - for input_file in input_file_tuple - if input_file) - self._base_logger.info(f"preparing page {page_id}") - for i, input_file in enumerate(input_file_tuple): - if input_file is None: - # file/page not found in this file grp - continue - input_files[i] = input_file - if not self.download: - continue - try: - input_files[i] = self.workspace.download_file(input_file) - except (ValueError, FileNotFoundError, HTTPError) as e: - self._base_logger.error(repr(e)) - self._base_logger.warning(f"failed downloading file {input_file} for page {page_id}") - # process page - tasks[executor.submit(self.process_page_file, *input_files)] = (page_id, input_files) - self._base_logger.debug("submitted %d processing tasks", len(tasks)) - - for task in tasks: - # wait for results, handle errors - page_id, input_files = tasks[task] - # FIXME: differentiate error cases in various ways: - # - ResourceNotFoundError → use ResourceManager to download (once), then retry - # - transient (I/O or OOM) error → maybe sleep, retry - # - persistent (data) error → skip / dummy / raise - try: - self._base_logger.debug("waiting for output of task %s (page %s) max_seconds=%d", task, page_id, max_seconds) - task.result(timeout=max_seconds or None) - nr_succeeded += 1 - # exclude NotImplementedError, so we can try process() below - except NotImplementedError: - raise - # handle input failures separately - except FileExistsError as err: - if config.OCRD_EXISTING_OUTPUT == 'ABORT': - raise err - if config.OCRD_EXISTING_OUTPUT == 'SKIP': - continue - if config.OCRD_EXISTING_OUTPUT == 'OVERWRITE': - # too late here, must not happen - raise Exception(f"got {err} despite OCRD_EXISTING_OUTPUT==OVERWRITE") - # broad coverage of output failures (including TimeoutError) - except (Exception, TimeoutError) as err: - # FIXME: add re-usable/actionable logging - if config.OCRD_MISSING_OUTPUT == 'ABORT': - self._base_logger.error(f"Failure on page 
{page_id}: {str(err) or err.__class__.__name__}") - raise err - self._base_logger.exception(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}") - if config.OCRD_MISSING_OUTPUT == 'SKIP': - nr_skipped += 1 - continue - if config.OCRD_MISSING_OUTPUT == 'COPY': - self._copy_page_file(input_files[0]) - nr_copied += 1 - else: - desc = config.describe('OCRD_MISSING_OUTPUT', wrap_text=False, indent_text=False) - raise ValueError(f"unknown configuration value {config.OCRD_MISSING_OUTPUT} - {desc}") - - if nr_skipped > 0 and nr_succeeded / nr_skipped < config.OCRD_MAX_MISSING_OUTPUTS: - raise Exception(f"too many failures with skipped output ({nr_skipped})") - if nr_copied > 0 and nr_succeeded / nr_copied < config.OCRD_MAX_MISSING_OUTPUTS: - raise Exception(f"too many failures with fallback output ({nr_skipped})") - executor.shutdown() + if max_workers > 1: + log_listener.start() + try: + self._base_logger.debug("started executor %s with %d workers", str(executor), max_workers or 1) + tasks = self.process_workspace_submit_tasks(executor, max_seconds) + stats = self.process_workspace_handle_tasks(tasks) + finally: + executor.shutdown(kill_workers=True, wait=False) + if max_workers > 1: + log_listener.stop() except NotImplementedError: # fall back to deprecated method @@ -564,6 +551,190 @@ def process_workspace(self, workspace: Workspace) -> None: # suppress the NotImplementedError context raise err from None + def process_workspace_submit_tasks(self, executor : TExecutor, max_seconds : int) -> Dict[TFuture, Tuple[str, List[Optional[OcrdFileType]]]]: + """ + Look up all input files of the given ``workspace`` + from the given :py:data:`input_file_grp` + for the given :py:data:`page_id` (or all pages), + and schedules calling :py:meth:`.process_page_file` + on them for each page via `executor` (enforcing + a per-page time limit of `max_seconds`). 
+ + When running with `OCRD_MAX_PARALLEL_PAGES>1` and + the workspace via METS Server, the executor will fork + this many parallel worker subprocesses each processing + one page at a time. (Interprocess communication is + done via task and result queues.) + + Otherwise, tasks are run sequentially in the + current process. + + Delegates to :py:meth:`.zip_input_files` to get + the input files for each page, and then calls + :py:meth:`.process_workspace_submit_page_task`. + + Returns a dict mapping the per-page tasks + (i.e. futures submitted to the executor) + to their corresponding pageId and input files. + """ + tasks = {} + for input_file_tuple in self.zip_input_files(on_error='abort', require_first=False): + task, page_id, input_files = self.process_workspace_submit_page_task(executor, max_seconds, input_file_tuple) + tasks[task] = (page_id, input_files) + self._base_logger.debug("submitted %d processing tasks", len(tasks)) + return tasks + + def process_workspace_submit_page_task(self, executor : TExecutor, max_seconds : int, input_file_tuple : List[Optional[OcrdFileType]]) -> Tuple[TFuture, str, List[Optional[OcrdFileType]]]: + """ + Ensure all input files for a single page are + downloaded to the workspace, then schedule + :py:meth:`.process_page_file` to be run on + them via `executor` (enforcing a per-page time + limit of `max_seconds`). + + Delegates to :py:meth:`.process_page_file` + (wrapped in :py:func:`_page_worker` to share + the processor instance across forked processes). + + \b + Returns a tuple of: + - the scheduled future object, + - the corresponding pageId, + - the corresponding input files.
+ """ + input_files : List[Optional[OcrdFileType]] = [None] * len(input_file_tuple) + page_id = next(input_file.pageId + for input_file in input_file_tuple + if input_file) + self._base_logger.info(f"preparing page {page_id}") + for i, input_file in enumerate(input_file_tuple): + if input_file is None: + # file/page not found in this file grp + continue + input_files[i] = input_file + if not self.download: + continue + try: + input_files[i] = self.workspace.download_file(input_file) + except (ValueError, FileNotFoundError, HTTPError) as e: + self._base_logger.error(repr(e)) + self._base_logger.warning(f"failed downloading file {input_file} for page {page_id}") + # process page + #executor.submit(self.process_page_file, *input_files) + return executor.submit(_page_worker, max_seconds, *input_files), page_id, input_files + + def process_workspace_handle_tasks(self, tasks : Dict[TFuture, Tuple[str, List[Optional[OcrdFileType]]]]) -> Tuple[int, int, Dict[str, int], int]: + """ + Look up scheduled per-page futures one by one, + handle errors (exceptions) and gather results. + + \b + Enforces policies configured by the following + environment variables: + - `OCRD_EXISTING_OUTPUT` (abort/skip/overwrite) + - `OCRD_MISSING_OUTPUT` (abort/skip/fallback-copy) + - `OCRD_MAX_MISSING_OUTPUTS` (abort after all). + + \b + Returns a tuple of: + - the number of successfully processed pages + - the number of failed (i.e. skipped or copied) pages + - a dict of the type and corresponding number of exceptions seen + - the number of total requested pages (i.e. success+fail+existing). + + Delegates to :py:meth:`.process_workspace_handle_page_task` + for each page. 
+ """ + # aggregate info for logging: + nr_succeeded = 0 + nr_failed = 0 + nr_errors = defaultdict(int) # count causes + if config.OCRD_MISSING_OUTPUT == 'SKIP': + reason = "skipped" + elif config.OCRD_MISSING_OUTPUT == 'COPY': + reason = "fallback-copied" + for task in tasks: + # wait for results, handle errors + page_id, input_files = tasks[task] + result = self.process_workspace_handle_page_task(page_id, input_files, task) + if isinstance(result, Exception): + nr_errors[result.__class__.__name__] += 1 + nr_failed += 1 + # FIXME: this is just prospective, because len(tasks)==nr_failed+nr_succeeded is not guaranteed + if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / len(tasks) > config.OCRD_MAX_MISSING_OUTPUTS: + # already irredeemably many failures, stop short + nr_errors = dict(nr_errors) + raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_failed+nr_succeeded}, {str(nr_errors)})") + elif result: + nr_succeeded += 1 + # else skipped - already exists + nr_errors = dict(nr_errors) + if nr_failed > 0: + nr_all = nr_succeeded + nr_failed + if config.OCRD_MAX_MISSING_OUTPUTS > 0 and nr_failed / nr_all > config.OCRD_MAX_MISSING_OUTPUTS: + raise Exception(f"too many failures with {reason} output ({nr_failed} of {nr_all}, {str(nr_errors)})") + self._base_logger.warning("%s %d of %d pages due to %s", reason, nr_failed, nr_all, str(nr_errors)) + return nr_succeeded, nr_failed, nr_errors, len(tasks) + + def process_workspace_handle_page_task(self, page_id : str, input_files : List[Optional[OcrdFileType]], task : TFuture) -> Union[bool, Exception]: + """ + \b + Await a single page result and handle errors (exceptions), + enforcing policies configured by the following + environment variables: + - `OCRD_EXISTING_OUTPUT` (abort/skip/overwrite) + - `OCRD_MISSING_OUTPUT` (abort/skip/fallback-copy) + - `OCRD_MAX_MISSING_OUTPUTS` (abort after all). 
+ + \b + Returns + - true in case of success + - false in case the output already exists + - the exception in case of failure + """ + # FIXME: differentiate error cases in various ways: + # - ResourceNotFoundError → use ResourceManager to download (once), then retry + # - transient (I/O or OOM) error → maybe sleep, retry + # - persistent (data) error → skip / dummy / raise + try: + self._base_logger.debug("waiting for output of task %s (page %s)", task, page_id) + # timeout kwarg on future is useless: it only raises TimeoutError here, + # but does not stop the running process/thread, and executor itself + # offers nothing to that effect: + # task.result(timeout=max_seconds or None) + # so we instead applied the timeout within the worker function + task.result() + return True + except NotImplementedError: + # exclude NotImplementedError, so we can try process() below + raise + # handle input failures separately + except FileExistsError as err: + if config.OCRD_EXISTING_OUTPUT == 'ABORT': + raise err + if config.OCRD_EXISTING_OUTPUT == 'SKIP': + return False + if config.OCRD_EXISTING_OUTPUT == 'OVERWRITE': + # too late here, must not happen + raise Exception(f"got {err} despite OCRD_EXISTING_OUTPUT==OVERWRITE") + except KeyboardInterrupt: + raise + # broad coverage of output failures (including TimeoutError) + except Exception as err: + # FIXME: add re-usable/actionable logging + if config.OCRD_MISSING_OUTPUT == 'ABORT': + self._base_logger.error(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}") + raise err + self._base_logger.exception(f"Failure on page {page_id}: {str(err) or err.__class__.__name__}") + if config.OCRD_MISSING_OUTPUT == 'SKIP': + pass + elif config.OCRD_MISSING_OUTPUT == 'COPY': + self._copy_page_file(input_files[0]) + else: + desc = config.describe('OCRD_MISSING_OUTPUT', wrap_text=False, indent_text=False) + raise ValueError(f"unknown configuration value {config.OCRD_MISSING_OUTPUT} - {desc}") + return err + def 
_copy_page_file(self, input_file : OcrdFileType) -> None: """ Copy the given ``input_file`` of the :py:data:`workspace`, @@ -618,6 +789,12 @@ def process_page_file(self, *input_files : Optional[OcrdFileType]) -> None: # not PAGE and not an image to generate PAGE for self._base_logger.error(f"non-PAGE input for page {page_id}: {err}") output_file_id = make_file_id(input_files[0], self.output_file_grp) + output_file = next(self.workspace.mets.find_files(ID=output_file_id), None) + if output_file and config.OCRD_EXISTING_OUTPUT != 'OVERWRITE': + # short-cut avoiding useless computation: + raise FileExistsError( + f"A file with ID=={output_file_id} already exists {output_file} and neither force nor ignore are set" + ) result = self.process_page_pcgts(*input_pcgts, page_id=page_id) for image_result in result.images: image_file_id = f'{output_file_id}_{image_result.file_id_suffix}' @@ -934,6 +1111,47 @@ def zip_input_files(self, require_first=True, mimetype=None, on_error='skip'): ifts.append(tuple(ifiles)) return ifts +_page_worker_processor = None +""" +This global binding for the processor is required to avoid +squeezing the processor through a mp.Queue (which is impossible +due to unpicklable attributes like .workspace.mets._tree anyway) +when calling Processor.process_page_file as page worker processes +in Processor.process_workspace. Forking allows inheriting global +objects, and with the METS Server we do not mutate the local +processor instance anyway. +""" +def _page_worker_set_ctxt(processor, log_queue): + """ + Overwrites `ocrd.processor.base._page_worker_processor` instance + for sharing with subprocesses in ProcessPoolExecutor initializer. 
+ """ + global _page_worker_processor + _page_worker_processor = processor + if log_queue: + # replace all log handlers with just one queue handler + logging.root.handlers = [logging.handlers.QueueHandler(log_queue)] + +def _page_worker(timeout, *input_files): + """ + Wraps a `Processor.process_page_file` call as payload (call target) + of the ProcessPoolExecutor workers, but also enforces the given timeout. + """ + page_id = next((file.pageId for file in input_files + if hasattr(file, 'pageId')), "") + if timeout > 0: + timer = Timer(timeout, interrupt_main) + timer.start() + try: + _page_worker_processor.process_page_file(*input_files) + _page_worker_processor.logger.debug("page worker completed for page %s", page_id) + except KeyboardInterrupt: + _page_worker_processor.logger.debug("page worker timed out for page %s", page_id) + raise TimeoutError() + finally: + if timeout > 0: + timer.cancel() + def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None): """Generate a string describing the full CLI of this processor including params. diff --git a/src/ocrd/processor/builtin/dummy/ocrd-tool.json b/src/ocrd/processor/builtin/dummy/ocrd-tool.json index ef4a4810fe..c79afcacbd 100644 --- a/src/ocrd/processor/builtin/dummy/ocrd-tool.json +++ b/src/ocrd/processor/builtin/dummy/ocrd-tool.json @@ -16,6 +16,26 @@ "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" } } + }, + "ocrd-filter": { + "executable": "ocrd-filter", + "description": "Bare-bones processor can be dynamically configured to remove segments based on XPath queries", + "steps": ["recognition/post-correction"], + "categories": ["Quality assurance"], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "parameters": { + "select": { + "type": "string", + "default": "//*[ends-with(local-name(),'Region')]", + "description": "Which segments to select for removal. 
An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'." + }, + "plot": { + "type": "boolean", + "default": false, + "description": "Whether to extract an image for each filtered segment and write to the output fileGrp." 
+ } + } } } } diff --git a/src/ocrd/processor/builtin/dummy_processor.py b/src/ocrd/processor/builtin/dummy_processor.py index 72a260968f..bf7e2940b8 100644 --- a/src/ocrd/processor/builtin/dummy_processor.py +++ b/src/ocrd/processor/builtin/dummy_processor.py @@ -13,9 +13,6 @@ make_file_id, MIME_TO_EXT, MIMETYPE_PAGE, - parse_json_string_with_comments, - resource_string, - config ) from ocrd_modelfactory import page_from_file diff --git a/src/ocrd/processor/builtin/filter_processor.py b/src/ocrd/processor/builtin/filter_processor.py new file mode 100644 index 0000000000..c81517b0e5 --- /dev/null +++ b/src/ocrd/processor/builtin/filter_processor.py @@ -0,0 +1,108 @@ +# pylint: disable=missing-module-docstring,invalid-name +from typing import Optional + +from lxml import etree +import click + +from ocrd import Processor, OcrdPageResult, OcrdPageResultImage +from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor +from ocrd_models import OcrdPage + +_SEGTYPES = [ + "NoiseRegion", + "LineDrawingRegion", + "AdvertRegion", + "ImageRegion", + "ChartRegion", + "MusicRegion", + "GraphicRegion", + "UnknownRegion", + "CustomRegion", + "SeparatorRegion", + "MathsRegion", + "TextRegion", + "MapRegion", + "ChemRegion", + "TableRegion", + "TextLine", + "Word", + "Glyph" +] + +class FilterProcessor(Processor): + def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: + """ + Remove PAGE segment hierarchy elements based on flexible selection criteria. + + Open and deserialise PAGE input file, then iterate over the segment hierarchy + down to the level required for ``select`` (which could be multiple levels at once). + + Remove any segments matching XPath query ``select`` from that hierarchy (and from + the `ReadingOrder` if it is a region type). 
+ + \b + Besides full XPath 2.0 syntax, this supports extra predicates: + - `pc:pixelarea()` for the number of pixels of the bounding box (or sum area on node sets), + - `pc:textequiv()` for the first TextEquiv unicode string (or concatenated string on node sets). + + If ``plot`` is `true`, then extract and write an image file for all removed segments + to the output fileGrp (without reference to the PAGE). + + Produce a new PAGE output file by serialising the resulting hierarchy. + """ + pcgts = input_pcgts[0] + result = OcrdPageResult(pcgts) + nodes = pcgts.xpath(self.parameter['select']) + # get PAGE objects from matching etree nodes + # but allow only hierarchy segments + segments = [segment for segment in map(pcgts.revmap.get, nodes) + if segment.__class__.__name__.replace('Type', '') in _SEGTYPES] + if not(len(segments)): + self.logger.info("no matches") + return result + rodict = pcgts.get_Page().get_ReadingOrderGroups() + if self.parameter['plot']: + page_image, page_coords, _ = self.workspace.image_from_page(pcgts.get_Page(), page_id) + for segment in segments: + segtype = segment.original_tagname_ + self.logger.info("matched %s segment %s", segtype, segment.id) + parent = segment.parent_object_ + partype = parent.__class__.__name__.replace('Type', '') + if partype == 'Page': + getattr(parent, 'get_' + segtype)().remove(segment) + elif partype.endswith('Region'): + if segtype.endswith('Region'): + getattr(parent, 'get_' + segtype)().remove(segment) + else: + parent.TextLine.remove(segment) + elif partype == 'TextLine': + parent.Word.remove(segment) + elif partype == 'Word': + parent.Glyph.remove(segment) + else: + raise Exception(f"unexpected type ({partype}) of parent for matched segment ({segtype})") + segment.parent_object_ = None + if segtype.endswith('Region') and segment.id in rodict: + # remove from ReadingOrder as well + roelem = rodict[segment.id] + rorefs = getattr(roelem.parent_object_, roelem.__class__.__name__.replace('Type', '')) + 
rorefs.remove(roelem) + roelem.parent_object_ = None + del rodict[segment.id] + if self.parameter['plot']: + segment_image, _ = self.workspace.image_from_segment(segment, page_image, page_coords) + result.images.append(OcrdPageResultImage(segment_image, segment.id + '.IMG', None)) + return result + + @property + def metadata_filename(self): + return 'processor/builtin/dummy/ocrd-tool.json' + + @property + def executable(self): + return 'ocrd-filter' + +@click.command() +@ocrd_cli_options +def cli(*args, **kwargs): + return ocrd_cli_wrap_processor(FilterProcessor, *args, **kwargs) diff --git a/src/ocrd/processor/helpers.py b/src/ocrd/processor/helpers.py index 2cbbbd97e1..757f7ac045 100644 --- a/src/ocrd/processor/helpers.py +++ b/src/ocrd/processor/helpers.py @@ -2,6 +2,7 @@ Helper methods for running and documenting processors """ from time import perf_counter, process_time +from os import times from functools import lru_cache import json import inspect @@ -94,6 +95,7 @@ def run_processor( log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole) t0_wall = perf_counter() t0_cpu = process_time() + t0_os = times() if any(x in config.OCRD_PROFILE for x in ['RSS', 'PSS']): backend = 'psutil_pss' if 'PSS' in config.OCRD_PROFILE else 'psutil' from memory_profiler import memory_usage # pylint: disable=import-outside-toplevel @@ -123,7 +125,13 @@ def run_processor( t1_wall = perf_counter() - t0_wall t1_cpu = process_time() - t0_cpu - logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % ( + t1_os = times() + # add CPU time from child processes (page worker etc) + t1_cpu += t1_os.children_user - t0_os.children_user + t1_cpu += t1_os.children_system - t0_os.children_system + logProfile.info( + "Executing processor '%s' took %fs (wall) %fs (CPU)( " + "[--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']", ocrd_tool['executable'], 
t1_wall, t1_cpu, @@ -131,7 +139,7 @@ def run_processor( processor.output_file_grp or '', json.dumps(processor.parameter) or '', processor.page_id or '' - )) + ) workspace.mets.add_agent( name=name, _type='OTHER', diff --git a/src/ocrd/resource_manager.py b/src/ocrd/resource_manager.py index 3c4c603060..7f014cbf0e 100644 --- a/src/ocrd/resource_manager.py +++ b/src/ocrd/resource_manager.py @@ -23,6 +23,10 @@ # pylint: enable=wrong-import-position +# pylint: enable=wrong-import-position + +# pylint: enable=wrong-import-position + from ocrd_validators import OcrdResourceListValidator from ocrd_utils import getLogger, directory_size, get_moduledir, guess_media_type, config from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json diff --git a/src/ocrd_modelfactory/__init__.py b/src/ocrd_modelfactory/__init__.py index 828949fe96..3f7d675f86 100644 --- a/src/ocrd_modelfactory/__init__.py +++ b/src/ocrd_modelfactory/__init__.py @@ -101,5 +101,11 @@ def page_from_file(input_file, **kwargs) -> OcrdPage: if input_file.mimetype.startswith('image'): return page_from_image(input_file) if input_file.mimetype == MIMETYPE_PAGE: - return OcrdPage(*parseEtree(input_file.local_filename, silence=True)) + revmap = {} + # the old/default gds.reverse_node_mapping is useless + # since 2.39.4, we can actually get the exact reverse mapping for perfect round-trip + # but awkwardly, we have to pass the dict in for that + page = OcrdPage(*parseEtree(input_file.local_filename, reverse_mapping=revmap, silence=True)) + page.revmap = revmap + return page raise ValueError("Unsupported mimetype '%s'" % input_file.mimetype) diff --git a/src/ocrd_models/ocrd_exif.py b/src/ocrd_models/ocrd_exif.py index ab050bae59..937416f5ef 100644 --- a/src/ocrd_models/ocrd_exif.py +++ b/src/ocrd_models/ocrd_exif.py @@ -49,11 +49,11 @@ def run_identify(self, img): for prop in ['compression', 'photometric_interpretation']: setattr(self, prop, img.info[prop] if prop in 
img.info else None) if img.filename: - ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U', img.filename], check=False, stderr=PIPE, stdout=PIPE) + ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U ', img.filename], check=False, stderr=PIPE, stdout=PIPE) else: with BytesIO() as bio: img.save(bio, format=img.format) - ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U', '/dev/stdin'], check=False, stderr=PIPE, stdout=PIPE, input=bio.getvalue()) + ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U ', '/dev/stdin'], check=False, stderr=PIPE, stdout=PIPE, input=bio.getvalue()) if ret.returncode: stderr = ret.stderr.decode('utf-8') if 'no decode delegate for this image format' in stderr: diff --git a/src/ocrd_models/ocrd_mets.py b/src/ocrd_models/ocrd_mets.py index c3fb11f600..de068567e2 100644 --- a/src/ocrd_models/ocrd_mets.py +++ b/src/ocrd_models/ocrd_mets.py @@ -599,7 +599,16 @@ def get_physical_pages(self, for_fileIds : Optional[List[str]] = None, for_pageI If return_divs is set, returns div memory objects instead of strings of ids """ if for_fileIds is None and for_pageIds is None: + if return_divs: + if self._cache_flag: + return list(self._page_cache[METS_PAGE_DIV_ATTRIBUTE.ID].values()) + + return [x for x in self._tree.getroot().xpath( + 'mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div[@TYPE="page"]', + namespaces=NS)] + return self.physical_pages + # log = getLogger('ocrd.models.ocrd_mets.get_physical_pages') if for_pageIds is not None: ret = [] diff --git a/src/ocrd_models/ocrd_page.py b/src/ocrd_models/ocrd_page.py index 3f0cc690fa..046606100f 100644 --- a/src/ocrd_models/ocrd_page.py +++ b/src/ocrd_models/ocrd_page.py @@ -2,8 +2,9 @@ API to PAGE-XML, generated with generateDS from XML schema. 
""" from io import StringIO -from typing import Dict, Union +from typing import Dict, Union, Any from lxml import etree as ET +from elementpath import XPath2Parser, XPathContext __all__ = [ 'parse', @@ -132,6 +133,7 @@ ) from .constants import NAMESPACES +from .xpath_functions import pc_functions # add docstrings parse.__doc__ = ( @@ -189,12 +191,25 @@ def __init__( pcgts : PcGtsType, etree : ET._Element, mapping : Dict[str, ET._Element], - revmap : Dict[ET._Element, str], + revmap : Dict[ET._Element, Any], ): self._pcgts = pcgts self.etree = etree self.mapping = mapping self.revmap = revmap + self.xpath_parser = XPath2Parser(namespaces={ + 'page': NAMESPACES['page'], + 'pc': NAMESPACES['page']}) + for func in pc_functions: + name = func.__name__.replace('_', '-') + if name.startswith('pc-'): + name = name[3:] + elif name.startswith('pc'): + name = name[2:] + # register + self.xpath_parser.external_function(func, name=name, prefix='pc') + self.xpath_context = XPathContext(self.etree) + self.xpath = lambda expression: self.xpath_parser.parse(expression).get_results(self.xpath_context) def __getattr__(self, name): return getattr(self._pcgts, name) @@ -208,11 +223,15 @@ def to_xml(el, skip_declaration=False) -> str: # XXX remove potential empty ReadingOrder if hasattr(el, 'prune_ReadingOrder'): el.prune_ReadingOrder() + if hasattr(el, 'original_tagname_'): + name = el.original_tagname_ or 'PcGts' + else: + name = 'PcGts' sio = StringIO() el.export( outfile=sio, level=0, - name_='PcGts', + name_=name, namespaceprefix_='pc:', namespacedef_='xmlns:pc="%s" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="%s %s/pagecontent.xsd"' % ( NAMESPACES['page'], diff --git a/src/ocrd_models/ocrd_page_generateds.py b/src/ocrd_models/ocrd_page_generateds.py index f2b7c0551e..97d5a800b6 100644 --- a/src/ocrd_models/ocrd_page_generateds.py +++ b/src/ocrd_models/ocrd_page_generateds.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # -# Generated Sat Sep 7 14:17:39 2024 
by generateDS.py version 2.35.20. +# Generated Sun Sep 15 21:49:27 2024 by generateDS.py version 2.44.1. # Python 3.8.17+ (heads/3.8-dirty:1663f8ba84, Aug 15 2023, 18:13:01) [GCC 8.3.0] # # Command line options: @@ -24,21 +24,23 @@ # core # +import sys +try: + ModulenotfoundExp_ = ModuleNotFoundError +except NameError: + ModulenotfoundExp_ = ImportError from itertools import zip_longest import os -import sys import re as re_ import base64 import datetime as datetime_ import decimal as decimal_ -try: - from lxml import etree as etree_ -except ImportError: - from xml.etree import ElementTree as etree_ +from lxml import etree as etree_ Validate_simpletypes_ = True SaveElementTreeNode = True +TagNamePrefix = "" if sys.version_info.major == 2: BaseStrType_ = basestring else: @@ -97,7 +99,7 @@ def parsexmlstring_(instring, parser=None, **kwargs): # Additionally, the generatedsnamespaces module can contain a python # dictionary named GenerateDSNamespaceTypePrefixes that associates element # types with the namespace prefixes that are to be added to the -# "xsi:type" attribute value. See the exportAttributes method of +# "xsi:type" attribute value. See the _exportAttributes method of # any generated element type and the generation of "xsi:type" for an # example of the use of this table. 
# An example table: @@ -112,11 +114,11 @@ def parsexmlstring_(instring, parser=None, **kwargs): try: from generatedsnamespaces import GenerateDSNamespaceDefs as GenerateDSNamespaceDefs_ -except ImportError: +except ModulenotfoundExp_ : GenerateDSNamespaceDefs_ = {} try: from generatedsnamespaces import GenerateDSNamespaceTypePrefixes as GenerateDSNamespaceTypePrefixes_ -except ImportError: +except ModulenotfoundExp_ : GenerateDSNamespaceTypePrefixes_ = {} # @@ -127,7 +129,7 @@ def parsexmlstring_(instring, parser=None, **kwargs): # try: from generatedscollector import GdsCollector as GdsCollector_ -except ImportError: +except ModulenotfoundExp_ : class GdsCollector_(object): @@ -161,7 +163,7 @@ def write_messages(self, outstream): try: from enum import Enum -except ImportError: +except ModulenotfoundExp_ : Enum = object # @@ -174,7 +176,7 @@ def write_messages(self, outstream): class GeneratedsSuper(object): __hash__ = object.__hash__ - tzoff_pattern = re_.compile(r'(\+|-)((0\d|1[0-3]):[0-5]\d|14:00)$') + tzoff_pattern = re_.compile('(\\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00)$') class _FixedOffsetTZ(datetime_.tzinfo): def __init__(self, offset, name): self.__offset = datetime_.timedelta(minutes=offset) @@ -185,6 +187,33 @@ def tzname(self, dt): return self.__name def dst(self, dt): return None + def __str__(self): + settings = { + 'str_pretty_print': True, + 'str_indent_level': 0, + 'str_namespaceprefix': '', + 'str_name': self.__class__.__name__, + 'str_namespacedefs': '', + } + for n in settings: + if hasattr(self, n): + settings[n] = getattr(self, n) + if sys.version_info.major == 2: + from StringIO import StringIO + else: + from io import StringIO + output = StringIO() + self.export( + output, + settings['str_indent_level'], + pretty_print=settings['str_pretty_print'], + namespaceprefix_=settings['str_namespaceprefix'], + name_=settings['str_name'], + namespacedef_=settings['str_namespacedefs'] + ) + strval = output.getvalue() + output.close() + return strval 
def gds_format_string(self, input_data, input_name=''): return input_data def gds_parse_string(self, input_data, node=None, input_name=''): @@ -195,11 +224,11 @@ def gds_validate_string(self, input_data, node=None, input_name=''): else: return input_data def gds_format_base64(self, input_data, input_name=''): - return base64.b64encode(input_data) + return base64.b64encode(input_data).decode('ascii') def gds_validate_base64(self, input_data, node=None, input_name=''): return input_data def gds_format_integer(self, input_data, input_name=''): - return '%d' % input_data + return '%d' % int(input_data) def gds_parse_integer(self, input_data, node=None, input_name=''): try: ival = int(input_data) @@ -213,6 +242,8 @@ def gds_validate_integer(self, input_data, node=None, input_name=''): raise_parse_error(node, 'Requires integer value') return value def gds_format_integer_list(self, input_data, input_name=''): + if len(input_data) > 0 and not isinstance(input_data[0], BaseStrType_): + input_data = [str(s) for s in input_data] return '%s' % ' '.join(input_data) def gds_validate_integer_list( self, input_data, node=None, input_name=''): @@ -221,10 +252,14 @@ def gds_validate_integer_list( try: int(value) except (TypeError, ValueError): - raise_parse_error(node, 'Requires sequence of integer valuess') + raise_parse_error(node, 'Requires sequence of integer values') return values def gds_format_float(self, input_data, input_name=''): - return ('%.15f' % input_data).rstrip('0') + value = ('%.15f' % float(input_data)).rstrip('0') + if value.endswith('.'): + value += '0' + return value + def gds_parse_float(self, input_data, node=None, input_name=''): try: fval_ = float(input_data) @@ -238,6 +273,8 @@ def gds_validate_float(self, input_data, node=None, input_name=''): raise_parse_error(node, 'Requires float value') return value def gds_format_float_list(self, input_data, input_name=''): + if len(input_data) > 0 and not isinstance(input_data[0], BaseStrType_): + input_data = 
[str(s) for s in input_data] return '%s' % ' '.join(input_data) def gds_validate_float_list( self, input_data, node=None, input_name=''): @@ -249,7 +286,12 @@ def gds_validate_float_list( raise_parse_error(node, 'Requires sequence of float values') return values def gds_format_decimal(self, input_data, input_name=''): - return ('%s' % input_data).rstrip('0') + return_value = '%s' % input_data + if '.' in return_value: + return_value = return_value.rstrip('0') + if return_value.endswith('.'): + return_value = return_value.rstrip('.') + return return_value def gds_parse_decimal(self, input_data, node=None, input_name=''): try: decimal_value = decimal_.Decimal(input_data) @@ -263,7 +305,9 @@ def gds_validate_decimal(self, input_data, node=None, input_name=''): raise_parse_error(node, 'Requires decimal value') return value def gds_format_decimal_list(self, input_data, input_name=''): - return '%s' % ' '.join(input_data) + if len(input_data) > 0 and not isinstance(input_data[0], BaseStrType_): + input_data = [str(s) for s in input_data] + return ' '.join([self.gds_format_decimal(item) for item in input_data]) def gds_validate_decimal_list( self, input_data, node=None, input_name=''): values = input_data.split() @@ -274,7 +318,7 @@ def gds_validate_decimal_list( raise_parse_error(node, 'Requires sequence of decimal values') return values def gds_format_double(self, input_data, input_name=''): - return '%e' % input_data + return '%s' % input_data def gds_parse_double(self, input_data, node=None, input_name=''): try: fval_ = float(input_data) @@ -288,6 +332,8 @@ def gds_validate_double(self, input_data, node=None, input_name=''): raise_parse_error(node, 'Requires double or float value') return value def gds_format_double_list(self, input_data, input_name=''): + if len(input_data) > 0 and not isinstance(input_data[0], BaseStrType_): + input_data = [str(s) for s in input_data] return '%s' % ' '.join(input_data) def gds_validate_double_list( self, input_data, node=None, 
input_name=''): @@ -302,6 +348,7 @@ def gds_validate_double_list( def gds_format_boolean(self, input_data, input_name=''): return ('%s' % input_data).lower() def gds_parse_boolean(self, input_data, node=None, input_name=''): + input_data = input_data.strip() if input_data in ('true', '1'): bval = True elif input_data in ('false', '0'): @@ -317,11 +364,14 @@ def gds_validate_boolean(self, input_data, node=None, input_name=''): '(one of True, 1, False, 0)') return input_data def gds_format_boolean_list(self, input_data, input_name=''): + if len(input_data) > 0 and not isinstance(input_data[0], BaseStrType_): + input_data = [str(s) for s in input_data] return '%s' % ' '.join(input_data) def gds_validate_boolean_list( self, input_data, node=None, input_name=''): values = input_data.split() for value in values: + value = self.gds_parse_boolean(value, node, input_name) if value not in (True, 1, False, 0, ): raise_parse_error( node, @@ -478,6 +528,7 @@ def gds_validate_simple_patterns(self, patterns, target): # The target value must match at least one of the patterns # in order for the test to succeed. 
found1 = True + target = str(target) for patterns1 in patterns: found2 = False for patterns2 in patterns1: @@ -563,7 +614,7 @@ def get_path_(self, node): path_list.reverse() path = '/'.join(path_list) return path - Tag_strip_pattern_ = re_.compile(r'\{.*\}') + Tag_strip_pattern_ = re_.compile(r'{.*}') def get_path_list_(self, node, path_list): if node is None: return @@ -723,6 +774,7 @@ def quote_attrib(inStr): s1 = s1.replace('&', '&amp;') s1 = s1.replace('<', '&lt;') s1 = s1.replace('>', '&gt;') + s1 = s1.replace('\n', '&#10;') if '"' in s1: if "'" in s1: s1 = '"%s"' % s1.replace('"', "&quot;") @@ -768,7 +820,10 @@ def find_attr_value_(attr_name, node): value = attrs.get(attr_name) elif len(attr_parts) == 2: prefix, name = attr_parts - namespace = node.nsmap.get(prefix) + if prefix == 'xml': + namespace = 'http://www.w3.org/XML/1998/namespace' + else: + namespace = node.nsmap.get(prefix) if namespace is not None: value = attrs.get('{%s}%s' % (namespace, name, )) return value @@ -849,7 +904,7 @@ def exportSimple(self, outfile, level, name): self.name, base64.b64encode(self.value), self.name)) - def to_etree(self, element, mapping_=None, nsmap_=None): + def to_etree(self, element, mapping_=None, reverse_mapping_=None, nsmap_=None): if self.category == MixedContainer.CategoryText: # Prevent exporting empty content as empty lines. if self.value.strip(): @@ -869,7 +924,7 @@ def to_etree(self, element, mapping_=None, nsmap_=None): subelement.text = self.to_etree_simple() else: # category == MixedContainer.CategoryComplex self.value.to_etree(element) - def to_etree_simple(self, mapping_=None, nsmap_=None): + def to_etree_simple(self, mapping_=None, reverse_mapping_=None, nsmap_=None): if self.content_type == MixedContainer.TypeString: text = self.value elif (self.content_type == MixedContainer.TypeInteger or @@ -942,11 +997,10 @@ def _cast(typ, value): return value return typ(value) + # -# Data representation classes
+# Start enum classes # - - class AlignSimpleType(str, Enum): LEFT='left' CENTRE='centre' @@ -1013,6 +1067,200 @@ class GroupTypeSimpleType(str, Enum): OTHER='other' +class LanguageSimpleType(str, Enum): + """LanguageSimpleType -- ISO 639.x 2016-07-14 + + """ + ABKHAZ='Abkhaz' + AFAR='Afar' + AFRIKAANS='Afrikaans' + AKAN='Akan' + ALBANIAN='Albanian' + AMHARIC='Amharic' + ARABIC='Arabic' + ARAGONESE='Aragonese' + ARMENIAN='Armenian' + ASSAMESE='Assamese' + AVARIC='Avaric' + AVESTAN='Avestan' + AYMARA='Aymara' + AZERBAIJANI='Azerbaijani' + BAMBARA='Bambara' + BASHKIR='Bashkir' + BASQUE='Basque' + BELARUSIAN='Belarusian' + BENGALI='Bengali' + BIHARI='Bihari' + BISLAMA='Bislama' + BOSNIAN='Bosnian' + BRETON='Breton' + BULGARIAN='Bulgarian' + BURMESE='Burmese' + CAMBODIAN='Cambodian' + CANTONESE='Cantonese' + CATALAN='Catalan' + CHAMORRO='Chamorro' + CHECHEN='Chechen' + CHICHEWA='Chichewa' + CHINESE='Chinese' + CHUVASH='Chuvash' + CORNISH='Cornish' + CORSICAN='Corsican' + CREE='Cree' + CROATIAN='Croatian' + CZECH='Czech' + DANISH='Danish' + DIVEHI='Divehi' + DUTCH='Dutch' + DZONGKHA='Dzongkha' + ENGLISH='English' + ESPERANTO='Esperanto' + ESTONIAN='Estonian' + EWE='Ewe' + FAROESE='Faroese' + FIJIAN='Fijian' + FINNISH='Finnish' + FRENCH='French' + FULA='Fula' + GAELIC='Gaelic' + GALICIAN='Galician' + GANDA='Ganda' + GEORGIAN='Georgian' + GERMAN='German' + GREEK='Greek' + GUARANÍ='Guaraní' + GUJARATI='Gujarati' + HAITIAN='Haitian' + HAUSA='Hausa' + HEBREW='Hebrew' + HERERO='Herero' + HINDI='Hindi' + HIRI_MOTU='Hiri Motu' + HUNGARIAN='Hungarian' + ICELANDIC='Icelandic' + IDO='Ido' + IGBO='Igbo' + INDONESIAN='Indonesian' + INTERLINGUA='Interlingua' + INTERLINGUE='Interlingue' + INUKTITUT='Inuktitut' + INUPIAQ='Inupiaq' + IRISH='Irish' + ITALIAN='Italian' + JAPANESE='Japanese' + JAVANESE='Javanese' + KALAALLISUT='Kalaallisut' + KANNADA='Kannada' + KANURI='Kanuri' + KASHMIRI='Kashmiri' + KAZAKH='Kazakh' + KHMER='Khmer' + KIKUYU='Kikuyu' + KINYARWANDA='Kinyarwanda' + 
KIRUNDI='Kirundi' + KOMI='Komi' + KONGO='Kongo' + KOREAN='Korean' + KURDISH='Kurdish' + KWANYAMA='Kwanyama' + KYRGYZ='Kyrgyz' + LAO='Lao' + LATIN='Latin' + LATVIAN='Latvian' + LIMBURGISH='Limburgish' + LINGALA='Lingala' + LITHUANIAN='Lithuanian' + LUBA_KATANGA='Luba-Katanga' + LUXEMBOURGISH='Luxembourgish' + MACEDONIAN='Macedonian' + MALAGASY='Malagasy' + MALAY='Malay' + MALAYALAM='Malayalam' + MALTESE='Maltese' + MANX='Manx' + MĀORI='Māori' + MARATHI='Marathi' + MARSHALLESE='Marshallese' + MONGOLIAN='Mongolian' + NAURU='Nauru' + NAVAJO='Navajo' + NDONGA='Ndonga' + NEPALI='Nepali' + NORTH_NDEBELE='North Ndebele' + NORTHERN_SAMI='Northern Sami' + NORWEGIAN='Norwegian' + NORWEGIAN_BOKMÅL='Norwegian Bokmål' + NORWEGIAN_NYNORSK='Norwegian Nynorsk' + NUOSU='Nuosu' + OCCITAN='Occitan' + OJIBWE='Ojibwe' + OLD_CHURCH_SLAVONIC='Old Church Slavonic' + ORIYA='Oriya' + OROMO='Oromo' + OSSETIAN='Ossetian' + PĀLI='Pāli' + PANJABI='Panjabi' + PASHTO='Pashto' + PERSIAN='Persian' + POLISH='Polish' + PORTUGUESE='Portuguese' + PUNJABI='Punjabi' + QUECHUA='Quechua' + ROMANIAN='Romanian' + ROMANSH='Romansh' + RUSSIAN='Russian' + SAMOAN='Samoan' + SANGO='Sango' + SANSKRIT='Sanskrit' + SARDINIAN='Sardinian' + SERBIAN='Serbian' + SHONA='Shona' + SINDHI='Sindhi' + SINHALA='Sinhala' + SLOVAK='Slovak' + SLOVENE='Slovene' + SOMALI='Somali' + SOUTH_NDEBELE='South Ndebele' + SOUTHERN_SOTHO='Southern Sotho' + SPANISH='Spanish' + SUNDANESE='Sundanese' + SWAHILI='Swahili' + SWATI='Swati' + SWEDISH='Swedish' + TAGALOG='Tagalog' + TAHITIAN='Tahitian' + TAJIK='Tajik' + TAMIL='Tamil' + TATAR='Tatar' + TELUGU='Telugu' + THAI='Thai' + TIBETAN='Tibetan' + TIGRINYA='Tigrinya' + TONGA='Tonga' + TSONGA='Tsonga' + TSWANA='Tswana' + TURKISH='Turkish' + TURKMEN='Turkmen' + TWI='Twi' + UIGHUR='Uighur' + UKRAINIAN='Ukrainian' + URDU='Urdu' + UZBEK='Uzbek' + VENDA='Venda' + VIETNAMESE='Vietnamese' + VOLAPÜK='Volapük' + WALLOON='Walloon' + WELSH='Welsh' + WESTERN_FRISIAN='Western Frisian' + WOLOF='Wolof' + 
XHOSA='Xhosa' + YIDDISH='Yiddish' + YORUBA='Yoruba' + ZHUANG='Zhuang' + ZULU='Zulu' + OTHER='other' + + class PageTypeSimpleType(str, Enum): FRONTCOVER='front-cover' BACKCOVER='back-cover' @@ -1025,7 +1273,9 @@ class PageTypeSimpleType(str, Enum): class ProductionSimpleType(str, Enum): - """Text production type""" + """ProductionSimpleType -- Text production type + + """ PRINTED='printed' TYPEWRITTEN='typewritten' HANDWRITTENCURSIVE='handwritten-cursive' @@ -1041,6 +1291,193 @@ class ReadingDirectionSimpleType(str, Enum): BOTTOMTOTOP='bottom-to-top' +class ScriptSimpleType(str, Enum): + """ScriptSimpleType -- iso15924 2016-07-14 + + """ + ADLM_ADLAM='Adlm - Adlam' + AFAK_AFAKA='Afak - Afaka' + AGHB_CAUCASIAN_ALBANIAN='Aghb - Caucasian Albanian' + AHOM_AHOM_TAI_AHOM='Ahom - Ahom, Tai Ahom' + ARAB_ARABIC='Arab - Arabic' + ARAN_ARABIC_NASTALIQVARIANT='Aran - Arabic (Nastaliq variant)' + ARMI_IMPERIAL_ARAMAIC='Armi - Imperial Aramaic' + ARMN_ARMENIAN='Armn - Armenian' + AVST_AVESTAN='Avst - Avestan' + BALI_BALINESE='Bali - Balinese' + BAMU_BAMUM='Bamu - Bamum' + BASS_BASSA_VAH='Bass - Bassa Vah' + BATK_BATAK='Batk - Batak' + BENG_BENGALI='Beng - Bengali' + BHKS_BHAIKSUKI='Bhks - Bhaiksuki' + BLIS_BLISSYMBOLS='Blis - Blissymbols' + BOPO_BOPOMOFO='Bopo - Bopomofo' + BRAH_BRAHMI='Brah - Brahmi' + BRAI_BRAILLE='Brai - Braille' + BUGI_BUGINESE='Bugi - Buginese' + BUHD_BUHID='Buhd - Buhid' + CAKM_CHAKMA='Cakm - Chakma' + CANS_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS='Cans - Unified Canadian Aboriginal Syllabics' + CARI_CARIAN='Cari - Carian' + CHAM_CHAM='Cham - Cham' + CHER_CHEROKEE='Cher - Cherokee' + CIRT_CIRTH='Cirt - Cirth' + COPT_COPTIC='Copt - Coptic' + CPRT_CYPRIOT='Cprt - Cypriot' + CYRL_CYRILLIC='Cyrl - Cyrillic' + CYRS_CYRILLIC_OLD_CHURCH_SLAVONICVARIANT='Cyrs - Cyrillic (Old Church Slavonic variant)' + DEVA_DEVANAGARI_NAGARI='Deva - Devanagari (Nagari)' + DSRT_DESERET_MORMON='Dsrt - Deseret (Mormon)' + DUPL_DUPLOYANSHORTHAND_DUPLOYANSTENOGRAPHY='Dupl - Duployan 
shorthand, Duployan stenography' + EGYD_EGYPTIANDEMOTIC='Egyd - Egyptian demotic' + EGYH_EGYPTIANHIERATIC='Egyh - Egyptian hieratic' + EGYP_EGYPTIANHIEROGLYPHS='Egyp - Egyptian hieroglyphs' + ELBA_ELBASAN='Elba - Elbasan' + ETHI_ETHIOPIC='Ethi - Ethiopic' + GEOK_KHUTSURI_ASOMTAVRULIAND_NUSKHURI='Geok - Khutsuri (Asomtavruli and Nuskhuri)' + GEOR_GEORGIAN_MKHEDRULI='Geor - Georgian (Mkhedruli)' + GLAG_GLAGOLITIC='Glag - Glagolitic' + GOTH_GOTHIC='Goth - Gothic' + GRAN_GRANTHA='Gran - Grantha' + GREK_GREEK='Grek - Greek' + GUJR_GUJARATI='Gujr - Gujarati' + GURU_GURMUKHI='Guru - Gurmukhi' + HANB_HANWITH_BOPOMOFO='Hanb - Han with Bopomofo' + HANG_HANGUL='Hang - Hangul' + HANI_HAN_HANZI_KANJI_HANJA='Hani - Han (Hanzi, Kanji, Hanja)' + HANO_HANUNOO_HANUNÓO='Hano - Hanunoo (Hanunóo)' + HANS_HAN_SIMPLIFIEDVARIANT='Hans - Han (Simplified variant)' + HANT_HAN_TRADITIONALVARIANT='Hant - Han (Traditional variant)' + HATR_HATRAN='Hatr - Hatran' + HEBR_HEBREW='Hebr - Hebrew' + HIRA_HIRAGANA='Hira - Hiragana' + HLUW_ANATOLIAN_HIEROGLYPHS='Hluw - Anatolian Hieroglyphs' + HMNG_PAHAWH_HMONG='Hmng - Pahawh Hmong' + HRKT_JAPANESESYLLABARIES='Hrkt - Japanese syllabaries' + HUNG_OLD_HUNGARIAN_HUNGARIAN_RUNIC='Hung - Old Hungarian (Hungarian Runic)' + INDS_INDUS_HARAPPAN='Inds - Indus (Harappan)' + ITAL_OLD_ITALIC_ETRUSCAN_OSCANETC='Ital - Old Italic (Etruscan, Oscan etc.)' + JAMO_JAMO='Jamo - Jamo' + JAVA_JAVANESE='Java - Javanese' + JPAN_JAPANESE='Jpan - Japanese' + JURC_JURCHEN='Jurc - Jurchen' + KALI_KAYAH_LI='Kali - Kayah Li' + KANA_KATAKANA='Kana - Katakana' + KHAR_KHAROSHTHI='Khar - Kharoshthi' + KHMR_KHMER='Khmr - Khmer' + KHOJ_KHOJKI='Khoj - Khojki' + KITL_KHITANLARGESCRIPT='Kitl - Khitan large script' + KITS_KHITANSMALLSCRIPT='Kits - Khitan small script' + KNDA_KANNADA='Knda - Kannada' + KORE_KOREANALIASFOR_HANGUL_HAN='Kore - Korean (alias for Hangul + Han)' + KPEL_KPELLE='Kpel - Kpelle' + KTHI_KAITHI='Kthi - Kaithi' + LANA_TAI_THAM_LANNA='Lana - Tai Tham (Lanna)' + 
LAOO_LAO='Laoo - Lao' + LATF_LATIN_FRAKTURVARIANT='Latf - Latin (Fraktur variant)' + LATG_LATIN_GAELICVARIANT='Latg - Latin (Gaelic variant)' + LATN_LATIN='Latn - Latin' + LEKE_LEKE='Leke - Leke' + LEPC_LEPCHARÓNG='Lepc - Lepcha (Róng)' + LIMB_LIMBU='Limb - Limbu' + LINA_LINEARA='Lina - Linear A' + LINB_LINEARB='Linb - Linear B' + LISU_LISU_FRASER='Lisu - Lisu (Fraser)' + LOMA_LOMA='Loma - Loma' + LYCI_LYCIAN='Lyci - Lycian' + LYDI_LYDIAN='Lydi - Lydian' + MAHJ_MAHAJANI='Mahj - Mahajani' + MAND_MANDAIC_MANDAEAN='Mand - Mandaic, Mandaean' + MANI_MANICHAEAN='Mani - Manichaean' + MARC_MARCHEN='Marc - Marchen' + MAYA_MAYANHIEROGLYPHS='Maya - Mayan hieroglyphs' + MEND_MENDE_KIKAKUI='Mend - Mende Kikakui' + MERC_MEROITIC_CURSIVE='Merc - Meroitic Cursive' + MERO_MEROITIC_HIEROGLYPHS='Mero - Meroitic Hieroglyphs' + MLYM_MALAYALAM='Mlym - Malayalam' + MODI_MODI_MOḌĪ='Modi - Modi, Moḍī' + MONG_MONGOLIAN='Mong - Mongolian' + MOON_MOON_MOONCODE_MOONSCRIPT_MOONTYPE='Moon - Moon (Moon code, Moon script, Moon type)' + MROO_MRO_MRU='Mroo - Mro, Mru' + MTEI_MEITEI_MAYEK_MEITHEI_MEETEI='Mtei - Meitei Mayek (Meithei, Meetei)' + MULT_MULTANI='Mult - Multani' + MYMR_MYANMAR_BURMESE='Mymr - Myanmar (Burmese)' + NARB_OLD_NORTH_ARABIAN_ANCIENT_NORTH_ARABIAN='Narb - Old North Arabian (Ancient North Arabian)' + NBAT_NABATAEAN='Nbat - Nabataean' + NEWA_NEWA_NEWAR_NEWARI='Newa - Newa, Newar, Newari' + NKGB_NAKHI_GEBA='Nkgb - Nakhi Geba' + NKOON_KO='Nkoo - N’Ko' + NSHUNÜSHU='Nshu - Nüshu' + OGAM_OGHAM='Ogam - Ogham' + OLCK_OL_CHIKI_OL_CEMET_OL_SANTALI='Olck - Ol Chiki (Ol Cemet’, Ol, Santali)' + ORKH_OLD_TURKIC_ORKHON_RUNIC='Orkh - Old Turkic, Orkhon Runic' + ORYA_ORIYA='Orya - Oriya' + OSGE_OSAGE='Osge - Osage' + OSMA_OSMANYA='Osma - Osmanya' + PALM_PALMYRENE='Palm - Palmyrene' + PAUC_PAU_CIN_HAU='Pauc - Pau Cin Hau' + PERM_OLD_PERMIC='Perm - Old Permic' + PHAG_PHAGSPA='Phag - Phags-pa' + PHLI_INSCRIPTIONAL_PAHLAVI='Phli - Inscriptional Pahlavi' + PHLP_PSALTER_PAHLAVI='Phlp - Psalter Pahlavi' 
+ PHLV_BOOK_PAHLAVI='Phlv - Book Pahlavi' + PHNX_PHOENICIAN='Phnx - Phoenician' + PIQD_KLINGONKLIP_IQA_D='Piqd - Klingon (KLI pIqaD)' + PLRD_MIAO_POLLARD='Plrd - Miao (Pollard)' + PRTI_INSCRIPTIONAL_PARTHIAN='Prti - Inscriptional Parthian' + RJNG_REJANG_REDJANG_KAGANGA='Rjng - Rejang (Redjang, Kaganga)' + RORO_RONGORONGO='Roro - Rongorongo' + RUNR_RUNIC='Runr - Runic' + SAMR_SAMARITAN='Samr - Samaritan' + SARA_SARATI='Sara - Sarati' + SARB_OLD_SOUTH_ARABIAN='Sarb - Old South Arabian' + SAUR_SAURASHTRA='Saur - Saurashtra' + SGNW_SIGN_WRITING='Sgnw - SignWriting' + SHAW_SHAVIAN_SHAW='Shaw - Shavian (Shaw)' + SHRD_SHARADAŚĀRADĀ='Shrd - Sharada, Śāradā' + SIDD_SIDDHAM='Sidd - Siddham' + SIND_KHUDAWADI_SINDHI='Sind - Khudawadi, Sindhi' + SINH_SINHALA='Sinh - Sinhala' + SORA_SORA_SOMPENG='Sora - Sora Sompeng' + SUND_SUNDANESE='Sund - Sundanese' + SYLO_SYLOTI_NAGRI='Sylo - Syloti Nagri' + SYRC_SYRIAC='Syrc - Syriac' + SYRE_SYRIAC_ESTRANGELOVARIANT='Syre - Syriac (Estrangelo variant)' + SYRJ_SYRIAC_WESTERNVARIANT='Syrj - Syriac (Western variant)' + SYRN_SYRIAC_EASTERNVARIANT='Syrn - Syriac (Eastern variant)' + TAGB_TAGBANWA='Tagb - Tagbanwa' + TAKR_TAKRI='Takr - Takri' + TALE_TAI_LE='Tale - Tai Le' + TALU_NEW_TAI_LUE='Talu - New Tai Lue' + TAML_TAMIL='Taml - Tamil' + TANG_TANGUT='Tang - Tangut' + TAVT_TAI_VIET='Tavt - Tai Viet' + TELU_TELUGU='Telu - Telugu' + TENG_TENGWAR='Teng - Tengwar' + TFNG_TIFINAGH_BERBER='Tfng - Tifinagh (Berber)' + TGLG_TAGALOG_BAYBAYIN_ALIBATA='Tglg - Tagalog (Baybayin, Alibata)' + THAA_THAANA='Thaa - Thaana' + THAI_THAI='Thai - Thai' + TIBT_TIBETAN='Tibt - Tibetan' + TIRH_TIRHUTA='Tirh - Tirhuta' + UGAR_UGARITIC='Ugar - Ugaritic' + VAII_VAI='Vaii - Vai' + VISP_VISIBLE_SPEECH='Visp - Visible Speech' + WARA_WARANG_CITI_VARANG_KSHITI='Wara - Warang Citi (Varang Kshiti)' + WOLE_WOLEAI='Wole - Woleai' + XPEO_OLD_PERSIAN='Xpeo - Old Persian' + XSUX_CUNEIFORM_SUMERO_AKKADIAN='Xsux - Cuneiform, Sumero-Akkadian' + YIII_YI='Yiii - Yi' + 
ZINH_CODEFORINHERITEDSCRIPT='Zinh - Code for inherited script' + ZMTH_MATHEMATICALNOTATION='Zmth - Mathematical notation' + ZSYE_SYMBOLS_EMOJIVARIANT='Zsye - Symbols (Emoji variant)' + ZSYM_SYMBOLS='Zsym - Symbols' + ZXXX_CODEFORUNWRITTENDOCUMENTS='Zxxx - Code for unwritten documents' + ZYYY_CODEFORUNDETERMINEDSCRIPT='Zyyy - Code for undetermined script' + ZZZZ_CODEFORUNCODEDSCRIPT='Zzzz - Code for uncoded script' + OTHER='other' + + class TextDataTypeSimpleType(str, Enum): XSDDECIMAL='xsd:decimal' # Examples: "123.456", "+1234.456", "-1234.456", "-.456", "-456" XSDFLOAT='xsd:float' # Examples: "123.456", "+1234.456", "-1.2344e56", "-.45E-6", "INF", "-INF", "NaN" @@ -1087,10 +1524,58 @@ class UnderlineStyleSimpleType(str, Enum): OTHER='other' +class charTypeType(str, Enum): + """charTypeType -- + Type of character represented by the + grapheme, group, or non-printing character element. + + """ + BASE='base' + COMBINING='combining' + + +class imageResolutionUnitType(str, Enum): + """imageResolutionUnitType -- + Specifies the unit of the resolution information + referring to a standardised unit of measurement + (pixels per inch, pixels per centimeter or other). + + """ + PPI='PPI' + PPCM='PPCM' + OTHER='other' + + +class typeType(str, Enum): + """typeType -- + Type of metadata (e.g. 
author) + + """ + AUTHOR='author' + IMAGE_PROPERTIES='imageProperties' + PROCESSING_STEP='processingStep' + OTHER='other' + + +class typeType1(str, Enum): + LINK='link' + JOIN='join' + + +class typeType3(str, Enum): + XSDSTRING='xsd:string' + XSDINTEGER='xsd:integer' + XSDBOOLEAN='xsd:boolean' + XSDFLOAT='xsd:float' + + +# +# Start data representation classes +# class PcGtsType(GeneratedsSuper): __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('pcGtsId', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('pcGtsId', 'string', 0, 1, {'use': 'optional', 'name': 'pcGtsId'}), MemberSpec_('Metadata', 'MetadataType', 0, 0, {'name': 'Metadata', 'type': 'MetadataType'}, None), MemberSpec_('Page', 'PageType', 0, 0, {'name': 'Page', 'type': 'PageType'}, None), ] @@ -1101,7 +1586,7 @@ def __init__(self, pcGtsId=None, Metadata=None, Page=None, gds_collector_=None, self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.pcGtsId = _cast(None, pcGtsId) self.pcGtsId_nsprefix_ = "pc" self.Metadata = Metadata @@ -1135,7 +1620,7 @@ def get_pcGtsId(self): return self.pcGtsId def set_pcGtsId(self, pcGtsId): self.pcGtsId = pcGtsId - def hasContent_(self): + def has__content(self): if ( self.Metadata is not None or self.Page is not None @@ -1158,19 +1643,19 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PcGtsType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PcGtsType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, 
name_='PcGtsType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='PcGtsType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PcGtsType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PcGtsType'): if self.pcGtsId is not None and 'pcGtsId' not in already_processed: already_processed.add('pcGtsId') outfile.write(' pcGtsId=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.pcGtsId), input_name='pcGtsId')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PcGtsType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PcGtsType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -1181,7 +1666,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml if self.Page is not None: namespaceprefix_ = self.Page_nsprefix_ + ':' if (UseCapturedNS_ and self.Page_nsprefix_) else '' self.Page.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Page', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='PcGtsType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='PcGtsType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -1190,12 +1675,14 @@ def to_etree(self, parent_element=None, name_='PcGtsType', mapping_=None, nsmap_
element.set('pcGtsId', self.gds_format_string(self.pcGtsId)) if self.Metadata is not None: Metadata_ = self.Metadata - Metadata_.to_etree(element, name_='Metadata', mapping_=mapping_, nsmap_=nsmap_) + Metadata_.to_etree(element, name_='Metadata', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Page is not None: Page_ = self.Page - Page_.to_etree(element, name_='Page', mapping_=mapping_, nsmap_=nsmap_) + Page_.to_etree(element, name_='Page', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -1203,17 +1690,17 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('pcGtsId', node) if value is not None and 'pcGtsId' not in already_processed: already_processed.add('pcGtsId') self.pcGtsId = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Metadata': obj_ = MetadataType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -1303,10 +1790,20 @@ def prune_ReadingOrder(self): class MetadataType(GeneratedsSuper): - """External reference of 
any kind""" + """externalRef -- External reference of any kind + Created -- + The timestamp has to be in UTC (Coordinated + Universal Time) and not local time. + + * LastChange -- + The timestamp has to be in UTC + (Coordinated Universal Time) + and not local time. + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('externalRef', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('externalRef', 'string', 0, 1, {'use': 'optional', 'name': 'externalRef'}), MemberSpec_('Creator', 'string', 0, 0, {'name': 'Creator', 'type': 'string'}, None), MemberSpec_('Created', 'dateTime', 0, 0, {'name': 'Created', 'type': 'dateTime'}, None), MemberSpec_('LastChange', 'dateTime', 0, 0, {'name': 'LastChange', 'type': 'dateTime'}, None), @@ -1396,7 +1893,7 @@ def get_externalRef(self): return self.externalRef def set_externalRef(self, externalRef): self.externalRef = externalRef - def hasContent_(self): + def has__content(self): if ( self.Creator is not None or self.Created is not None or @@ -1423,19 +1920,19 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MetadataType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MetadataType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MetadataType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MetadataType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level,
already_processed, namespaceprefix_='', name_='MetadataType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MetadataType'): if self.externalRef is not None and 'externalRef' not in already_processed: already_processed.add('externalRef') outfile.write(' externalRef=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.externalRef), input_name='externalRef')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:None="http://www.w3.org/2001/XMLSchema" ', name_='MetadataType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:None="http://www.w3.org/2001/XMLSchema" ', name_='MetadataType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -1462,7 +1959,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for MetadataItem_ in self.MetadataItem: namespaceprefix_ = self.MetadataItem_nsprefix_ + ':' if (UseCapturedNS_ and self.MetadataItem_nsprefix_) else '' MetadataItem_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='MetadataItem', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='MetadataType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='MetadataType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -1483,11 +1980,13 @@ def to_etree(self, parent_element=None, name_='MetadataType', mapping_=None, nsm etree_.SubElement(element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}Comments').text = self.gds_format_string(Comments_) if self.UserDefined is 
not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for MetadataItem_ in self.MetadataItem: - MetadataItem_.to_etree(element, name_='MetadataItem', mapping_=mapping_, nsmap_=nsmap_) + MetadataItem_.to_etree(element, name_='MetadataItem', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -1495,17 +1994,17 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('externalRef', node) if value is not None and 'externalRef' not in already_processed: already_processed.add('externalRef') self.externalRef = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Creator': value_ = child_.text value_ = self.gds_parse_string(value_, node, 'Creator') @@ -1544,15 +2043,22 @@ def __hash__(self): class MetadataItemType(GeneratedsSuper): - """Type of metadata (e.g. author) - E.g. 
imagePhotometricInterpretation - E.g. RGB""" + """type -- + Type of metadata (e.g. author) + + * name -- + E.g. imagePhotometricInterpretation + + * value -- E.g. RGB + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('type_', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('name', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('value', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('date', 'dateTime', 0, 1, {'use': 'optional'}), + MemberSpec_('type_', 'typeType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('name', 'string', 0, 1, {'use': 'optional', 'name': 'name'}), + MemberSpec_('value', 'string', 0, 0, {'use': 'required', 'name': 'value'}), + MemberSpec_('date', 'dateTime', 0, 1, {'use': 'optional', 'name': 'date'}), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), ] subclass = None @@ -1562,7 +2068,7 @@ def __init__(self, type_=None, name=None, value=None, date=None, Labels=None, gd self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.type_ = _cast(None, type_) self.type__nsprefix_ = "pc" self.name = _cast(None, name) @@ -1620,7 +2126,20 @@ def get_date(self): return self.date def set_date(self, date): self.date = date - def hasContent_(self): + def validate_typeType(self, value): + # Validate type typeType, a restriction on string. 
+ if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: + if not isinstance(value, str): + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, }) + return False + value = value + enumerations = ['author', 'imageProperties', 'processingStep', 'other'] + if value not in enumerations: + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on typeType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) + result = False + def has__content(self): if ( self.Labels ): @@ -1642,15 +2161,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MetadataItemType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MetadataItemType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MetadataItemType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MetadataItemType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MetadataItemType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MetadataItemType'): if self.type_ is not None and 'type_' not in already_processed: already_processed.add('type_') 
outfile.write(' type=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.type_), input_name='type')), )) @@ -1663,7 +2182,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.date is not None and 'date' not in already_processed: already_processed.add('date') outfile.write(' date="%s"' % self.gds_format_datetime(self.date, input_name='date')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataItemType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataItemType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -1671,7 +2190,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for Labels_ in self.Labels: namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='MetadataItemType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='MetadataItemType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -1685,9 +2204,11 @@ def to_etree(self, parent_element=None, name_='MetadataItemType', mapping_=None, if self.date is not None: element.set('date', self.gds_format_datetime(self.date)) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if 
mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -1695,16 +2216,17 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('type', node) if value is not None and 'type' not in already_processed: already_processed.add('type') self.type_ = value + self.validate_typeType(self.type_) # validate type typeType value = find_attr_value_('name', node) if value is not None and 'name' not in already_processed: already_processed.add('name') @@ -1720,7 +2242,7 @@ def buildAttributes(self, node, attrs, already_processed): self.date = self.gds_parse_datetime(value) except ValueError as exp: raise ValueError('Bad date-time attribute (date): %s' % exp) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Labels': obj_ = LabelsType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -1732,16 +2254,26 @@ def __hash__(self): class LabelsType(GeneratedsSuper): - """Reference to external model / ontology / schema - E.g. an RDF resource identifier - (to be used as subject or object of an RDF triple) - Prefix for all labels (e.g. 
first part of an URI)""" + """externalModel -- + Reference to external model / ontology / schema + + * externalId -- + E.g. an RDF resource identifier + (to be used as subject or object of an RDF triple) + + * prefix -- + Prefix for all labels (e.g. first part of an URI) + + * Label -- + A semantic label / tag + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('externalModel', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('externalId', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('prefix', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('externalModel', 'string', 0, 1, {'use': 'optional', 'name': 'externalModel'}), + MemberSpec_('externalId', 'string', 0, 1, {'use': 'optional', 'name': 'externalId'}), + MemberSpec_('prefix', 'string', 0, 1, {'use': 'optional', 'name': 'prefix'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('Label', 'LabelType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Label', 'type': 'LabelType'}, None), ] subclass = None @@ -1751,7 +2283,7 @@ def __init__(self, externalModel=None, externalId=None, prefix=None, comments=No self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.externalModel = _cast(None, externalModel) self.externalModel_nsprefix_ = "pc" self.externalId = _cast(None, externalId) @@ -1806,7 +2338,7 @@ def get_comments(self): return self.comments def set_comments(self, comments): self.comments = comments - def hasContent_(self): + def has__content(self): if ( self.Label ): @@ -1828,15 +2360,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - 
self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LabelsType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LabelsType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LabelsType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LabelsType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LabelsType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LabelsType'): if self.externalModel is not None and 'externalModel' not in already_processed: already_processed.add('externalModel') outfile.write(' externalModel=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.externalModel), input_name='externalModel')), )) @@ -1849,7 +2381,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelsType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelsType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -1857,7 +2389,7 @@ def exportChildren(self, outfile, level, 
namespaceprefix_='', namespacedef_='xml for Label_ in self.Label: namespaceprefix_ = self.Label_nsprefix_ + ':' if (UseCapturedNS_ and self.Label_nsprefix_) else '' Label_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Label', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='LabelsType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='LabelsType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -1871,9 +2403,11 @@ def to_etree(self, parent_element=None, name_='LabelsType', mapping_=None, nsmap if self.comments is not None: element.set('comments', self.gds_format_string(self.comments)) for Label_ in self.Label: - Label_.to_etree(element, name_='Label', mapping_=mapping_, nsmap_=nsmap_) + Label_.to_etree(element, name_='Label', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -1881,12 +2415,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('externalModel', node) if value is not None and 'externalModel' not in 
already_processed: already_processed.add('externalModel') @@ -1903,7 +2437,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Label': obj_ = LabelType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -1915,18 +2449,23 @@ def __hash__(self): class LabelType(GeneratedsSuper): - """Semantic label + """LabelType -- Semantic label + value -- The label / tag (e.g. 'person'). Can be an RDF resource identifier (e.g. object of an RDF triple). - Additional information on the label - (e.g. 'YYYY-mm-dd' for a date label). - Can be used as predicate of an RDF triple.""" + + * type -- + Additional information on the label + (e.g. 'YYYY-mm-dd' for a date label). + Can be used as predicate of an RDF triple. 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('value', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('type_', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('value', 'string', 0, 0, {'use': 'required', 'name': 'value'}), + MemberSpec_('type_', 'string', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), ] subclass = None superclass = None @@ -1935,7 +2474,7 @@ def __init__(self, value=None, type_=None, comments=None, gds_collector_=None, * self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.value = _cast(None, value) self.value_nsprefix_ = "pc" self.type_ = _cast(None, type_) @@ -1969,7 +2508,7 @@ def get_comments(self): return self.comments def set_comments(self, comments): self.comments = comments - def hasContent_(self): + def has__content(self): if ( ): @@ -1991,14 +2530,14 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LabelType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LabelType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LabelType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LabelType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, 
level, already_processed, namespaceprefix_='', name_='LabelType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LabelType'): if self.value is not None and 'value' not in already_processed: already_processed.add('value') outfile.write(' value=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.value), input_name='value')), )) @@ -2008,9 +2547,9 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='LabelType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='LabelType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -2023,6 +2562,8 @@ def to_etree(self, parent_element=None, name_='LabelType', mapping_=None, nsmap_ element.set('comments', self.gds_format_string(self.comments)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -2030,12 +2571,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node 
already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('value', node) if value is not None and 'value' not in already_processed: already_processed.add('value') @@ -2048,7 +2589,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -2056,57 +2597,102 @@ def __hash__(self): class PageType(GeneratedsSuper): - """Contains the image file name including the file extension. - Specifies the width of the image.Specifies the height of the - image.Specifies the image resolution in width.Specifies the image - resolution in height. - Specifies the unit of the resolution information - referring to a standardised unit of measurement - (pixels per inch, pixels per centimeter or other). - For generic use - The angle the rectangle encapsulating the page - (or its Border) has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - (The rotated image can be further referenced - via “AlternativeImage”.) - Range: -179.999,180 - The type of the page within the document - (e.g. cover page). 
- The primary language used in the page - (lower-level definitions override the page-level definition). - The secondary language used in the page - (lower-level definitions override the page-level definition). - The primary script used in the page - (lower-level definitions override the page-level definition). - The secondary script used in the page - (lower-level definitions override the page-level definition). - The direction in which text within lines - should be read (order of words and characters), - in addition to “textLineOrder” - (lower-level definitions override the page-level definition). - The order of text lines within a block, - in addition to “readingDirection” - (lower-level definitions override the page-level definition). - Confidence value for whole page (between 0 and 1)""" + """imageFilename -- + Contains the image file name including the file extension. + + * imageWidth -- Specifies the width of the image. + * imageHeight -- Specifies the height of the image. + * imageXResolution -- Specifies the image resolution in width. + * imageYResolution -- Specifies the image resolution in height. + * imageResolutionUnit -- + Specifies the unit of the resolution information + referring to a standardised unit of measurement + (pixels per inch, pixels per centimeter or other). + + * custom -- For generic use + * orientation -- + The angle the rectangle encapsulating the page + (or its Border) has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + (The rotated image can be further referenced + via + “ + AlternativeImage + ” + .) + Range: -179.999,180 + + * type -- + The type of the page within the document + (e.g. cover page). + + * primaryLanguage -- + The primary language used in the page + (lower-level definitions override the page-level definition). 
+ + * secondaryLanguage -- + The secondary language used in the page + (lower-level definitions override the page-level definition). + + * primaryScript -- + The primary script used in the page + (lower-level definitions override the page-level definition). + + * secondaryScript -- + The secondary script used in the page + (lower-level definitions override the page-level definition). + + * readingDirection -- + The direction in which text within lines + should be read (order of words and characters), + in addition to + “ + textLineOrder + ” + (lower-level definitions override the page-level definition). + + * textLineOrder -- + The order of text lines within a block, + in addition to + “ + readingDirection + ” + (lower-level definitions override the page-level definition). + + * conf -- Confidence value for whole page (between 0 and 1) + * AlternativeImage -- + Alternative document page images + (e.g. black-and-white). + + * ReadingOrder -- Order of blocks within the page. + * Layers -- + Unassigned regions are considered to be in the + (virtual) default layer which is to be treated + as below any other layers. 
+ + * TextStyle -- Default text style + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('imageFilename', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('imageWidth', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('imageHeight', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('imageXResolution', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('imageYResolution', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('imageResolutionUnit', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:PageTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('secondaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('textLineOrder', 'pc:TextLineOrderSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('imageFilename', 'string', 0, 0, {'use': 'required', 'name': 'imageFilename'}), + MemberSpec_('imageWidth', 'int', 0, 0, {'use': 'required', 'name': 'imageWidth'}), + MemberSpec_('imageHeight', 'int', 0, 0, {'use': 'required', 'name': 'imageHeight'}), + MemberSpec_('imageXResolution', 'float', 0, 1, {'use': 'optional', 'name': 'imageXResolution'}), + MemberSpec_('imageYResolution', 'float', 0, 1, {'use': 'optional', 'name': 'imageYResolution'}), + MemberSpec_('imageResolutionUnit', 'imageResolutionUnitType', 0, 1, {'use': 'optional', 'name': 'imageResolutionUnit'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 
'optional', 'name': 'custom'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('type_', 'pc:PageTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional', 'name': 'primaryLanguage'}), + MemberSpec_('secondaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional', 'name': 'secondaryLanguage'}), + MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'primaryScript'}), + MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'secondaryScript'}), + MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional', 'name': 'readingDirection'}), + MemberSpec_('textLineOrder', 'pc:TextLineOrderSimpleType', 0, 1, {'use': 'optional', 'name': 'textLineOrder'}), + MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional', 'name': 'conf'}), MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None), MemberSpec_('Border', 'BorderType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'Border', 'type': 'BorderType'}, None), MemberSpec_('PrintSpace', 'PrintSpaceType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'PrintSpace', 'type': 'PrintSpaceType'}, None), @@ -2139,7 +2725,7 @@ def __init__(self, imageFilename=None, imageWidth=None, imageHeight=None, imageX self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.imageFilename = _cast(None, imageFilename) self.imageFilename_nsprefix_ = "pc" self.imageWidth = _cast(int, imageWidth) @@ -2548,6 +3134,19 @@ def get_conf(self): return self.conf def set_conf(self, conf): self.conf = conf + def validate_imageResolutionUnitType(self, value): + # Validate type 
imageResolutionUnitType, a restriction on string. + if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: + if not isinstance(value, str): + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, }) + return False + value = value + enumerations = ['PPI', 'PPCM', 'other'] + if value not in enumerations: + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on imageResolutionUnitType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) + result = False def validate_PageTypeSimpleType(self, value): # Validate type pc:PageTypeSimpleType, a restriction on string. if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: @@ -2628,7 +3227,7 @@ def validate_ConfSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd maxInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.AlternativeImage or self.Border is not None or @@ -2673,15 +3272,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PageType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PageType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='PageType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, 
namespaceprefix_, namespacedef_, name_='PageType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PageType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PageType'): if self.imageFilename is not None and 'imageFilename' not in already_processed: already_processed.add('imageFilename') outfile.write(' imageFilename=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.imageFilename), input_name='imageFilename')), )) @@ -2730,7 +3329,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.conf is not None and 'conf' not in already_processed: already_processed.add('conf') outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PageType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PageType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -2807,7 +3406,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for CustomRegion_ in self.CustomRegion: namespaceprefix_ = self.CustomRegion_nsprefix_ + ':' if (UseCapturedNS_ and self.CustomRegion_nsprefix_) else '' CustomRegion_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='CustomRegion', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='PageType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='PageType', mapping_=None, reverse_mapping_=None, 
nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -2845,62 +3444,64 @@ def to_etree(self, parent_element=None, name_='PageType', mapping_=None, nsmap_= if self.conf is not None: element.set('conf', self.gds_format_float(self.conf)) for AlternativeImage_ in self.AlternativeImage: - AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, nsmap_=nsmap_) + AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Border is not None: Border_ = self.Border - Border_.to_etree(element, name_='Border', mapping_=mapping_, nsmap_=nsmap_) + Border_.to_etree(element, name_='Border', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.PrintSpace is not None: PrintSpace_ = self.PrintSpace - PrintSpace_.to_etree(element, name_='PrintSpace', mapping_=mapping_, nsmap_=nsmap_) + PrintSpace_.to_etree(element, name_='PrintSpace', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.ReadingOrder is not None: ReadingOrder_ = self.ReadingOrder - ReadingOrder_.to_etree(element, name_='ReadingOrder', mapping_=mapping_, nsmap_=nsmap_) + ReadingOrder_.to_etree(element, name_='ReadingOrder', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Layers is not None: Layers_ = self.Layers - Layers_.to_etree(element, name_='Layers', mapping_=mapping_, nsmap_=nsmap_) + Layers_.to_etree(element, name_='Layers', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Relations is not None: Relations_ = self.Relations - Relations_.to_etree(element, name_='Relations', mapping_=mapping_, nsmap_=nsmap_) + Relations_.to_etree(element, name_='Relations', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.TextStyle is not None: TextStyle_ = self.TextStyle - 
TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, nsmap_=nsmap_) + TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TextRegion_ in self.TextRegion: - TextRegion_.to_etree(element, name_='TextRegion', mapping_=mapping_, nsmap_=nsmap_) + TextRegion_.to_etree(element, name_='TextRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for ImageRegion_ in self.ImageRegion: - ImageRegion_.to_etree(element, name_='ImageRegion', mapping_=mapping_, nsmap_=nsmap_) + ImageRegion_.to_etree(element, name_='ImageRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for LineDrawingRegion_ in self.LineDrawingRegion: - LineDrawingRegion_.to_etree(element, name_='LineDrawingRegion', mapping_=mapping_, nsmap_=nsmap_) + LineDrawingRegion_.to_etree(element, name_='LineDrawingRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for GraphicRegion_ in self.GraphicRegion: - GraphicRegion_.to_etree(element, name_='GraphicRegion', mapping_=mapping_, nsmap_=nsmap_) + GraphicRegion_.to_etree(element, name_='GraphicRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TableRegion_ in self.TableRegion: - TableRegion_.to_etree(element, name_='TableRegion', mapping_=mapping_, nsmap_=nsmap_) + TableRegion_.to_etree(element, name_='TableRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for ChartRegion_ 
in self.ChartRegion: - ChartRegion_.to_etree(element, name_='ChartRegion', mapping_=mapping_, nsmap_=nsmap_) + ChartRegion_.to_etree(element, name_='ChartRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for MapRegion_ in self.MapRegion: - MapRegion_.to_etree(element, name_='MapRegion', mapping_=mapping_, nsmap_=nsmap_) + MapRegion_.to_etree(element, name_='MapRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for SeparatorRegion_ in self.SeparatorRegion: - SeparatorRegion_.to_etree(element, name_='SeparatorRegion', mapping_=mapping_, nsmap_=nsmap_) + SeparatorRegion_.to_etree(element, name_='SeparatorRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for MathsRegion_ in self.MathsRegion: - MathsRegion_.to_etree(element, name_='MathsRegion', mapping_=mapping_, nsmap_=nsmap_) + MathsRegion_.to_etree(element, name_='MathsRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for ChemRegion_ in self.ChemRegion: - ChemRegion_.to_etree(element, name_='ChemRegion', mapping_=mapping_, nsmap_=nsmap_) + ChemRegion_.to_etree(element, name_='ChemRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for MusicRegion_ in self.MusicRegion: - MusicRegion_.to_etree(element, name_='MusicRegion', mapping_=mapping_, nsmap_=nsmap_) + MusicRegion_.to_etree(element, name_='MusicRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for AdvertRegion_ in self.AdvertRegion: - AdvertRegion_.to_etree(element, name_='AdvertRegion', mapping_=mapping_, nsmap_=nsmap_) + AdvertRegion_.to_etree(element, name_='AdvertRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for NoiseRegion_ in self.NoiseRegion: - NoiseRegion_.to_etree(element, name_='NoiseRegion', mapping_=mapping_, nsmap_=nsmap_) + NoiseRegion_.to_etree(element, name_='NoiseRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for 
UnknownRegion_ in self.UnknownRegion: - UnknownRegion_.to_etree(element, name_='UnknownRegion', mapping_=mapping_, nsmap_=nsmap_) + UnknownRegion_.to_etree(element, name_='UnknownRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for CustomRegion_ in self.CustomRegion: - CustomRegion_.to_etree(element, name_='CustomRegion', mapping_=mapping_, nsmap_=nsmap_) + CustomRegion_.to_etree(element, name_='CustomRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -2908,12 +3509,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('imageFilename', node) if value is not None and 'imageFilename' not in already_processed: already_processed.add('imageFilename') @@ -2940,6 +3541,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'imageResolutionUnit' not in already_processed: already_processed.add('imageResolutionUnit') self.imageResolutionUnit = value + self.validate_imageResolutionUnitType(self.imageResolutionUnit) # validate type imageResolutionUnitType value = find_attr_value_('custom', node) if value is not None and 'custom' not in already_processed: already_processed.add('custom') 
@@ -2990,7 +3592,7 @@ def buildAttributes(self, node, attrs, already_processed): value = self.gds_parse_float(value, node, 'conf') self.conf = value self.validate_ConfSimpleType(self.conf) # validate type ConfSimpleType - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'AlternativeImage': obj_ = AlternativeImageType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -3362,18 +3964,22 @@ def set_orientation(self, orientation): class CoordsType(GeneratedsSuper): - """Polygon outline of the element as a path of points. + """points -- + Polygon outline of the element as a path of points. No points may lie outside the outline of its parent, which in the case of Border is the bounding rectangle of the root image. Paths are closed by convention, i.e. the last point logically connects with the first (and at least 3 points are required to span an area). Paths must be planar (i.e. must not self-intersect). 
- Confidence value (between 0 and 1)""" + + * conf -- Confidence value (between 0 and 1) + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('points', 'pc:PointsType', 0, 0, {'use': 'required'}), - MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('points', 'pc:PointsType', 0, 0, {'use': 'required', 'name': 'points'}), + MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional', 'name': 'conf'}), ] subclass = None superclass = None @@ -3382,7 +3988,7 @@ def __init__(self, points=None, conf=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.points = _cast(None, points) self.points_nsprefix_ = "pc" self.conf = _cast(float, conf) @@ -3436,7 +4042,7 @@ def validate_ConfSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd maxInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( ): @@ -3458,23 +4064,23 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='CoordsType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='CoordsType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='CoordsType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='CoordsType', pretty_print=pretty_print) 
outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='CoordsType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='CoordsType'): if self.points is not None and 'points' not in already_processed: already_processed.add('points') outfile.write(' points=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.points), input_name='points')), )) if self.conf is not None and 'conf' not in already_processed: already_processed.add('conf') outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CoordsType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CoordsType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='CoordsType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='CoordsType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -3485,6 +4091,8 @@ def to_etree(self, parent_element=None, name_='CoordsType', mapping_=None, nsmap element.set('conf', self.gds_format_float(self.conf)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -3492,12 +4100,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() 
self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('points', node) if value is not None and 'points' not in already_processed: already_processed.add('points') @@ -3509,7 +4117,7 @@ def buildAttributes(self, node, attrs, already_processed): value = self.gds_parse_float(value, node, 'conf') self.conf = value self.validate_ConfSimpleType(self.conf) # validate type ConfSimpleType - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -3533,28 +4141,51 @@ def set_points(self, points): class TextLineType(GeneratedsSuper): - """Overrides primaryLanguage attribute of parent text + """primaryLanguage -- + Overrides primaryLanguage attribute of parent text region - The primary script used in the text line - The secondary script used in the text line - The direction in which text within the line - should be read (order of words and characters). - Overrides the production attribute of the parent - text region - For generic use - Position (order number) of this text line within the - parent text region.""" + + * primaryScript -- + The primary script used in the text line + + * secondaryScript -- + The secondary script used in the text line + + * readingDirection -- + The direction in which text within the line + should be read (order of words and characters). 
+ + * production -- + Overrides the production attribute of the parent + text region + + * custom -- For generic use + * index -- + Position (order number) of this text line within the + parent text region. + + * AlternativeImage -- + Alternative text line images (e.g. + black-and-white) + + * Baseline -- + Multiple connected points that mark the baseline + of the glyphs + + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('index', 'int', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional', 'name': 'primaryLanguage'}), + MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'primaryScript'}), + MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'secondaryScript'}), + MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional', 'name': 'readingDirection'}), + MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional', 'name': 'production'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), + MemberSpec_('index', 'int', 0, 1, {'use': 
'optional', 'name': 'index'}), MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None), MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), MemberSpec_('Baseline', 'BaselineType', 0, 1, {'minOccurs': '0', 'name': 'Baseline', 'type': 'BaselineType'}, None), @@ -3571,7 +4202,7 @@ def __init__(self, id=None, primaryLanguage=None, primaryScript=None, secondaryS self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.primaryLanguage = _cast(None, primaryLanguage) @@ -3777,7 +4408,7 @@ def validate_ProductionSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ProductionSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.AlternativeImage or self.Coords is not None or @@ -3806,15 +4437,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextLineType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextLineType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextLineType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextLineType', 
pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextLineType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextLineType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -3842,7 +4473,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.index is not None and 'index' not in already_processed: already_processed.add('index') outfile.write(' index="%s"' % self.gds_format_integer(self.index, input_name='index')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextLineType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextLineType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -3871,7 +4502,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for Labels_ in self.Labels: namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='TextLineType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='TextLineType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = 
etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -3895,27 +4526,29 @@ def to_etree(self, parent_element=None, name_='TextLineType', mapping_=None, nsm if self.index is not None: element.set('index', self.gds_format_integer(self.index)) for AlternativeImage_ in self.AlternativeImage: - AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, nsmap_=nsmap_) + AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Coords is not None: Coords_ = self.Coords - Coords_.to_etree(element, name_='Coords', mapping_=mapping_, nsmap_=nsmap_) + Coords_.to_etree(element, name_='Coords', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Baseline is not None: Baseline_ = self.Baseline - Baseline_.to_etree(element, name_='Baseline', mapping_=mapping_, nsmap_=nsmap_) + Baseline_.to_etree(element, name_='Baseline', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Word_ in self.Word: - Word_.to_etree(element, name_='Word', mapping_=mapping_, nsmap_=nsmap_) + Word_.to_etree(element, name_='Word', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TextEquiv_ in self.TextEquiv: - TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, nsmap_=nsmap_) + TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.TextStyle is not None: TextStyle_ = self.TextStyle - TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, nsmap_=nsmap_) + TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', 
mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -3923,12 +4556,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -3970,7 +4603,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'index' not in already_processed: already_processed.add('index') self.index = self.gds_parse_integer(value, node, 'index') - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'AlternativeImage': obj_ = AlternativeImageType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -4062,25 +4695,42 @@ def set_Coords(self, Coords): class WordType(GeneratedsSuper): - """Overrides primaryLanguage attribute of parent line + """language -- + 
Overrides primaryLanguage attribute of parent line and/or text region - The primary script used in the word - The secondary script used in the word - The direction in which text within the word - should be read (order of characters). - Overrides the production attribute of the parent - text line and/or text region. - For generic use""" + + * primaryScript -- + The primary script used in the word + + * secondaryScript -- + The secondary script used in the word + + * readingDirection -- + The direction in which text within the word + should be read (order of characters). + + * production -- + Overrides the production attribute of the parent + text line and/or text region. + + * custom -- For generic use + * AlternativeImage -- + Alternative word images (e.g. + black-and-white) + + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('language', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('language', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional', 'name': 'language'}), + MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'primaryScript'}), + MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'secondaryScript'}), + MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional', 'name': 
'readingDirection'}), + MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional', 'name': 'production'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None), MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), MemberSpec_('Glyph', 'GlyphType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Glyph', 'type': 'GlyphType'}, None), @@ -4096,7 +4746,7 @@ def __init__(self, id=None, language=None, primaryScript=None, secondaryScript=N self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.language = _cast(None, language) @@ -4290,7 +4940,7 @@ def validate_ProductionSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ProductionSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.AlternativeImage or self.Coords is not None or @@ -4318,15 +4968,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='WordType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='WordType') + if self.has__content(): outfile.write('>%s' % 
(eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='WordType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='WordType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='WordType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='WordType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -4351,7 +5001,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='WordType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='WordType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -4377,7 +5027,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for Labels_ in self.Labels: namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='WordType', 
mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='WordType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -4399,24 +5049,26 @@ def to_etree(self, parent_element=None, name_='WordType', mapping_=None, nsmap_= if self.comments is not None: element.set('comments', self.gds_format_string(self.comments)) for AlternativeImage_ in self.AlternativeImage: - AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, nsmap_=nsmap_) + AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Coords is not None: Coords_ = self.Coords - Coords_.to_etree(element, name_='Coords', mapping_=mapping_, nsmap_=nsmap_) + Coords_.to_etree(element, name_='Coords', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Glyph_ in self.Glyph: - Glyph_.to_etree(element, name_='Glyph', mapping_=mapping_, nsmap_=nsmap_) + Glyph_.to_etree(element, name_='Glyph', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TextEquiv_ in self.TextEquiv: - TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, nsmap_=nsmap_) + TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.TextStyle is not None: TextStyle_ = self.TextStyle - TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, nsmap_=nsmap_) + TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for 
Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -4424,12 +5076,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -4467,7 +5119,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'AlternativeImage': obj_ = AlternativeImageType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -4554,19 +5206,34 @@ def set_Coords(self, Coords): class GlyphType(GeneratedsSuper): - """The script used for the glyph - Overrides the production attribute of the parent - word / text line / text region. 
- For generic use""" + """script -- + The script used for the glyph + + * production -- + Overrides the production attribute of the parent + word / text line / text region. + + * custom -- For generic use + * AlternativeImage -- + Alternative glyph images (e.g. + black-and-white) + + * Graphemes -- + Container for graphemes, grapheme groups and + non-printing characters + + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('ligature', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('symbol', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('script', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('ligature', 'boolean', 0, 1, {'use': 'optional', 'name': 'ligature'}), + MemberSpec_('symbol', 'boolean', 0, 1, {'use': 'optional', 'name': 'symbol'}), + MemberSpec_('script', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'script'}), + MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional', 'name': 'production'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None), MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), MemberSpec_('Graphemes', 'GraphemesType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'Graphemes', 'type': 'GraphemesType'}, None), @@ -4582,7 +5249,7 @@ def __init__(self, 
id=None, ligature=None, symbol=None, script=None, production= self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.ligature = _cast(bool, ligature) @@ -4735,7 +5402,7 @@ def validate_ProductionSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ProductionSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.AlternativeImage or self.Coords is not None or @@ -4763,15 +5430,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GlyphType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GlyphType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GlyphType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GlyphType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GlyphType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GlyphType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % 
(self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -4793,7 +5460,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GlyphType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GlyphType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -4819,7 +5486,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for Labels_ in self.Labels: namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='GlyphType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='GlyphType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -4839,25 +5506,27 @@ def to_etree(self, parent_element=None, name_='GlyphType', mapping_=None, nsmap_ if self.comments is not None: element.set('comments', self.gds_format_string(self.comments)) for AlternativeImage_ in self.AlternativeImage: - AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, nsmap_=nsmap_) + AlternativeImage_.to_etree(element, name_='AlternativeImage', 
mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Coords is not None: Coords_ = self.Coords - Coords_.to_etree(element, name_='Coords', mapping_=mapping_, nsmap_=nsmap_) + Coords_.to_etree(element, name_='Coords', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Graphemes is not None: Graphemes_ = self.Graphemes - Graphemes_.to_etree(element, name_='Graphemes', mapping_=mapping_, nsmap_=nsmap_) + Graphemes_.to_etree(element, name_='Graphemes', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TextEquiv_ in self.TextEquiv: - TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, nsmap_=nsmap_) + TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.TextStyle is not None: TextStyle_ = self.TextStyle - TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, nsmap_=nsmap_) + TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -4865,12 +5534,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, 
already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -4911,7 +5580,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'AlternativeImage': obj_ = AlternativeImageType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -4998,22 +5667,40 @@ def set_Coords(self, Coords): class TextEquivType(GeneratedsSuper): - """Used for sort order in case multiple TextEquivs are defined. + """index -- + Used for sort order in case multiple TextEquivs are defined. The text content with the lowest index should be interpreted as the main text content. - OCR confidence value (between 0 and 1) - Type of text content (is it free text or a number, for instance). - This is only a descriptive attribute, the text type - is not checked during XML validation. - Refinement for dataType attribute. Can be a regular expression, for - instance.""" + + * conf -- OCR confidence value (between 0 and 1) + * dataType -- + Type of text content (is it free text or a number, for instance). + This is only a descriptive attribute, the text type + is not checked during XML validation. + + * dataTypeDetails -- + Refinement for dataType attribute. 
Can be a regular expression, for instance. + + * PlainText -- + Text in a "simple" form (ASCII or extended ASCII + as mostly used for typing). I.e. no use of + special characters for ligatures (should be + stored as two separate characters) etc. + + * Unicode -- + Correct encoding of the original, always using + the corresponding Unicode code point. I.e. + ligatures have to be represented as one + character etc. + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('index', 'integer', 0, 1, {'use': 'optional'}), - MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('dataType', 'pc:TextDataTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('dataTypeDetails', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('index', 'indexType', 0, 1, {'use': 'optional', 'name': 'index'}), + MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional', 'name': 'conf'}), + MemberSpec_('dataType', 'pc:TextDataTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'dataType'}), + MemberSpec_('dataTypeDetails', 'string', 0, 1, {'use': 'optional', 'name': 'dataTypeDetails'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('PlainText', 'string', 0, 1, {'minOccurs': '0', 'name': 'PlainText', 'type': 'string'}, None), MemberSpec_('Unicode', 'string', 0, 0, {'name': 'Unicode', 'type': 'string'}, None), ] @@ -5082,6 +5769,17 @@ def get_comments(self): return self.comments def set_comments(self, comments): self.comments = comments + def validate_indexType(self, value): + # Validate type indexType, a restriction on integer. 
+ if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: + if not isinstance(value, int): + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (int)' % {"value": value, "lineno": lineno, }) + return False + if value < 0: + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd minInclusive restriction on indexType' % {"value": value, "lineno": lineno} ) + result = False def validate_ConfSimpleType(self, value): # Validate type pc:ConfSimpleType, a restriction on float. if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: @@ -5110,7 +5808,7 @@ def validate_TextDataTypeSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on TextDataTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.PlainText is not None or self.Unicode is not None @@ -5133,15 +5831,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextEquivType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextEquivType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextEquivType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextEquivType', pretty_print=pretty_print) showIndent(outfile, level, 
pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextEquivType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextEquivType'): if self.index is not None and 'index' not in already_processed: already_processed.add('index') outfile.write(' index="%s"' % self.gds_format_integer(self.index, input_name='index')) @@ -5157,7 +5855,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:None="http://www.w3.org/2001/XMLSchema" ', name_='TextEquivType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15" xmlns:None="http://www.w3.org/2001/XMLSchema" ', name_='TextEquivType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -5170,7 +5868,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml namespaceprefix_ = self.Unicode_nsprefix_ + ':' if (UseCapturedNS_ and self.Unicode_nsprefix_) else '' showIndent(outfile, level, pretty_print) outfile.write('<%sUnicode>%s%s' % (namespaceprefix_ , self.gds_encode(self.gds_format_string(quote_xml(self.Unicode), input_name='Unicode')), namespaceprefix_ , eol_)) - def to_etree(self, parent_element=None, name_='TextEquivType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='TextEquivType', 
mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -5193,6 +5891,8 @@ def to_etree(self, parent_element=None, name_='TextEquivType', mapping_=None, ns etree_.SubElement(element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}Unicode').text = self.gds_format_string(Unicode_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -5200,16 +5900,17 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('index', node) if value is not None and 'index' not in already_processed: already_processed.add('index') self.index = self.gds_parse_integer(value, node, 'index') + self.validate_indexType(self.index) # validate type indexType value = find_attr_value_('conf', node) if value is not None and 'conf' not in already_processed: already_processed.add('conf') @@ -5229,7 +5930,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def 
_buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'PlainText': value_ = child_.text value_ = self.gds_parse_string(value_, node, 'PlainText') @@ -5248,7 +5949,15 @@ def __hash__(self): class GridType(GeneratedsSuper): - """Matrix of grid points defining the table grid on the page.""" + """GridType -- + Matrix of grid points defining the table grid on the page. + + * GridPoints -- + One row in the grid point matrix. + Points with x,y coordinates. + (note: for a table with n table rows there should be n+1 grid rows) + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('GridPoints', 'GridPointsType', 1, 0, {'maxOccurs': 'unbounded', 'minOccurs': '2', 'name': 'GridPoints', 'type': 'GridPointsType'}, None), @@ -5260,7 +5969,7 @@ def __init__(self, GridPoints=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" if GridPoints is None: self.GridPoints = [] else: @@ -5291,7 +6000,7 @@ def insert_GridPoints_at(self, index, value): self.GridPoints.insert(index, value) def replace_GridPoints_at(self, index, value): self.GridPoints[index] = value - def hasContent_(self): + def has__content(self): if ( self.GridPoints ): @@ -5313,17 +6022,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GridType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GridType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GridType', 
pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GridType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GridType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GridType'): pass - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GridType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GridType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -5331,15 +6040,17 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for GridPoints_ in self.GridPoints: namespaceprefix_ = self.GridPoints_nsprefix_ + ':' if (UseCapturedNS_ and self.GridPoints_nsprefix_) else '' GridPoints_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='GridPoints', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='GridType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='GridType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) for GridPoints_ in self.GridPoints: - GridPoints_.to_etree(element, name_='GridPoints', mapping_=mapping_, nsmap_=nsmap_) + GridPoints_.to_etree(element, name_='GridPoints', 
mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -5347,14 +6058,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'GridPoints': obj_ = GridPointsType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -5366,12 +6077,15 @@ def __hash__(self): class GridPointsType(GeneratedsSuper): - """Points with x,y coordinates. - The grid row index""" + """GridPointsType -- Points with x,y coordinates. 
+ index -- + The grid row index + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('index', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('points', 'pc:PointsType', 0, 0, {'use': 'required'}), + MemberSpec_('index', 'int', 0, 0, {'use': 'required', 'name': 'index'}), + MemberSpec_('points', 'pc:PointsType', 0, 0, {'use': 'required', 'name': 'points'}), ] subclass = None superclass = None @@ -5380,7 +6094,7 @@ def __init__(self, index=None, points=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.index = _cast(int, index) self.index_nsprefix_ = "pc" self.points = _cast(None, points) @@ -5419,7 +6133,7 @@ def validate_PointsType(self, value): self.validate_PointsType_patterns_, value): self.gds_collector_.add_message('Value "%s" does not match xsd pattern restrictions: %s' % (encode_str_2_3(value), self.validate_PointsType_patterns_, )) validate_PointsType_patterns_ = [['^(([0-9]+,[0-9]+ )+([0-9]+,[0-9]+))$']] - def hasContent_(self): + def has__content(self): if ( ): @@ -5441,23 +6155,23 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GridPointsType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GridPointsType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GridPointsType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GridPointsType', pretty_print=pretty_print) 
outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GridPointsType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GridPointsType'): if self.index is not None and 'index' not in already_processed: already_processed.add('index') outfile.write(' index="%s"' % self.gds_format_integer(self.index, input_name='index')) if self.points is not None and 'points' not in already_processed: already_processed.add('points') outfile.write(' points=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.points), input_name='points')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GridPointsType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GridPointsType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='GridPointsType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='GridPointsType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -5468,6 +6182,8 @@ def to_etree(self, parent_element=None, name_='GridPointsType', mapping_=None, n element.set('points', self.gds_format_string(self.points)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -5475,12 +6191,12 @@ def build(self, node, gds_collector_=None): 
self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('index', node) if value is not None and 'index' not in already_processed: already_processed.add('index') @@ -5490,7 +6206,7 @@ def buildAttributes(self, node, attrs, already_processed): already_processed.add('points') self.points = value self.validate_PointsType(self.points) # validate type PointsType - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -5498,13 +6214,16 @@ def __hash__(self): class PrintSpaceType(GeneratedsSuper): - """Determines the effective area on the paper of a printed page. + """PrintSpaceType -- + Determines the effective area on the paper of a printed page. Its size is equal for all pages of a book (exceptions: titlepage, multipage pictures). It contains all living elements (except marginals) like body type, footnotes, headings, running titles. It does not contain pagenumber (if not part of running title), - marginals, signature mark, preview words.""" + marginals, signature mark, preview words. 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), @@ -5516,7 +6235,7 @@ def __init__(self, Coords=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.Coords = Coords self.Coords_nsprefix_ = "pc" def factory(*args_, **kwargs_): @@ -5538,7 +6257,7 @@ def get_Coords(self): return self.Coords def set_Coords(self, Coords): self.Coords = Coords - def hasContent_(self): + def has__content(self): if ( self.Coords is not None ): @@ -5560,17 +6279,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PrintSpaceType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='PrintSpaceType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='PrintSpaceType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='PrintSpaceType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PrintSpaceType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='PrintSpaceType'): pass - def exportChildren(self, outfile, level, namespaceprefix_='', 
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PrintSpaceType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PrintSpaceType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -5578,16 +6297,18 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml if self.Coords is not None: namespaceprefix_ = self.Coords_nsprefix_ + ':' if (UseCapturedNS_ and self.Coords_nsprefix_) else '' self.Coords.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Coords', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='PrintSpaceType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='PrintSpaceType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) if self.Coords is not None: Coords_ = self.Coords - Coords_.to_etree(element, name_='Coords', mapping_=mapping_, nsmap_=nsmap_) + Coords_.to_etree(element, name_='Coords', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -5595,14 +6316,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, 
already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Coords': obj_ = CoordsType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -5614,14 +6335,18 @@ def __hash__(self): class ReadingOrderType(GeneratedsSuper): - """Definition of the reading order within the page. + """ReadingOrderType -- + Definition of the reading order within the page. To express a reading order between elements they have to be included in an OrderedGroup. Groups may contain further groups. 
- Confidence value (between 0 and 1)""" + + * conf -- Confidence value (between 0 and 1) + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional', 'name': 'conf'}), MemberSpec_('OrderedGroup', 'OrderedGroupType', 0, 0, {'name': 'OrderedGroup', 'type': 'OrderedGroupType'}, 2), MemberSpec_('UnorderedGroup', 'UnorderedGroupType', 0, 0, {'name': 'UnorderedGroup', 'type': 'UnorderedGroupType'}, 2), ] @@ -5632,7 +6357,7 @@ def __init__(self, conf=None, OrderedGroup=None, UnorderedGroup=None, gds_collec self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.conf = _cast(float, conf) self.conf_nsprefix_ = "pc" self.OrderedGroup = OrderedGroup @@ -5681,7 +6406,7 @@ def validate_ConfSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd maxInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.OrderedGroup is not None or self.UnorderedGroup is not None @@ -5704,19 +6429,19 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ReadingOrderType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ReadingOrderType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ReadingOrderType', pretty_print=pretty_print) + 
self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ReadingOrderType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ReadingOrderType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ReadingOrderType'): if self.conf is not None and 'conf' not in already_processed: already_processed.add('conf') outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ReadingOrderType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ReadingOrderType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -5727,7 +6452,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml if self.UnorderedGroup is not None: namespaceprefix_ = self.UnorderedGroup_nsprefix_ + ':' if (UseCapturedNS_ and self.UnorderedGroup_nsprefix_) else '' self.UnorderedGroup.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UnorderedGroup', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='ReadingOrderType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='ReadingOrderType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -5736,12 +6461,14 @@ def to_etree(self, parent_element=None, name_='ReadingOrderType', mapping_=None, 
element.set('conf', self.gds_format_float(self.conf)) if self.OrderedGroup is not None: OrderedGroup_ = self.OrderedGroup - OrderedGroup_.to_etree(element, name_='OrderedGroup', mapping_=mapping_, nsmap_=nsmap_) + OrderedGroup_.to_etree(element, name_='OrderedGroup', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.UnorderedGroup is not None: UnorderedGroup_ = self.UnorderedGroup - UnorderedGroup_.to_etree(element, name_='UnorderedGroup', mapping_=mapping_, nsmap_=nsmap_) + UnorderedGroup_.to_etree(element, name_='UnorderedGroup', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -5749,19 +6476,19 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('conf', node) if value is not None and 'conf' not in already_processed: already_processed.add('conf') value = self.gds_parse_float(value, node, 'conf') self.conf = value self.validate_ConfSimpleType(self.conf) # validate type ConfSimpleType - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'OrderedGroup': obj_ = 
OrderedGroupType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -5778,12 +6505,14 @@ def __hash__(self): class RegionRefIndexedType(GeneratedsSuper): - """Numbered regionPosition (order number) of this item within the current - hierarchy level.""" + """RegionRefIndexedType -- Numbered region + index -- Position (order number) of this item within the current hierarchy level. + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('index', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('regionRef', 'string', 0, 0, {'use': 'required'}), + MemberSpec_('index', 'int', 0, 0, {'use': 'required', 'name': 'index'}), + MemberSpec_('regionRef', 'string', 0, 0, {'use': 'required', 'name': 'regionRef'}), ] subclass = None superclass = None @@ -5792,7 +6521,7 @@ def __init__(self, index=None, regionRef=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.index = _cast(int, index) self.index_nsprefix_ = "pc" self.regionRef = _cast(None, regionRef) @@ -5820,7 +6549,7 @@ def get_regionRef(self): return self.regionRef def set_regionRef(self, regionRef): self.regionRef = regionRef - def hasContent_(self): + def has__content(self): if ( ): @@ -5842,23 +6571,23 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RegionRefIndexedType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RegionRefIndexedType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, 
namespacedef_, name_='RegionRefIndexedType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RegionRefIndexedType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RegionRefIndexedType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RegionRefIndexedType'): if self.index is not None and 'index' not in already_processed: already_processed.add('index') outfile.write(' index="%s"' % self.gds_format_integer(self.index, input_name='index')) if self.regionRef is not None and 'regionRef' not in already_processed: already_processed.add('regionRef') outfile.write(' regionRef=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.regionRef), input_name='regionRef')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionRefIndexedType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionRefIndexedType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='RegionRefIndexedType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='RegionRefIndexedType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -5869,6 +6598,8 @@ def to_etree(self, parent_element=None, name_='RegionRefIndexedType', mapping_=N element.set('regionRef', self.gds_format_string(self.regionRef)) if mapping_ is not None: mapping_[id(self)] = 
element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -5876,12 +6607,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('index', node) if value is not None and 'index' not in already_processed: already_processed.add('index') @@ -5890,7 +6621,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'regionRef' not in already_processed: already_processed.add('regionRef') self.regionRef = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -5898,25 +6629,36 @@ def __hash__(self): class OrderedGroupIndexedType(GeneratedsSuper): - """Indexed group containing ordered elements - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - Position (order number) of this item within the - current hierarchy level. - Is this group a continuation of another group (from - previous column or page, for example)? 
- For generic use""" + """OrderedGroupIndexedType -- + Indexed group containing ordered elements + + * regionRef -- + Optional link to a parent region of nested regions. + The parent region doubles as reading order group. + Only the nested regions should be allowed as group members. + + * index -- + Position (order number) of this item within the + current hierarchy level. + + * continuation -- + Is this group a continuation of another group (from + previous column or page, for example)? + + * custom -- For generic use + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('index', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('caption', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional', 'name': 'regionRef'}), + MemberSpec_('index', 'int', 0, 0, {'use': 'required', 'name': 'index'}), + MemberSpec_('caption', 'string', 0, 1, {'use': 'optional', 'name': 'caption'}), + MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional', 'name': 'continuation'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None), MemberSpec_('Labels', 'LabelsType', 1, 1, 
{'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), MemberSpec_('RegionRefIndexed', 'RegionRefIndexedType', 1, 0, {'name': 'RegionRefIndexed', 'type': 'RegionRefIndexedType'}, 3), @@ -5930,7 +6672,7 @@ def __init__(self, id=None, regionRef=None, index=None, caption=None, type_=None self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.regionRef = _cast(None, regionRef) @@ -6073,7 +6815,7 @@ def validate_GroupTypeSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on GroupTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.UserDefined is not None or self.Labels or @@ -6099,15 +6841,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='OrderedGroupIndexedType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='OrderedGroupIndexedType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='OrderedGroupIndexedType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='OrderedGroupIndexedType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, 
outfile, level, already_processed, namespaceprefix_='', name_='OrderedGroupIndexedType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='OrderedGroupIndexedType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -6132,7 +6874,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupIndexedType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupIndexedType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -6152,7 +6894,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for UnorderedGroupIndexed_ in self.UnorderedGroupIndexed: namespaceprefix_ = self.UnorderedGroupIndexed_nsprefix_ + ':' if (UseCapturedNS_ and self.UnorderedGroupIndexed_nsprefix_) else '' UnorderedGroupIndexed_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UnorderedGroupIndexed', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='OrderedGroupIndexedType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='OrderedGroupIndexedType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = 
etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -6175,17 +6917,19 @@ def to_etree(self, parent_element=None, name_='OrderedGroupIndexedType', mapping element.set('comments', self.gds_format_string(self.comments)) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for RegionRefIndexed_ in self.RegionRefIndexed: - RegionRefIndexed_.to_etree(element, name_='RegionRefIndexed', mapping_=mapping_, nsmap_=nsmap_) + RegionRefIndexed_.to_etree(element, name_='RegionRefIndexed', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for OrderedGroupIndexed_ in self.OrderedGroupIndexed: - OrderedGroupIndexed_.to_etree(element, name_='OrderedGroupIndexed', mapping_=mapping_, nsmap_=nsmap_) + OrderedGroupIndexed_.to_etree(element, name_='OrderedGroupIndexed', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for UnorderedGroupIndexed_ in self.UnorderedGroupIndexed: - UnorderedGroupIndexed_.to_etree(element, name_='UnorderedGroupIndexed', mapping_=mapping_, nsmap_=nsmap_) + UnorderedGroupIndexed_.to_etree(element, name_='UnorderedGroupIndexed', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -6193,12 +6937,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed 
= set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -6237,7 +6981,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'UserDefined': obj_ = UserDefinedType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -6341,11 +7085,16 @@ def sort_AllIndexed(self, validate_uniqueness=True): return self.get_AllIndexed() # pylint: disable=line-too-long,invalid-name,missing-module-docstring,missing-function-docstring - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments - namespaceprefix_ = 'pc:' + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments + if pretty_print: + eol_ = '\n' + else: + eol_ = 
'' if self.UserDefined is not None: + namespaceprefix_ = self.UserDefined_nsprefix_ + ':' if (UseCapturedNS_ and self.UserDefined_nsprefix_) else '' self.UserDefined.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UserDefined', pretty_print=pretty_print) for Labels_ in self.Labels: + namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print) cleaned = [] def replaceWithRRI(group): @@ -6363,30 +7112,41 @@ def replaceWithRRI(group): else: cleaned.append(entry) for entry in cleaned: - entry.export(outfile, level, namespaceprefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print) + entry.export(outfile, level, entry.ns_prefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print) # end class OrderedGroupIndexedType class UnorderedGroupIndexedType(GeneratedsSuper): - """Indexed group containing unordered elements - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - Position (order number) of this item within the - current hierarchy level. - Is this group a continuation of another group - (from previous column or page, for example)? - For generic use""" + """UnorderedGroupIndexedType -- + Indexed group containing unordered elements + + * regionRef -- + Optional link to a parent region of nested regions. + The parent region doubles as reading order group. + Only the nested regions should be allowed as group members. + + * index -- + Position (order number) of this item within the + current hierarchy level. + + * continuation -- + Is this group a continuation of another group + (from previous column or page, for example)? 
+ + * custom -- For generic use + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('index', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('caption', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional', 'name': 'regionRef'}), + MemberSpec_('index', 'int', 0, 0, {'use': 'required', 'name': 'index'}), + MemberSpec_('caption', 'string', 0, 1, {'use': 'optional', 'name': 'caption'}), + MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional', 'name': 'continuation'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), MemberSpec_('RegionRef', 'RegionRefType', 1, 0, {'name': 'RegionRef', 'type': 'RegionRefType'}, 4), @@ -6400,7 +7160,7 @@ def __init__(self, id=None, regionRef=None, index=None, caption=None, type_=None self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) 
self.id_nsprefix_ = "pc" self.regionRef = _cast(None, regionRef) @@ -6543,7 +7303,7 @@ def validate_GroupTypeSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on GroupTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.UserDefined is not None or self.Labels or @@ -6569,15 +7329,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UnorderedGroupIndexedType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UnorderedGroupIndexedType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UnorderedGroupIndexedType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UnorderedGroupIndexedType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UnorderedGroupIndexedType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UnorderedGroupIndexedType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -6602,7 +7362,7 @@ def exportAttributes(self, outfile, level, already_processed, 
namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UnorderedGroupIndexedType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UnorderedGroupIndexedType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -6622,7 +7382,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for UnorderedGroup_ in self.UnorderedGroup: namespaceprefix_ = self.UnorderedGroup_nsprefix_ + ':' if (UseCapturedNS_ and self.UnorderedGroup_nsprefix_) else '' UnorderedGroup_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UnorderedGroup', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='UnorderedGroupIndexedType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='UnorderedGroupIndexedType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -6645,17 +7405,19 @@ def to_etree(self, parent_element=None, name_='UnorderedGroupIndexedType', mappi element.set('comments', self.gds_format_string(self.comments)) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - 
Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for RegionRef_ in self.RegionRef: - RegionRef_.to_etree(element, name_='RegionRef', mapping_=mapping_, nsmap_=nsmap_) + RegionRef_.to_etree(element, name_='RegionRef', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for OrderedGroup_ in self.OrderedGroup: - OrderedGroup_.to_etree(element, name_='OrderedGroup', mapping_=mapping_, nsmap_=nsmap_) + OrderedGroup_.to_etree(element, name_='OrderedGroup', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for UnorderedGroup_ in self.UnorderedGroup: - UnorderedGroup_.to_etree(element, name_='UnorderedGroup', mapping_=mapping_, nsmap_=nsmap_) + UnorderedGroup_.to_etree(element, name_='UnorderedGroup', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -6663,12 +7425,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -6707,7 +7469,7 @@ def buildAttributes(self, node, attrs, 
already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'UserDefined': obj_ = UserDefinedType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -6748,7 +7510,7 @@ def get_UnorderedGroupChildren(self): class RegionRefType(GeneratedsSuper): __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('regionRef', 'string', 0, 0, {'use': 'required'}), + MemberSpec_('regionRef', 'string', 0, 0, {'use': 'required', 'name': 'regionRef'}), ] subclass = None superclass = None @@ -6757,7 +7519,7 @@ def __init__(self, regionRef=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.regionRef = _cast(None, regionRef) self.regionRef_nsprefix_ = "pc" def factory(*args_, **kwargs_): @@ -6779,7 +7541,7 @@ def get_regionRef(self): return self.regionRef def set_regionRef(self, regionRef): self.regionRef = regionRef - def hasContent_(self): + def has__content(self): if ( ): @@ -6801,20 +7563,20 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RegionRefType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RegionRefType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, 
name_='RegionRefType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RegionRefType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RegionRefType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RegionRefType'): if self.regionRef is not None and 'regionRef' not in already_processed: already_processed.add('regionRef') outfile.write(' regionRef=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.regionRef), input_name='regionRef')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionRefType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionRefType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='RegionRefType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='RegionRefType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -6823,6 +7585,8 @@ def to_etree(self, parent_element=None, name_='RegionRefType', mapping_=None, ns element.set('regionRef', self.gds_format_string(self.regionRef)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -6830,17 +7594,17 @@ def build(self, node, gds_collector_=None): 
self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('regionRef', node) if value is not None and 'regionRef' not in already_processed: already_processed.add('regionRef') self.regionRef = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -6848,22 +7612,31 @@ def __hash__(self): class OrderedGroupType(GeneratedsSuper): - """Numbered group (contains ordered elements) - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - Is this group a continuation of another group - (from previous column or page, for example)? - For generic use""" + """OrderedGroupType -- + Numbered group (contains ordered elements) + + * regionRef -- + Optional link to a parent region of nested regions. + The parent region doubles as reading order group. + Only the nested regions should be allowed as group members. + + * continuation -- + Is this group a continuation of another group + (from previous column or page, for example)? 
+ + * custom -- For generic use + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('caption', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional', 'name': 'regionRef'}), + MemberSpec_('caption', 'string', 0, 1, {'use': 'optional', 'name': 'caption'}), + MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional', 'name': 'continuation'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), MemberSpec_('RegionRefIndexed', 'RegionRefIndexedType', 1, 0, {'name': 'RegionRefIndexed', 'type': 'RegionRefIndexedType'}, 5), @@ -6877,7 +7650,7 @@ def __init__(self, id=None, regionRef=None, caption=None, type_=None, continuati self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.regionRef = _cast(None, regionRef) @@ -7014,7 +7787,7 @@ def 
validate_GroupTypeSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on GroupTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.UserDefined is not None or self.Labels or @@ -7040,15 +7813,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='OrderedGroupType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='OrderedGroupType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='OrderedGroupType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='OrderedGroupType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='OrderedGroupType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='OrderedGroupType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -7070,7 +7843,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % 
(self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -7090,7 +7863,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for UnorderedGroupIndexed_ in self.UnorderedGroupIndexed: namespaceprefix_ = self.UnorderedGroupIndexed_nsprefix_ + ':' if (UseCapturedNS_ and self.UnorderedGroupIndexed_nsprefix_) else '' UnorderedGroupIndexed_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UnorderedGroupIndexed', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='OrderedGroupType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='OrderedGroupType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -7111,17 +7884,19 @@ def to_etree(self, parent_element=None, name_='OrderedGroupType', mapping_=None, element.set('comments', self.gds_format_string(self.comments)) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, 
reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for RegionRefIndexed_ in self.RegionRefIndexed: - RegionRefIndexed_.to_etree(element, name_='RegionRefIndexed', mapping_=mapping_, nsmap_=nsmap_) + RegionRefIndexed_.to_etree(element, name_='RegionRefIndexed', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for OrderedGroupIndexed_ in self.OrderedGroupIndexed: - OrderedGroupIndexed_.to_etree(element, name_='OrderedGroupIndexed', mapping_=mapping_, nsmap_=nsmap_) + OrderedGroupIndexed_.to_etree(element, name_='OrderedGroupIndexed', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for UnorderedGroupIndexed_ in self.UnorderedGroupIndexed: - UnorderedGroupIndexed_.to_etree(element, name_='UnorderedGroupIndexed', mapping_=mapping_, nsmap_=nsmap_) + UnorderedGroupIndexed_.to_etree(element, name_='UnorderedGroupIndexed', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -7129,12 +7904,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -7169,7 +7944,7 @@ def buildAttributes(self, node, attrs, already_processed): 
if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'UserDefined': obj_ = UserDefinedType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -7273,11 +8048,16 @@ def sort_AllIndexed(self, validate_uniqueness=True): return self.get_AllIndexed() # pylint: disable=line-too-long,invalid-name,missing-module-docstring,missing-function-docstring - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments - namespaceprefix_ = 'pc:' + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments + if pretty_print: + eol_ = '\n' + else: + eol_ = '' if self.UserDefined is not None: + namespaceprefix_ = self.UserDefined_nsprefix_ + ':' if (UseCapturedNS_ and self.UserDefined_nsprefix_) else '' self.UserDefined.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UserDefined', pretty_print=pretty_print) for Labels_ in self.Labels: + namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print) cleaned = [] def replaceWithRRI(group): @@ -7295,27 +8075,36 @@ def replaceWithRRI(group): else: cleaned.append(entry) for entry in cleaned: - entry.export(outfile, level, namespaceprefix_, namespacedef_='', 
name_=entry.__class__.__name__[:-4], pretty_print=pretty_print) + entry.export(outfile, level, entry.ns_prefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print) # end class OrderedGroupType class UnorderedGroupType(GeneratedsSuper): - """Numbered group (contains unordered elements) - Optional link to a parent region of nested regions. - The parent region doubles as reading order group. - Only the nested regions should be allowed as group members. - Is this group a continuation of another group - (from previous column or page, for example)? - For generic use""" + """UnorderedGroupType -- + Numbered group (contains unordered elements) + + * regionRef -- + Optional link to a parent region of nested regions. + The parent region doubles as reading order group. + Only the nested regions should be allowed as group members. + + * continuation -- + Is this group a continuation of another group + (from previous column or page, for example)? + + * custom -- For generic use + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('caption', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('regionRef', 'string', 0, 1, {'use': 'optional', 'name': 'regionRef'}), + MemberSpec_('caption', 'string', 0, 1, {'use': 'optional', 'name': 'caption'}), + MemberSpec_('type_', 'pc:GroupTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional', 'name': 'continuation'}), 
+ MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), MemberSpec_('RegionRef', 'RegionRefType', 1, 0, {'name': 'RegionRef', 'type': 'RegionRefType'}, 6), @@ -7329,7 +8118,7 @@ def __init__(self, id=None, regionRef=None, caption=None, type_=None, continuati self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.regionRef = _cast(None, regionRef) @@ -7466,7 +8255,7 @@ def validate_GroupTypeSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on GroupTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.UserDefined is not None or self.Labels or @@ -7492,15 +8281,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UnorderedGroupType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UnorderedGroupType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UnorderedGroupType', 
pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UnorderedGroupType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UnorderedGroupType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UnorderedGroupType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -7522,7 +8311,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UnorderedGroupType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UnorderedGroupType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -7542,7 +8331,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for UnorderedGroup_ in self.UnorderedGroup: namespaceprefix_ = self.UnorderedGroup_nsprefix_ + ':' if (UseCapturedNS_ and self.UnorderedGroup_nsprefix_) else '' UnorderedGroup_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UnorderedGroup', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='UnorderedGroupType', 
mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='UnorderedGroupType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -7563,17 +8352,19 @@ def to_etree(self, parent_element=None, name_='UnorderedGroupType', mapping_=Non element.set('comments', self.gds_format_string(self.comments)) if self.UserDefined is not None: UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for RegionRef_ in self.RegionRef: - RegionRef_.to_etree(element, name_='RegionRef', mapping_=mapping_, nsmap_=nsmap_) + RegionRef_.to_etree(element, name_='RegionRef', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for OrderedGroup_ in self.OrderedGroup: - OrderedGroup_.to_etree(element, name_='OrderedGroup', mapping_=mapping_, nsmap_=nsmap_) + OrderedGroup_.to_etree(element, name_='OrderedGroup', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for UnorderedGroup_ in self.UnorderedGroup: - UnorderedGroup_.to_etree(element, name_='UnorderedGroup', mapping_=mapping_, nsmap_=nsmap_) + UnorderedGroup_.to_etree(element, name_='UnorderedGroup', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -7581,12 +8372,12 @@ def build(self, node, 
gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -7621,7 +8412,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'UserDefined': obj_ = UserDefinedType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -7660,8 +8451,11 @@ def get_UnorderedGroupChildren(self): class BorderType(GeneratedsSuper): - """Border of the actual page (if the scanned image - contains parts not belonging to the page).""" + """BorderType -- + Border of the actual page (if the scanned image + contains parts not belonging to the page). 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), @@ -7673,7 +8467,7 @@ def __init__(self, Coords=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.Coords = Coords self.Coords_nsprefix_ = "pc" def factory(*args_, **kwargs_): @@ -7695,7 +8489,7 @@ def get_Coords(self): return self.Coords def set_Coords(self, Coords): self.Coords = Coords - def hasContent_(self): + def has__content(self): if ( self.Coords is not None ): @@ -7717,17 +8511,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='BorderType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='BorderType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='BorderType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='BorderType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='BorderType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='BorderType'): pass - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', 
name_='BorderType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='BorderType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -7735,16 +8529,18 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml if self.Coords is not None: namespaceprefix_ = self.Coords_nsprefix_ + ':' if (UseCapturedNS_ and self.Coords_nsprefix_) else '' self.Coords.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Coords', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='BorderType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='BorderType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) if self.Coords is not None: Coords_ = self.Coords - Coords_.to_etree(element, name_='Coords', mapping_=mapping_, nsmap_=nsmap_) + Coords_.to_etree(element, name_='Coords', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -7752,14 +8548,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, 
node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Coords': obj_ = CoordsType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -7785,9 +8581,12 @@ def set_Coords(self, Coords): class LayersType(GeneratedsSuper): - """Can be used to express the z-index of overlapping + """LayersType -- + Can be used to express the z-index of overlapping regions. An element with a greater z-index is always in - front of another element with lower z-index.""" + front of another element with lower z-index. + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('Layer', 'LayerType', 1, 0, {'maxOccurs': 'unbounded', 'minOccurs': '1', 'name': 'Layer', 'type': 'LayerType'}, None), @@ -7799,7 +8598,7 @@ def __init__(self, Layer=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" if Layer is None: self.Layer = [] else: @@ -7830,7 +8629,7 @@ def insert_Layer_at(self, index, value): self.Layer.insert(index, value) def replace_Layer_at(self, index, value): self.Layer[index] = value - def hasContent_(self): + def has__content(self): if ( self.Layer ): @@ -7852,17 +8651,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, 
namespaceprefix_, name_='LayersType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LayersType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LayersType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LayersType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LayersType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LayersType'): pass - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LayersType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LayersType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -7870,15 +8669,17 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for Layer_ in self.Layer: namespaceprefix_ = self.Layer_nsprefix_ + ':' if (UseCapturedNS_ and self.Layer_nsprefix_) else '' Layer_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Layer', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='LayersType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='LayersType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = 
etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) for Layer_ in self.Layer: - Layer_.to_etree(element, name_='Layer', mapping_=mapping_, nsmap_=nsmap_) + Layer_.to_etree(element, name_='Layer', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -7886,14 +8687,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Layer': obj_ = LayerType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -7907,9 +8708,9 @@ def __hash__(self): class LayerType(GeneratedsSuper): __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('zIndex', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('caption', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('zIndex', 'int', 0, 0, {'use': 'required', 'name': 'zIndex'}), + 
MemberSpec_('caption', 'string', 0, 1, {'use': 'optional', 'name': 'caption'}), MemberSpec_('RegionRef', 'RegionRefType', 1, 0, {'maxOccurs': 'unbounded', 'minOccurs': '1', 'name': 'RegionRef', 'type': 'RegionRefType'}, None), ] subclass = None @@ -7919,7 +8720,7 @@ def __init__(self, id=None, zIndex=None, caption=None, RegionRef=None, gds_colle self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.zIndex = _cast(int, zIndex) @@ -7968,7 +8769,7 @@ def get_caption(self): return self.caption def set_caption(self, caption): self.caption = caption - def hasContent_(self): + def has__content(self): if ( self.RegionRef ): @@ -7990,15 +8791,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LayerType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LayerType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LayerType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LayerType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LayerType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LayerType'): if self.id is not None and 'id' not in already_processed: 
already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -8008,7 +8809,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.caption is not None and 'caption' not in already_processed: already_processed.add('caption') outfile.write(' caption=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.caption), input_name='caption')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LayerType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LayerType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -8016,7 +8817,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for RegionRef_ in self.RegionRef: namespaceprefix_ = self.RegionRef_nsprefix_ + ':' if (UseCapturedNS_ and self.RegionRef_nsprefix_) else '' RegionRef_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='RegionRef', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='LayerType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='LayerType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -8028,9 +8829,11 @@ def to_etree(self, parent_element=None, name_='LayerType', mapping_=None, nsmap_ if self.caption is not None: element.set('caption', self.gds_format_string(self.caption)) for RegionRef_ in self.RegionRef: - RegionRef_.to_etree(element, name_='RegionRef', mapping_=mapping_, nsmap_=nsmap_) + RegionRef_.to_etree(element, 
name_='RegionRef', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -8038,12 +8841,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -8056,7 +8859,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'caption' not in already_processed: already_processed.add('caption') self.caption = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'RegionRef': obj_ = RegionRefType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -8068,11 +8871,13 @@ def __hash__(self): class BaselineType(GeneratedsSuper): - """Confidence value (between 0 and 1)""" + """conf -- Confidence value (between 0 and 1) + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('points', 'pc:PointsType', 0, 0, {'use': 'required'}), - MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('points', 
'pc:PointsType', 0, 0, {'use': 'required', 'name': 'points'}), + MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional', 'name': 'conf'}), ] subclass = None superclass = None @@ -8081,7 +8886,7 @@ def __init__(self, points=None, conf=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.points = _cast(None, points) self.points_nsprefix_ = "pc" self.conf = _cast(float, conf) @@ -8135,7 +8940,7 @@ def validate_ConfSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd maxInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( ): @@ -8157,23 +8962,23 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='BaselineType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='BaselineType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='BaselineType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='BaselineType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='BaselineType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='BaselineType'): if 
self.points is not None and 'points' not in already_processed: already_processed.add('points') outfile.write(' points=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.points), input_name='points')), )) if self.conf is not None and 'conf' not in already_processed: already_processed.add('conf') outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='BaselineType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='BaselineType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='BaselineType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='BaselineType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -8184,6 +8989,8 @@ def to_etree(self, parent_element=None, name_='BaselineType', mapping_=None, nsm element.set('conf', self.gds_format_float(self.conf)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -8191,12 +8998,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, 
gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('points', node) if value is not None and 'points' not in already_processed: already_processed.add('points') @@ -8208,7 +9015,7 @@ def buildAttributes(self, node, attrs, already_processed): value = self.gds_parse_float(value, node, 'conf') self.conf = value self.validate_ConfSimpleType(self.conf) # validate type ConfSimpleType - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -8216,9 +9023,12 @@ def __hash__(self): class RelationsType(GeneratedsSuper): - """Container for one-to-one relations between layout + """RelationsType -- + Container for one-to-one relations between layout objects (for example: DropCap - paragraph, caption - - image).""" + image). 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('Relation', 'RelationType', 1, 0, {'maxOccurs': 'unbounded', 'minOccurs': '1', 'name': 'Relation', 'type': 'RelationType'}, None), @@ -8230,7 +9040,7 @@ def __init__(self, Relation=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" if Relation is None: self.Relation = [] else: @@ -8261,7 +9071,7 @@ def insert_Relation_at(self, index, value): self.Relation.insert(index, value) def replace_Relation_at(self, index, value): self.Relation[index] = value - def hasContent_(self): + def has__content(self): if ( self.Relation ): @@ -8283,17 +9093,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RelationsType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RelationsType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RelationsType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RelationsType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RelationsType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RelationsType'): pass - def exportChildren(self, outfile, level, namespaceprefix_='', 
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RelationsType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RelationsType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -8301,15 +9111,17 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for Relation_ in self.Relation: namespaceprefix_ = self.Relation_nsprefix_ + ':' if (UseCapturedNS_ and self.Relation_nsprefix_) else '' Relation_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Relation', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='RelationsType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='RelationsType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) for Relation_ in self.Relation: - Relation_.to_etree(element, name_='Relation', mapping_=mapping_, nsmap_=nsmap_) + Relation_.to_etree(element, name_='Relation', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -8317,14 +9129,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for 
child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Relation': obj_ = RelationType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -8336,7 +9148,8 @@ def __hash__(self): class RelationType(GeneratedsSuper): - """One-to-one relation between to layout object. Use 'link' + """RelationType -- + One-to-one relation between to layout object. Use 'link' for loose relations and 'join' for strong relations (where something is fragmented for instance). Examples for 'link': caption - image floating - @@ -8350,13 +9163,17 @@ class RelationType(GeneratedsSuper): pragraph is split across columns and the last word of the first paragraph DOES continue in the second paragraph) - For generic use""" + + * custom -- For generic use + * Labels -- Semantic labels / tags + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('type_', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('type_', 'typeType1', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('Labels', 'LabelsType', 1, 1, {'maxOccurs': 
'unbounded', 'minOccurs': '0', 'name': 'Labels', 'type': 'LabelsType'}, None), MemberSpec_('SourceRegionRef', 'RegionRefType', 0, 0, {'maxOccurs': '1', 'minOccurs': '1', 'name': 'SourceRegionRef', 'type': 'RegionRefType'}, None), MemberSpec_('TargetRegionRef', 'RegionRefType', 0, 0, {'maxOccurs': '1', 'minOccurs': '1', 'name': 'TargetRegionRef', 'type': 'RegionRefType'}, None), @@ -8368,7 +9185,7 @@ def __init__(self, id=None, type_=None, custom=None, comments=None, Labels=None, self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.type_ = _cast(None, type_) @@ -8435,7 +9252,20 @@ def get_comments(self): return self.comments def set_comments(self, comments): self.comments = comments - def hasContent_(self): + def validate_typeType1(self, value): + # Validate type typeType1, a restriction on string. + if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: + if not isinstance(value, str): + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, }) + return False + value = value + enumerations = ['link', 'join'] + if value not in enumerations: + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on typeType1' % {"value" : encode_str_2_3(value), "lineno": lineno} ) + result = False + def has__content(self): if ( self.Labels or self.SourceRegionRef is not None or @@ -8459,15 +9289,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - 
self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RelationType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RelationType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RelationType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RelationType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RelationType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RelationType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -8480,7 +9310,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.comments is not None and 'comments' not in already_processed: already_processed.add('comments') outfile.write(' comments=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.comments), input_name='comments')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RelationType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RelationType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -8494,7 +9324,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml if 
self.TargetRegionRef is not None: namespaceprefix_ = self.TargetRegionRef_nsprefix_ + ':' if (UseCapturedNS_ and self.TargetRegionRef_nsprefix_) else '' self.TargetRegionRef.export(outfile, level, namespaceprefix_, namespacedef_='', name_='TargetRegionRef', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='RelationType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='RelationType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -8508,15 +9338,17 @@ def to_etree(self, parent_element=None, name_='RelationType', mapping_=None, nsm if self.comments is not None: element.set('comments', self.gds_format_string(self.comments)) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.SourceRegionRef is not None: SourceRegionRef_ = self.SourceRegionRef - SourceRegionRef_.to_etree(element, name_='SourceRegionRef', mapping_=mapping_, nsmap_=nsmap_) + SourceRegionRef_.to_etree(element, name_='SourceRegionRef', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.TargetRegionRef is not None: TargetRegionRef_ = self.TargetRegionRef - TargetRegionRef_.to_etree(element, name_='TargetRegionRef', mapping_=mapping_, nsmap_=nsmap_) + TargetRegionRef_.to_etree(element, name_='TargetRegionRef', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -8524,12 +9356,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node 
already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -8538,6 +9370,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'type' not in already_processed: already_processed.add('type') self.type_ = value + self.validate_typeType1(self.type_) # validate type typeType1 value = find_attr_value_('custom', node) if value is not None and 'custom' not in already_processed: already_processed.add('custom') @@ -8546,7 +9379,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'comments' not in already_processed: already_processed.add('comments') self.comments = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Labels': obj_ = LabelsType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -8568,49 +9401,69 @@ def __hash__(self): class TextStyleType(GeneratedsSuper): - """Monospace (fixed-pitch, non-proportional) or + """TextStyleType -- + Monospace (fixed-pitch, non-proportional) or proportional font. - For instance: Arial, Times New Roman. - Add more information if necessary - (e.g. blackletter, antiqua). - Serif or sans-serif typeface. - The size of the characters in points. 
- The x-height or corpus size refers to the distance - between the baseline and the mean line of - lower-case letters in a typeface. - The unit is assumed to be pixels. - The degree of space (in points) between - the characters in a string of text. - Text colour in RGB encoded format - (red value) + (256 x green value) + (65536 x blue value). - Background colour - Background colour in RGB encoded format - (red value) + (256 x green value) + (65536 x blue value). - Specifies whether the colour of the text appears - reversed against a background colour. - Line style details if "underlined" is TRUE""" + + * fontFamily -- + For instance: Arial, Times New Roman. + Add more information if necessary + (e.g. blackletter, antiqua). + + * serif -- + Serif or sans-serif typeface. + + * fontSize -- + The size of the characters in points. + + * xHeight -- + The x-height or corpus size refers to the distance + between the baseline and the mean line of + lower-case letters in a typeface. + The unit is assumed to be pixels. + + * kerning -- + The degree of space (in points) between + the characters in a string of text. + + * textColourRgb -- + Text colour in RGB encoded format + (red value) + (256 x green value) + (65536 x blue value). + + * bgColour -- Background colour + * bgColourRgb -- + Background colour in RGB encoded format + (red value) + (256 x green value) + (65536 x blue value). + + * reverseVideo -- + Specifies whether the colour of the text appears + reversed against a background colour. 
+ + * underlineStyle -- Line style details if "underlined" is TRUE + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('fontFamily', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('serif', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('monospace', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('fontSize', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('xHeight', 'integer', 0, 1, {'use': 'optional'}), - MemberSpec_('kerning', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('textColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('textColourRgb', 'integer', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColourRgb', 'integer', 0, 1, {'use': 'optional'}), - MemberSpec_('reverseVideo', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('bold', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('italic', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('underlined', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('underlineStyle', 'pc:UnderlineStyleSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('subscript', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('superscript', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('strikethrough', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('smallCaps', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('letterSpaced', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('fontFamily', 'string', 0, 1, {'use': 'optional', 'name': 'fontFamily'}), + MemberSpec_('serif', 'boolean', 0, 1, {'use': 'optional', 'name': 'serif'}), + MemberSpec_('monospace', 'boolean', 0, 1, {'use': 'optional', 'name': 'monospace'}), + MemberSpec_('fontSize', 'float', 0, 1, {'use': 'optional', 'name': 'fontSize'}), + MemberSpec_('xHeight', 'integer', 0, 1, {'use': 'optional', 'name': 'xHeight'}), + MemberSpec_('kerning', 'int', 0, 1, {'use': 'optional', 'name': 'kerning'}), + 
MemberSpec_('textColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'textColour'}), + MemberSpec_('textColourRgb', 'integer', 0, 1, {'use': 'optional', 'name': 'textColourRgb'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), + MemberSpec_('bgColourRgb', 'integer', 0, 1, {'use': 'optional', 'name': 'bgColourRgb'}), + MemberSpec_('reverseVideo', 'boolean', 0, 1, {'use': 'optional', 'name': 'reverseVideo'}), + MemberSpec_('bold', 'boolean', 0, 1, {'use': 'optional', 'name': 'bold'}), + MemberSpec_('italic', 'boolean', 0, 1, {'use': 'optional', 'name': 'italic'}), + MemberSpec_('underlined', 'boolean', 0, 1, {'use': 'optional', 'name': 'underlined'}), + MemberSpec_('underlineStyle', 'pc:UnderlineStyleSimpleType', 0, 1, {'use': 'optional', 'name': 'underlineStyle'}), + MemberSpec_('subscript', 'boolean', 0, 1, {'use': 'optional', 'name': 'subscript'}), + MemberSpec_('superscript', 'boolean', 0, 1, {'use': 'optional', 'name': 'superscript'}), + MemberSpec_('strikethrough', 'boolean', 0, 1, {'use': 'optional', 'name': 'strikethrough'}), + MemberSpec_('smallCaps', 'boolean', 0, 1, {'use': 'optional', 'name': 'smallCaps'}), + MemberSpec_('letterSpaced', 'boolean', 0, 1, {'use': 'optional', 'name': 'letterSpaced'}), ] subclass = None superclass = None @@ -8619,7 +9472,7 @@ def __init__(self, fontFamily=None, serif=None, monospace=None, fontSize=None, x self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.fontFamily = _cast(None, fontFamily) self.fontFamily_nsprefix_ = "pc" self.serif = _cast(bool, serif) @@ -8781,7 +9634,7 @@ def validate_UnderlineStyleSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on UnderlineStyleSimpleType' % {"value" : encode_str_2_3(value), "lineno": 
lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( ): @@ -8803,14 +9656,14 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextStyleType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextStyleType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextStyleType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextStyleType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextStyleType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextStyleType'): if self.fontFamily is not None and 'fontFamily' not in already_processed: already_processed.add('fontFamily') outfile.write(' fontFamily=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.fontFamily), input_name='fontFamily')), )) @@ -8871,9 +9724,9 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.letterSpaced is not None and 'letterSpaced' not in already_processed: already_processed.add('letterSpaced') outfile.write(' letterSpaced="%s"' % self.gds_format_boolean(self.letterSpaced, input_name='letterSpaced')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextStyleType', fromsubclass_=False, pretty_print=True): + def 
_exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextStyleType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='TextStyleType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='TextStyleType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -8920,6 +9773,8 @@ def to_etree(self, parent_element=None, name_='TextStyleType', mapping_=None, ns element.set('letterSpaced', self.gds_format_boolean(self.letterSpaced)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -8927,12 +9782,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('fontFamily', node) if value is not None and 'fontFamily' not in already_processed: already_processed.add('fontFamily') @@ -9072,7 +9927,7 @@ def buildAttributes(self, node, attrs, already_processed): self.letterSpaced = False else: raise_parse_error(node, 'Bad boolean attribute') - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, 
gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -9080,15 +9935,27 @@ def __hash__(self): class RegionType(GeneratedsSuper): - """For generic use + """custom -- For generic use + continuation -- Is this region a continuation of another region - (in previous column or page, for example)?""" + (in previous column or page, for example)? + + * AlternativeImage -- + Alternative region images + (e.g. black-and-white). + + * Labels -- Semantic labels / tags + * Roles -- + Roles the region takes + (e.g. in context of a parent region). + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), + MemberSpec_('continuation', 'boolean', 0, 1, {'use': 'optional', 'name': 'continuation'}), MemberSpec_('AlternativeImage', 'AlternativeImageType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'AlternativeImage', 'type': 'AlternativeImageType'}, None), MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), MemberSpec_('UserDefined', 'UserDefinedType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'UserDefined', 'type': 'UserDefinedType'}, None), @@ -9116,7 +9983,7 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = 
_cast(None, id) self.id_nsprefix_ = "pc" self.custom = _cast(None, custom) @@ -9417,7 +10284,7 @@ def set_continuation(self, continuation): self.continuation = continuation def get_extensiontype_(self): return self.extensiontype_ def set_extensiontype_(self, extensiontype_): self.extensiontype_ = extensiontype_ - def hasContent_(self): + def has__content(self): if ( self.AlternativeImage or self.Coords is not None or @@ -9457,15 +10324,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RegionType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RegionType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -9486,7 +10353,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' outfile.write(' xsi:type="%s%s"' % (imported_ns_type_prefix_, self.extensiontype_)) else: outfile.write(' xsi:type="%s"' % 
self.extensiontype_) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -9548,7 +10415,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for CustomRegion_ in self.CustomRegion: namespaceprefix_ = self.CustomRegion_nsprefix_ + ':' if (UseCapturedNS_ and self.CustomRegion_nsprefix_) else '' CustomRegion_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='CustomRegion', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='RegionType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='RegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -9564,48 +10431,50 @@ def to_etree(self, parent_element=None, name_='RegionType', mapping_=None, nsmap if self.continuation is not None: element.set('continuation', self.gds_format_boolean(self.continuation)) for AlternativeImage_ in self.AlternativeImage: - AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, nsmap_=nsmap_) + AlternativeImage_.to_etree(element, name_='AlternativeImage', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Coords is not None: Coords_ = self.Coords - Coords_.to_etree(element, name_='Coords', mapping_=mapping_, nsmap_=nsmap_) + Coords_.to_etree(element, name_='Coords', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.UserDefined is not None: 
UserDefined_ = self.UserDefined - UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, nsmap_=nsmap_) + UserDefined_.to_etree(element, name_='UserDefined', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for Labels_ in self.Labels: - Labels_.to_etree(element, name_='Labels', mapping_=mapping_, nsmap_=nsmap_) + Labels_.to_etree(element, name_='Labels', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.Roles is not None: Roles_ = self.Roles - Roles_.to_etree(element, name_='Roles', mapping_=mapping_, nsmap_=nsmap_) + Roles_.to_etree(element, name_='Roles', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TextRegion_ in self.TextRegion: - TextRegion_.to_etree(element, name_='TextRegion', mapping_=mapping_, nsmap_=nsmap_) + TextRegion_.to_etree(element, name_='TextRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for ImageRegion_ in self.ImageRegion: - ImageRegion_.to_etree(element, name_='ImageRegion', mapping_=mapping_, nsmap_=nsmap_) + ImageRegion_.to_etree(element, name_='ImageRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for LineDrawingRegion_ in self.LineDrawingRegion: - LineDrawingRegion_.to_etree(element, name_='LineDrawingRegion', mapping_=mapping_, nsmap_=nsmap_) + LineDrawingRegion_.to_etree(element, name_='LineDrawingRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for GraphicRegion_ in self.GraphicRegion: - GraphicRegion_.to_etree(element, name_='GraphicRegion', mapping_=mapping_, nsmap_=nsmap_) + GraphicRegion_.to_etree(element, name_='GraphicRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TableRegion_ in self.TableRegion: - TableRegion_.to_etree(element, name_='TableRegion', mapping_=mapping_, nsmap_=nsmap_) + TableRegion_.to_etree(element, name_='TableRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for ChartRegion_ 
in self.ChartRegion: - ChartRegion_.to_etree(element, name_='ChartRegion', mapping_=mapping_, nsmap_=nsmap_) + ChartRegion_.to_etree(element, name_='ChartRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for SeparatorRegion_ in self.SeparatorRegion: - SeparatorRegion_.to_etree(element, name_='SeparatorRegion', mapping_=mapping_, nsmap_=nsmap_) + SeparatorRegion_.to_etree(element, name_='SeparatorRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for MathsRegion_ in self.MathsRegion: - MathsRegion_.to_etree(element, name_='MathsRegion', mapping_=mapping_, nsmap_=nsmap_) + MathsRegion_.to_etree(element, name_='MathsRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for ChemRegion_ in self.ChemRegion: - ChemRegion_.to_etree(element, name_='ChemRegion', mapping_=mapping_, nsmap_=nsmap_) + ChemRegion_.to_etree(element, name_='ChemRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for MusicRegion_ in self.MusicRegion: - MusicRegion_.to_etree(element, name_='MusicRegion', mapping_=mapping_, nsmap_=nsmap_) + MusicRegion_.to_etree(element, name_='MusicRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for AdvertRegion_ in self.AdvertRegion: - AdvertRegion_.to_etree(element, name_='AdvertRegion', mapping_=mapping_, nsmap_=nsmap_) + AdvertRegion_.to_etree(element, name_='AdvertRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for NoiseRegion_ in self.NoiseRegion: - NoiseRegion_.to_etree(element, name_='NoiseRegion', mapping_=mapping_, nsmap_=nsmap_) + NoiseRegion_.to_etree(element, name_='NoiseRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for UnknownRegion_ in self.UnknownRegion: - UnknownRegion_.to_etree(element, name_='UnknownRegion', mapping_=mapping_, nsmap_=nsmap_) + UnknownRegion_.to_etree(element, name_='UnknownRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, 
nsmap_=nsmap_) for CustomRegion_ in self.CustomRegion: - CustomRegion_.to_etree(element, name_='CustomRegion', mapping_=mapping_, nsmap_=nsmap_) + CustomRegion_.to_etree(element, name_='CustomRegion', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -9613,12 +10482,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -9644,7 +10513,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'xsi:type' not in already_processed: already_processed.add('xsi:type') self.extensiontype_ = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'AlternativeImage': obj_ = AlternativeImageType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -9791,12 +10660,14 @@ def set_Coords(self, Coords): class AlternativeImageType(GeneratedsSuper): - """Confidence value (between 0 and 1)""" + """conf -- Confidence value (between 0 and 1) + + """ 
__hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('filename', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('filename', 'string', 0, 0, {'use': 'required', 'name': 'filename'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), + MemberSpec_('conf', 'pc:ConfSimpleType', 0, 1, {'use': 'optional', 'name': 'conf'}), ] subclass = None superclass = None @@ -9805,7 +10676,7 @@ def __init__(self, filename=None, comments=None, conf=None, gds_collector_=None, self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.filename = _cast(None, filename) self.filename_nsprefix_ = "pc" self.comments = _cast(None, comments) @@ -9854,7 +10725,7 @@ def validate_ConfSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd maxInclusive restriction on ConfSimpleType' % {"value": value, "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( ): @@ -9876,14 +10747,14 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AlternativeImageType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AlternativeImageType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='AlternativeImageType', pretty_print=pretty_print) + self._exportChildren(outfile, 
level + 1, namespaceprefix_, namespacedef_, name_='AlternativeImageType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='AlternativeImageType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='AlternativeImageType'): if self.filename is not None and 'filename' not in already_processed: already_processed.add('filename') outfile.write(' filename=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.filename), input_name='filename')), )) @@ -9893,9 +10764,9 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.conf is not None and 'conf' not in already_processed: already_processed.add('conf') outfile.write(' conf="%s"' % self.gds_format_float(self.conf, input_name='conf')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AlternativeImageType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AlternativeImageType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='AlternativeImageType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='AlternativeImageType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -9908,6 +10779,8 @@ def to_etree(self, parent_element=None, name_='AlternativeImageType', mapping_=N element.set('conf', self.gds_format_float(self.conf)) if mapping_ is not None: mapping_[id(self)] = element + if 
reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -9915,12 +10788,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('filename', node) if value is not None and 'filename' not in already_processed: already_processed.add('filename') @@ -9935,7 +10808,7 @@ def buildAttributes(self, node, attrs, already_processed): value = self.gds_parse_float(value, node, 'conf') self.conf = value self.validate_ConfSimpleType(self.conf) # validate type ConfSimpleType - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -9943,8 +10816,11 @@ def __hash__(self): class GraphemesType(GeneratedsSuper): - """Container for graphemes, grapheme groups and - non-printing characters.""" + """GraphemesType -- + Container for graphemes, grapheme groups and + non-printing characters. 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('Grapheme', 'GraphemeType', 1, 0, {'name': 'Grapheme', 'type': 'GraphemeType'}, 8), @@ -9958,7 +10834,7 @@ def __init__(self, Grapheme=None, NonPrintingChar=None, GraphemeGroup=None, gds_ self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" if Grapheme is None: self.Grapheme = [] else: @@ -10019,7 +10895,7 @@ def insert_GraphemeGroup_at(self, index, value): self.GraphemeGroup.insert(index, value) def replace_GraphemeGroup_at(self, index, value): self.GraphemeGroup[index] = value - def hasContent_(self): + def has__content(self): if ( self.Grapheme or self.NonPrintingChar or @@ -10043,17 +10919,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemesType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemesType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphemesType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphemesType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemesType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemesType'): pass - def exportChildren(self, outfile, level, 
namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemesType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemesType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -10067,19 +10943,21 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for GraphemeGroup_ in self.GraphemeGroup: namespaceprefix_ = self.GraphemeGroup_nsprefix_ + ':' if (UseCapturedNS_ and self.GraphemeGroup_nsprefix_) else '' GraphemeGroup_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='GraphemeGroup', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='GraphemesType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='GraphemesType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) for Grapheme_ in self.Grapheme: - Grapheme_.to_etree(element, name_='Grapheme', mapping_=mapping_, nsmap_=nsmap_) + Grapheme_.to_etree(element, name_='Grapheme', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for NonPrintingChar_ in self.NonPrintingChar: - NonPrintingChar_.to_etree(element, name_='NonPrintingChar', mapping_=mapping_, nsmap_=nsmap_) + NonPrintingChar_.to_etree(element, name_='NonPrintingChar', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for GraphemeGroup_ in self.GraphemeGroup: - GraphemeGroup_.to_etree(element, name_='GraphemeGroup', mapping_=mapping_, nsmap_=nsmap_) + GraphemeGroup_.to_etree(element, 
name_='GraphemeGroup', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -10087,14 +10965,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Grapheme': obj_ = GraphemeType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -10116,20 +10994,29 @@ def __hash__(self): class GraphemeBaseType(GeneratedsSuper): - """Base type for graphemes, grapheme groups and non-printing characters. - Order index of grapheme, group, or non-printing character - within the parent container (graphemes or glyph or grapheme group). - Type of character represented by the - grapheme, group, or non-printing character element. - For generic useFor generic use""" + """GraphemeBaseType -- + Base type for graphemes, grapheme groups and non-printing characters. + + * index -- + Order index of grapheme, group, or non-printing character + within the parent container (graphemes or glyph or grapheme group). 
+ + * charType -- + Type of character represented by the + grapheme, group, or non-printing character element. + + * custom -- For generic use + * comments -- For generic use + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('id', 'string', 0, 0, {'use': 'required'}), - MemberSpec_('index', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('ligature', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('charType', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('custom', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('comments', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('id', 'string', 0, 0, {'use': 'required', 'name': 'id'}), + MemberSpec_('index', 'indexType2', 0, 0, {'use': 'required', 'name': 'index'}), + MemberSpec_('ligature', 'boolean', 0, 1, {'use': 'optional', 'name': 'ligature'}), + MemberSpec_('charType', 'charTypeType', 0, 1, {'use': 'optional', 'name': 'charType'}), + MemberSpec_('custom', 'string', 0, 1, {'use': 'optional', 'name': 'custom'}), + MemberSpec_('comments', 'string', 0, 1, {'use': 'optional', 'name': 'comments'}), MemberSpec_('TextEquiv', 'TextEquivType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'TextEquiv', 'type': 'TextEquivType'}, None), ] subclass = None @@ -10139,7 +11026,7 @@ def __init__(self, id=None, index=None, ligature=None, charType=None, custom=Non self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.id = _cast(None, id) self.id_nsprefix_ = "pc" self.index = _cast(int, index) @@ -10209,7 +11096,31 @@ def set_comments(self, comments): self.comments = comments def get_extensiontype_(self): return self.extensiontype_ def set_extensiontype_(self, extensiontype_): self.extensiontype_ = extensiontype_ - def hasContent_(self): + def validate_indexType2(self, value): + # Validate type indexType2, a restriction on int. 
+ if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: + if not isinstance(value, int): + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (int)' % {"value": value, "lineno": lineno, }) + return False + if value < 0: + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd minInclusive restriction on indexType2' % {"value": value, "lineno": lineno} ) + result = False + def validate_charTypeType(self, value): + # Validate type charTypeType, a restriction on string. + if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: + if not isinstance(value, str): + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, }) + return False + value = value + enumerations = ['base', 'combining'] + if value not in enumerations: + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on charTypeType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) + result = False + def has__content(self): if ( self.TextEquiv ): @@ -10231,15 +11142,15 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeBaseType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeBaseType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, 
name_='GraphemeBaseType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphemeBaseType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemeBaseType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemeBaseType'): if self.id is not None and 'id' not in already_processed: already_processed.add('id') outfile.write(' id=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.id), input_name='id')), )) @@ -10266,7 +11177,7 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' outfile.write(' xsi:type="%s%s"' % (imported_ns_type_prefix_, self.extensiontype_)) else: outfile.write(' xsi:type="%s"' % self.extensiontype_) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemeBaseType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemeBaseType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -10274,7 +11185,7 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for TextEquiv_ in self.TextEquiv: namespaceprefix_ = self.TextEquiv_nsprefix_ + ':' if (UseCapturedNS_ and self.TextEquiv_nsprefix_) else '' TextEquiv_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='TextEquiv', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='GraphemeBaseType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='GraphemeBaseType', 
mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -10294,9 +11205,11 @@ def to_etree(self, parent_element=None, name_='GraphemeBaseType', mapping_=None, if self.comments is not None: element.set('comments', self.gds_format_string(self.comments)) for TextEquiv_ in self.TextEquiv: - TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, nsmap_=nsmap_) + TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -10304,12 +11217,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('id', node) if value is not None and 'id' not in already_processed: already_processed.add('id') @@ -10318,6 +11231,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'index' not in already_processed: already_processed.add('index') self.index = self.gds_parse_integer(value, node, 'index') + self.validate_indexType2(self.index) # validate type indexType2 value = find_attr_value_('ligature', node) if value is not None and 'ligature' 
not in already_processed: already_processed.add('ligature') @@ -10331,6 +11245,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'charType' not in already_processed: already_processed.add('charType') self.charType = value + self.validate_charTypeType(self.charType) # validate type charTypeType value = find_attr_value_('custom', node) if value is not None and 'custom' not in already_processed: already_processed.add('custom') @@ -10343,7 +11258,7 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'xsi:type' not in already_processed: already_processed.add('xsi:type') self.extensiontype_ = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'TextEquiv': obj_ = TextEquivType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -10355,9 +11270,12 @@ def __hash__(self): class GraphemeType(GraphemeBaseType): - """Represents a sub-element of a glyph. + """GraphemeType -- + Represents a sub-element of a glyph. Smallest graphical unit that can be - assigned a Unicode code point.""" + assigned a Unicode code point. 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('Coords', 'CoordsType', 0, 0, {'name': 'Coords', 'type': 'CoordsType'}, None), @@ -10369,8 +11287,8 @@ def __init__(self, id=None, index=None, ligature=None, charType=None, custom=Non self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(GraphemeType, self).__init__(id, index, ligature, charType, custom, comments, TextEquiv, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("GraphemeType"), self).__init__(id, index, ligature, charType, custom, comments, TextEquiv, **kwargs_) self.Coords = Coords self.Coords_nsprefix_ = "pc" def factory(*args_, **kwargs_): @@ -10392,10 +11310,10 @@ def get_Coords(self): return self.Coords def set_Coords(self, Coords): self.Coords = Coords - def hasContent_(self): + def has__content(self): if ( self.Coords is not None or - super(GraphemeType, self).hasContent_() + super(GraphemeType, self).has__content() ): return True else: @@ -10415,18 +11333,18 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphemeType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphemeType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def 
exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemeType'): - super(GraphemeType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeType') - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemeType', fromsubclass_=False, pretty_print=True): - super(GraphemeType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemeType'): + super(GraphemeType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeType') + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemeType', fromsubclass_=False, pretty_print=True): + super(GraphemeType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) if pretty_print: eol_ = '\n' else: @@ -10434,13 +11352,15 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml if self.Coords is not None: namespaceprefix_ = self.Coords_nsprefix_ + ':' if (UseCapturedNS_ and self.Coords_nsprefix_) else '' self.Coords.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Coords', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='GraphemeType', mapping_=None, nsmap_=None): - element = super(GraphemeType, self).to_etree(parent_element, name_, mapping_) + def to_etree(self, parent_element=None, name_='GraphemeType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(GraphemeType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.Coords is not None: Coords_ = self.Coords - 
Coords_.to_etree(element, name_='Coords', mapping_=mapping_, nsmap_=nsmap_) + Coords_.to_etree(element, name_='Coords', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -10448,30 +11368,33 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): - super(GraphemeType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildAttributes(self, node, attrs, already_processed): + super(GraphemeType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Coords': obj_ = CoordsType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) self.Coords = obj_ obj_.original_tagname_ = 'Coords' - super(GraphemeType, self).buildChildren(child_, node, nodeName_, True) + super(GraphemeType, self)._buildChildren(child_, node, nodeName_, True) def __hash__(self): return hash(self.id) # end class GraphemeType class NonPrintingCharType(GraphemeBaseType): - """A glyph component without visual representation + """NonPrintingCharType -- + A glyph component without visual representation but with Unicode code point. 
Non-visual / non-printing / control character. - Part of grapheme container (of glyph) or grapheme sub group.""" + Part of grapheme container (of glyph) or grapheme sub group. + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ ] @@ -10482,8 +11405,8 @@ def __init__(self, id=None, index=None, ligature=None, charType=None, custom=Non self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(NonPrintingCharType, self).__init__(id, index, ligature, charType, custom, comments, TextEquiv, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("NonPrintingCharType"), self).__init__(id, index, ligature, charType, custom, comments, TextEquiv, **kwargs_) def factory(*args_, **kwargs_): if CurrentSubclassModule_ is not None: subclass = getSubclassFromModule_( @@ -10499,9 +11422,9 @@ def get_ns_prefix_(self): return self.ns_prefix_ def set_ns_prefix_(self, ns_prefix): self.ns_prefix_ = ns_prefix - def hasContent_(self): + def has__content(self): if ( - super(NonPrintingCharType, self).hasContent_() + super(NonPrintingCharType, self).has__content() ): return True else: @@ -10521,22 +11444,24 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NonPrintingCharType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NonPrintingCharType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='NonPrintingCharType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='NonPrintingCharType', 
pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='NonPrintingCharType'): - super(NonPrintingCharType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NonPrintingCharType') - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='NonPrintingCharType', fromsubclass_=False, pretty_print=True): - super(NonPrintingCharType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='NonPrintingCharType', mapping_=None, nsmap_=None): - element = super(NonPrintingCharType, self).to_etree(parent_element, name_, mapping_) + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='NonPrintingCharType'): + super(NonPrintingCharType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NonPrintingCharType') + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='NonPrintingCharType', fromsubclass_=False, pretty_print=True): + super(NonPrintingCharType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='NonPrintingCharType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(NonPrintingCharType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): 
self.gds_collector_ = gds_collector_ @@ -10544,15 +11469,15 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): - super(NonPrintingCharType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(NonPrintingCharType, self).buildChildren(child_, node, nodeName_, True) + def _buildAttributes(self, node, attrs, already_processed): + super(NonPrintingCharType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(NonPrintingCharType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -10572,8 +11497,8 @@ def __init__(self, id=None, index=None, ligature=None, charType=None, custom=Non self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(GraphemeGroupType, self).__init__(id, index, ligature, charType, custom, comments, TextEquiv, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("GraphemeGroupType"), self).__init__(id, index, ligature, charType, custom, comments, TextEquiv, **kwargs_) if Grapheme is None: self.Grapheme = [] else: @@ -10619,11 +11544,11 @@ def insert_NonPrintingChar_at(self, index, value): self.NonPrintingChar.insert(index, value) def replace_NonPrintingChar_at(self, index, value): 
self.NonPrintingChar[index] = value - def hasContent_(self): + def has__content(self): if ( self.Grapheme or self.NonPrintingChar or - super(GraphemeGroupType, self).hasContent_() + super(GraphemeGroupType, self).has__content() ): return True else: @@ -10643,18 +11568,18 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeGroupType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeGroupType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphemeGroupType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphemeGroupType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemeGroupType'): - super(GraphemeGroupType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphemeGroupType') - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemeGroupType', fromsubclass_=False, pretty_print=True): - super(GraphemeGroupType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphemeGroupType'): + super(GraphemeGroupType, self)._exportAttributes(outfile, level, 
already_processed, namespaceprefix_, name_='GraphemeGroupType') + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphemeGroupType', fromsubclass_=False, pretty_print=True): + super(GraphemeGroupType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) if pretty_print: eol_ = '\n' else: @@ -10665,14 +11590,16 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for NonPrintingChar_ in self.NonPrintingChar: namespaceprefix_ = self.NonPrintingChar_nsprefix_ + ':' if (UseCapturedNS_ and self.NonPrintingChar_nsprefix_) else '' NonPrintingChar_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='NonPrintingChar', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='GraphemeGroupType', mapping_=None, nsmap_=None): - element = super(GraphemeGroupType, self).to_etree(parent_element, name_, mapping_) + def to_etree(self, parent_element=None, name_='GraphemeGroupType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(GraphemeGroupType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) for Grapheme_ in self.Grapheme: - Grapheme_.to_etree(element, name_='Grapheme', mapping_=mapping_, nsmap_=nsmap_) + Grapheme_.to_etree(element, name_='Grapheme', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for NonPrintingChar_ in self.NonPrintingChar: - NonPrintingChar_.to_etree(element, name_='NonPrintingChar', mapping_=mapping_, nsmap_=nsmap_) + NonPrintingChar_.to_etree(element, name_='NonPrintingChar', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = 
gds_collector_ @@ -10680,14 +11607,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): - super(GraphemeGroupType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildAttributes(self, node, attrs, already_processed): + super(GraphemeGroupType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Grapheme': obj_ = GraphemeType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -10698,14 +11625,16 @@ def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collec obj_.build(child_, gds_collector_=gds_collector_) self.NonPrintingChar.append(obj_) obj_.original_tagname_ = 'NonPrintingChar' - super(GraphemeGroupType, self).buildChildren(child_, node, nodeName_, True) + super(GraphemeGroupType, self)._buildChildren(child_, node, nodeName_, True) def __hash__(self): return hash(self.id) # end class GraphemeGroupType class UserDefinedType(GeneratedsSuper): - """Container for user-defined attributes""" + """UserDefinedType -- Container for user-defined attributes + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('UserAttribute', 'UserAttributeType', 1, 0, {'maxOccurs': 'unbounded', 'minOccurs': '1', 'name': 'UserAttribute', 'type': 'UserAttributeType'}, None), @@ -10717,7 +11646,7 
@@ def __init__(self, UserAttribute=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" if UserAttribute is None: self.UserAttribute = [] else: @@ -10748,7 +11677,7 @@ def insert_UserAttribute_at(self, index, value): self.UserAttribute.insert(index, value) def replace_UserAttribute_at(self, index, value): self.UserAttribute[index] = value - def hasContent_(self): + def has__content(self): if ( self.UserAttribute ): @@ -10770,17 +11699,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UserDefinedType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UserDefinedType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UserDefinedType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UserDefinedType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UserDefinedType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UserDefinedType'): pass - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UserDefinedType', fromsubclass_=False, pretty_print=True): + def 
_exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UserDefinedType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -10788,15 +11717,17 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml for UserAttribute_ in self.UserAttribute: namespaceprefix_ = self.UserAttribute_nsprefix_ + ':' if (UseCapturedNS_ and self.UserAttribute_nsprefix_) else '' UserAttribute_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UserAttribute', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='UserDefinedType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='UserDefinedType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) for UserAttribute_ in self.UserAttribute: - UserAttribute_.to_etree(element, name_='UserAttribute', mapping_=mapping_, nsmap_=nsmap_) + UserAttribute_.to_etree(element, name_='UserAttribute', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -10804,14 +11735,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - 
self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'UserAttribute': obj_ = UserAttributeType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -10823,13 +11754,15 @@ def __hash__(self): class UserAttributeType(GeneratedsSuper): - """Structured custom data defined by name, type and value.""" + """UserAttributeType -- Structured custom data defined by name, type and value. + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('name', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('description', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'string', 0, 1, {'use': 'optional'}), - MemberSpec_('value', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('name', 'string', 0, 1, {'use': 'optional', 'name': 'name'}), + MemberSpec_('description', 'string', 0, 1, {'use': 'optional', 'name': 'description'}), + MemberSpec_('type_', 'typeType3', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('value', 'string', 0, 1, {'use': 'optional', 'name': 'value'}), ] subclass = None superclass = None @@ -10838,7 +11771,7 @@ def __init__(self, name=None, description=None, type_=None, value=None, gds_coll self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.name = _cast(None, name) self.name_nsprefix_ = "pc" self.description = _cast(None, description) @@ -10878,7 +11811,20 @@ def get_value(self): return self.value def set_value(self, value): 
self.value = value - def hasContent_(self): + def validate_typeType3(self, value): + # Validate type typeType3, a restriction on string. + if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None: + if not isinstance(value, str): + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, }) + return False + value = value + enumerations = ['xsd:string', 'xsd:integer', 'xsd:boolean', 'xsd:float'] + if value not in enumerations: + lineno = self.gds_get_node_lineno_() + self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on typeType3' % {"value" : encode_str_2_3(value), "lineno": lineno} ) + result = False + def has__content(self): if ( ): @@ -10900,14 +11846,14 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UserAttributeType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UserAttributeType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UserAttributeType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UserAttributeType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UserAttributeType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UserAttributeType'): if 
self.name is not None and 'name' not in already_processed: already_processed.add('name') outfile.write(' name=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.name), input_name='name')), )) @@ -10920,9 +11866,9 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.value is not None and 'value' not in already_processed: already_processed.add('value') outfile.write(' value=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.value), input_name='value')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UserAttributeType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UserAttributeType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='UserAttributeType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='UserAttributeType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -10937,6 +11883,8 @@ def to_etree(self, parent_element=None, name_='UserAttributeType', mapping_=None element.set('value', self.gds_format_string(self.value)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -10944,12 +11892,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, 
already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('name', node) if value is not None and 'name' not in already_processed: already_processed.add('name') @@ -10962,11 +11910,12 @@ def buildAttributes(self, node, attrs, already_processed): if value is not None and 'type' not in already_processed: already_processed.add('type') self.type_ = value + self.validate_typeType3(self.type_) # validate type typeType3 value = find_attr_value_('value', node) if value is not None and 'value' not in already_processed: already_processed.add('value') self.value = value - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -10974,17 +11923,21 @@ def __hash__(self): class TableCellRoleType(GeneratedsSuper): - """Cell position in table starting with row 0Cell position in table - starting with column 0Number of rows the cell spans (optional; default - is 1)Number of columns the cell spans (optional; default is 1) - Is the cell a column or row header?""" + """rowIndex -- Cell position in table starting with row 0 + columnIndex -- Cell position in table starting with column 0 + rowSpan -- Number of rows the cell spans (optional; default is 1) + colSpan -- Number of columns the cell spans (optional; default is 1) + header -- + Is the cell a column or row header? 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('rowIndex', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('columnIndex', 'int', 0, 0, {'use': 'required'}), - MemberSpec_('rowSpan', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('colSpan', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('header', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('rowIndex', 'int', 0, 0, {'use': 'required', 'name': 'rowIndex'}), + MemberSpec_('columnIndex', 'int', 0, 0, {'use': 'required', 'name': 'columnIndex'}), + MemberSpec_('rowSpan', 'int', 0, 1, {'use': 'optional', 'name': 'rowSpan'}), + MemberSpec_('colSpan', 'int', 0, 1, {'use': 'optional', 'name': 'colSpan'}), + MemberSpec_('header', 'boolean', 0, 1, {'use': 'optional', 'name': 'header'}), ] subclass = None superclass = None @@ -10993,7 +11946,7 @@ def __init__(self, rowIndex=None, columnIndex=None, rowSpan=None, colSpan=None, self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.rowIndex = _cast(int, rowIndex) self.rowIndex_nsprefix_ = "pc" self.columnIndex = _cast(int, columnIndex) @@ -11039,7 +11992,7 @@ def get_header(self): return self.header def set_header(self, header): self.header = header - def hasContent_(self): + def has__content(self): if ( ): @@ -11061,14 +12014,14 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TableCellRoleType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TableCellRoleType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, 
namespacedef_, name_='TableCellRoleType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TableCellRoleType', pretty_print=pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TableCellRoleType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TableCellRoleType'): if self.rowIndex is not None and 'rowIndex' not in already_processed: already_processed.add('rowIndex') outfile.write(' rowIndex="%s"' % self.gds_format_integer(self.rowIndex, input_name='rowIndex')) @@ -11084,9 +12037,9 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.header is not None and 'header' not in already_processed: already_processed.add('header') outfile.write(' header="%s"' % self.gds_format_boolean(self.header, input_name='header')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TableCellRoleType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TableCellRoleType', fromsubclass_=False, pretty_print=True): pass - def to_etree(self, parent_element=None, name_='TableCellRoleType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='TableCellRoleType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: @@ -11103,6 +12056,8 @@ def to_etree(self, parent_element=None, name_='TableCellRoleType', mapping_=None element.set('header', self.gds_format_boolean(self.header)) 
if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11110,12 +12065,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('rowIndex', node) if value is not None and 'rowIndex' not in already_processed: already_processed.add('rowIndex') @@ -11141,7 +12096,7 @@ def buildAttributes(self, node, attrs, already_processed): self.header = False else: raise_parse_error(node, 'Bad boolean attribute') - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): pass def __hash__(self): return hash(self.id) @@ -11149,6 +12104,11 @@ def __hash__(self): class RolesType(GeneratedsSuper): + """TableCellRole -- + Data for a region that takes on the role + of a table cell within a parent table region. 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ MemberSpec_('TableCellRole', 'TableCellRoleType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'TableCellRole', 'type': 'TableCellRoleType'}, None), @@ -11160,7 +12120,7 @@ def __init__(self, TableCellRole=None, gds_collector_=None, **kwargs_): self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None + self.ns_prefix_ = "pc" self.TableCellRole = TableCellRole self.TableCellRole_nsprefix_ = "pc" def factory(*args_, **kwargs_): @@ -11182,7 +12142,7 @@ def get_TableCellRole(self): return self.TableCellRole def set_TableCellRole(self, TableCellRole): self.TableCellRole = TableCellRole - def hasContent_(self): + def has__content(self): if ( self.TableCellRole is not None ): @@ -11204,17 +12164,17 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="h showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RolesType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='RolesType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RolesType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='RolesType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RolesType'): + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='RolesType'): pass - def 
exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RolesType', fromsubclass_=False, pretty_print=True): + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RolesType', fromsubclass_=False, pretty_print=True): if pretty_print: eol_ = '\n' else: @@ -11222,16 +12182,18 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xml if self.TableCellRole is not None: namespaceprefix_ = self.TableCellRole_nsprefix_ + ':' if (UseCapturedNS_ and self.TableCellRole_nsprefix_) else '' self.TableCellRole.export(outfile, level, namespaceprefix_, namespacedef_='', name_='TableCellRole', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='RolesType', mapping_=None, nsmap_=None): + def to_etree(self, parent_element=None, name_='RolesType', mapping_=None, reverse_mapping_=None, nsmap_=None): if parent_element is None: element = etree_.Element('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) else: element = etree_.SubElement(parent_element, '{http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15}' + name_, nsmap=nsmap_) if self.TableCellRole is not None: TableCellRole_ = self.TableCellRole - TableCellRole_.to_etree(element, name_='TableCellRole', mapping_=mapping_, nsmap_=nsmap_) + TableCellRole_.to_etree(element, name_='TableCellRole', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11239,14 +12201,14 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = 
node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): pass - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'TableCellRole': obj_ = TableCellRoleType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -11258,14 +12220,19 @@ def __hash__(self): class CustomRegionType(RegionType): - """Regions containing content that is not covered + """CustomRegionType -- + Regions containing content that is not covered by the default types (text, graphic, image, line drawing, chart, table, separator, maths, map, music, chem, advert, noise, unknown). 
- Information on the type of content represented by this region""" + + * type -- + Information on the type of content represented by this region + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('type_', 'string', 0, 1, {'use': 'optional'}), + MemberSpec_('type_', 'string', 0, 1, {'use': 'optional', 'name': 'type_'}), ] subclass = None superclass = RegionType @@ -11274,8 +12241,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(CustomRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("CustomRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.type_ = _cast(None, type_) self.type__nsprefix_ = "pc" def factory(*args_, **kwargs_): @@ -11297,14 +12264,14 @@ def get_type(self): return self.type_ def set_type(self, type_): self.type_ = type_ - def hasContent_(self): + def has__content(self): if ( - super(CustomRegionType, self).hasContent_() + super(CustomRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='CustomRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', 
namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CustomRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('CustomRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -11319,27 +12286,29 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='C showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='CustomRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='CustomRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='CustomRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='CustomRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='CustomRegionType'): - super(CustomRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='CustomRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='CustomRegionType'): + super(CustomRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='CustomRegionType') if self.type_ is not None and 'type_' not in already_processed: already_processed.add('type_') outfile.write(' type=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.type_), input_name='type')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', 
namespacedef_='', name_='CustomRegionType', fromsubclass_=False, pretty_print=True): - super(CustomRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='CustomRegionType', mapping_=None, nsmap_=None): - element = super(CustomRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CustomRegionType', fromsubclass_=False, pretty_print=True): + super(CustomRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='CustomRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(CustomRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.type_ is not None: element.set('type', self.gds_format_string(self.type_)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11347,19 +12316,19 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('type', node) if value is not 
None and 'type' not in already_processed: already_processed.add('type') self.type_ = value - super(CustomRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(CustomRegionType, self).buildChildren(child_, node, nodeName_, True) + super(CustomRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(CustomRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -11367,7 +12336,10 @@ def __hash__(self): class UnknownRegionType(RegionType): - """To be used if the region type cannot be ascertained.""" + """UnknownRegionType -- + To be used if the region type cannot be ascertained. + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ ] @@ -11378,8 +12350,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(UnknownRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("UnknownRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) def factory(*args_, **kwargs_): if CurrentSubclassModule_ is not None: subclass = 
getSubclassFromModule_( @@ -11395,14 +12367,14 @@ def get_ns_prefix_(self): return self.ns_prefix_ def set_ns_prefix_(self, ns_prefix): self.ns_prefix_ = ns_prefix - def hasContent_(self): + def has__content(self): if ( - super(UnknownRegionType, self).hasContent_() + super(UnknownRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='UnknownRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UnknownRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('UnknownRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -11417,22 +12389,24 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='U showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UnknownRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UnknownRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UnknownRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='UnknownRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UnknownRegionType'): - super(UnknownRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, 
name_='UnknownRegionType') - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='UnknownRegionType', fromsubclass_=False, pretty_print=True): - super(UnknownRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='UnknownRegionType', mapping_=None, nsmap_=None): - element = super(UnknownRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='UnknownRegionType'): + super(UnknownRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='UnknownRegionType') + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='UnknownRegionType', fromsubclass_=False, pretty_print=True): + super(UnknownRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='UnknownRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(UnknownRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11440,15 +12414,15 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + 
self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): - super(UnknownRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(UnknownRegionType, self).buildChildren(child_, node, nodeName_, True) + def _buildAttributes(self, node, attrs, already_processed): + super(UnknownRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(UnknownRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -11456,9 +12430,12 @@ def __hash__(self): class NoiseRegionType(RegionType): - """Noise regions are regions where no real data lies, only + """NoiseRegionType -- + Noise regions are regions where no real data lies, only false data created by artifacts on the document or - scanner noise.""" + scanner noise. 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ ] @@ -11469,8 +12446,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(NoiseRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("NoiseRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) def factory(*args_, **kwargs_): if CurrentSubclassModule_ is not None: subclass = getSubclassFromModule_( @@ -11486,14 +12463,14 @@ def get_ns_prefix_(self): return self.ns_prefix_ def set_ns_prefix_(self, ns_prefix): self.ns_prefix_ = ns_prefix - def hasContent_(self): + def has__content(self): if ( - super(NoiseRegionType, self).hasContent_() + super(NoiseRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='NoiseRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='NoiseRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('NoiseRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -11508,22 +12485,24 @@ def export(self, outfile, level, namespaceprefix_='', 
namespacedef_='', name_='N showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NoiseRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NoiseRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='NoiseRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='NoiseRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='NoiseRegionType'): - super(NoiseRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NoiseRegionType') - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='NoiseRegionType', fromsubclass_=False, pretty_print=True): - super(NoiseRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='NoiseRegionType', mapping_=None, nsmap_=None): - element = super(NoiseRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='NoiseRegionType'): + super(NoiseRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='NoiseRegionType') + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='NoiseRegionType', 
fromsubclass_=False, pretty_print=True): + super(NoiseRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='NoiseRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(NoiseRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11531,15 +12510,15 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): - super(NoiseRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(NoiseRegionType, self).buildChildren(child_, node, nodeName_, True) + def _buildAttributes(self, node, attrs, already_processed): + super(NoiseRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(NoiseRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -11547,17 +12526,24 @@ def __hash__(self): class AdvertRegionType(RegionType): - """Regions containing advertisements. 
- The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - The background colour of the region""" + """AdvertRegionType -- + Regions containing advertisements. + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * bgColour -- + The background colour of the region + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), ] subclass = None superclass = RegionType @@ -11566,8 +12552,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(AdvertRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("AdvertRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) 
self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.bgColour = _cast(None, bgColour) @@ -11608,14 +12594,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(AdvertRegionType, self).hasContent_() + super(AdvertRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='AdvertRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AdvertRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('AdvertRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -11630,32 +12616,34 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='A showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AdvertRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AdvertRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='AdvertRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='AdvertRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % 
(eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='AdvertRegionType'): - super(AdvertRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AdvertRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='AdvertRegionType'): + super(AdvertRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AdvertRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) if self.bgColour is not None and 'bgColour' not in already_processed: already_processed.add('bgColour') outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='AdvertRegionType', fromsubclass_=False, pretty_print=True): - super(AdvertRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='AdvertRegionType', mapping_=None, nsmap_=None): - element = super(AdvertRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AdvertRegionType', fromsubclass_=False, pretty_print=True): + super(AdvertRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='AdvertRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(AdvertRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if 
self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.bgColour is not None: element.set('bgColour', self.gds_format_string(self.bgColour)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11663,12 +12651,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -11679,9 +12667,9 @@ def buildAttributes(self, node, attrs, already_processed): already_processed.add('bgColour') self.bgColour = value self.validate_ColourSimpleType(self.bgColour) # validate type ColourSimpleType - super(AdvertRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(AdvertRegionType, self).buildChildren(child_, node, nodeName_, True) + super(AdvertRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(AdvertRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -11700,17 +12688,24 @@ 
def set_orientation(self, orientation): class MusicRegionType(RegionType): - """Regions containing musical notations. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - The background colour of the region""" + """MusicRegionType -- + Regions containing musical notations. + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * bgColour -- + The background colour of the region + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), ] subclass = None superclass = RegionType @@ -11719,8 +12714,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(MusicRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("MusicRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, 
SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.bgColour = _cast(None, bgColour) @@ -11761,14 +12756,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(MusicRegionType, self).hasContent_() + super(MusicRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='MusicRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MusicRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('MusicRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -11783,32 +12778,34 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='M showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MusicRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MusicRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MusicRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MusicRegionType', pretty_print=pretty_print) 
showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MusicRegionType'): - super(MusicRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MusicRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MusicRegionType'): + super(MusicRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MusicRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) if self.bgColour is not None and 'bgColour' not in already_processed: already_processed.add('bgColour') outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='MusicRegionType', fromsubclass_=False, pretty_print=True): - super(MusicRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='MusicRegionType', mapping_=None, nsmap_=None): - element = super(MusicRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MusicRegionType', fromsubclass_=False, pretty_print=True): + super(MusicRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='MusicRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + 
element = super(MusicRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.bgColour is not None: element.set('bgColour', self.gds_format_string(self.bgColour)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11816,12 +12813,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -11832,9 +12829,9 @@ def buildAttributes(self, node, attrs, already_processed): already_processed.add('bgColour') self.bgColour = value self.validate_ColourSimpleType(self.bgColour) # validate type ColourSimpleType - super(MusicRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(MusicRegionType, self).buildChildren(child_, node, nodeName_, True) + super(MusicRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(MusicRegionType, 
self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -11853,16 +12850,21 @@ def set_orientation(self, orientation): class MapRegionType(RegionType): - """Regions containing maps. - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180""" + """MapRegionType -- + Regions containing maps. + + * orientation -- + The angle the rectangle encapsulating a + region has to be rotated in clockwise + direction in order to correct the present + skew (negative values indicate + anti-clockwise rotation). Range: + -179.999,180 + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), ] subclass = None superclass = RegionType @@ -11871,8 +12873,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(MapRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("MapRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) 
self.orientation_nsprefix_ = "pc" def factory(*args_, **kwargs_): @@ -11894,14 +12896,14 @@ def get_orientation(self): return self.orientation def set_orientation(self, orientation): self.orientation = orientation - def hasContent_(self): + def has__content(self): if ( - super(MapRegionType, self).hasContent_() + super(MapRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='MapRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MapRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('MapRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -11916,27 +12918,29 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='M showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MapRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MapRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MapRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MapRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MapRegionType'): - super(MapRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, 
name_='MapRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MapRegionType'): + super(MapRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MapRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='MapRegionType', fromsubclass_=False, pretty_print=True): - super(MapRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='MapRegionType', mapping_=None, nsmap_=None): - element = super(MapRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MapRegionType', fromsubclass_=False, pretty_print=True): + super(MapRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='MapRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(MapRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -11944,20 +12948,20 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - 
self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') value = self.gds_parse_float(value, node, 'orientation') self.orientation = value - super(MapRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(MapRegionType, self).buildChildren(child_, node, nodeName_, True) + super(MapRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(MapRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -11976,18 +12980,25 @@ def set_orientation(self, orientation): class ChemRegionType(RegionType): - """Regions containing chemical formulas. - The angle the rectangle encapsulating a - region has to be rotated in clockwise - direction in order to correct the present - skew (negative values indicate - anti-clockwise rotation). Range: - -179.999,180 - The background colour of the region""" + """ChemRegionType -- + Regions containing chemical formulas. + + * orientation -- + The angle the rectangle encapsulating a + region has to be rotated in clockwise + direction in order to correct the present + skew (negative values indicate + anti-clockwise rotation). 
Range: + -179.999,180 + + * bgColour -- + The background colour of the region + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), ] subclass = None superclass = RegionType @@ -11996,8 +13007,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(ChemRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("ChemRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.bgColour = _cast(None, bgColour) @@ -12038,14 +13049,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(ChemRegionType, self).hasContent_() + 
super(ChemRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='ChemRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ChemRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('ChemRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -12060,32 +13071,34 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='C showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChemRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChemRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ChemRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ChemRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ChemRegionType'): - super(ChemRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChemRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ChemRegionType'): + super(ChemRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChemRegionType') if self.orientation is not None and 'orientation' not in 
already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) if self.bgColour is not None and 'bgColour' not in already_processed: already_processed.add('bgColour') outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='ChemRegionType', fromsubclass_=False, pretty_print=True): - super(ChemRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='ChemRegionType', mapping_=None, nsmap_=None): - element = super(ChemRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ChemRegionType', fromsubclass_=False, pretty_print=True): + super(ChemRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='ChemRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(ChemRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.bgColour is not None: element.set('bgColour', self.gds_format_string(self.bgColour)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -12093,12 +13106,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = 
node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -12109,9 +13122,9 @@ def buildAttributes(self, node, attrs, already_processed): already_processed.add('bgColour') self.bgColour = value self.validate_ColourSimpleType(self.bgColour) # validate type ColourSimpleType - super(ChemRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(ChemRegionType, self).buildChildren(child_, node, nodeName_, True) + super(ChemRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(ChemRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -12130,18 +13143,25 @@ def set_orientation(self, orientation): class MathsRegionType(RegionType): - """Regions containing equations and mathematical symbols + """MathsRegionType -- + Regions containing equations and mathematical symbols should be marked as maths regions. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). 
- Range: -179.999,180 - The background colour of the region""" + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * bgColour -- + The background colour of the region + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), ] subclass = None superclass = RegionType @@ -12150,8 +13170,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(MathsRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("MathsRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.bgColour = _cast(None, bgColour) @@ -12192,14 +13212,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() 
self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(MathsRegionType, self).hasContent_() + super(MathsRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='MathsRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MathsRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('MathsRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -12214,32 +13234,34 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='M showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MathsRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MathsRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MathsRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MathsRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MathsRegionType'): - super(MathsRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, 
name_='MathsRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MathsRegionType'): + super(MathsRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MathsRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) if self.bgColour is not None and 'bgColour' not in already_processed: already_processed.add('bgColour') outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='MathsRegionType', fromsubclass_=False, pretty_print=True): - super(MathsRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='MathsRegionType', mapping_=None, nsmap_=None): - element = super(MathsRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MathsRegionType', fromsubclass_=False, pretty_print=True): + super(MathsRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='MathsRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(MathsRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.bgColour is not None: element.set('bgColour', self.gds_format_string(self.bgColour)) if mapping_ is not None: mapping_[id(self)] 
= element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -12247,12 +13269,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -12263,9 +13285,9 @@ def buildAttributes(self, node, attrs, already_processed): already_processed.add('bgColour') self.bgColour = value self.validate_ColourSimpleType(self.bgColour) # validate type ColourSimpleType - super(MathsRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(MathsRegionType, self).buildChildren(child_, node, nodeName_, True) + super(MathsRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(MathsRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -12284,19 +13306,26 @@ def set_orientation(self, orientation): class SeparatorRegionType(RegionType): - """Separators are lines that lie between columns and + """SeparatorRegionType -- + Separators are lines that lie between columns and paragraphs and can be 
used to logically separate different articles from each other. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - The colour of the separator""" + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * colour -- + The colour of the separator + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('colour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('colour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'colour'}), ] subclass = None superclass = RegionType @@ -12305,8 +13334,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(SeparatorRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("SeparatorRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = 
_cast(float, orientation) self.orientation_nsprefix_ = "pc" self.colour = _cast(None, colour) @@ -12347,14 +13376,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(SeparatorRegionType, self).hasContent_() + super(SeparatorRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='SeparatorRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='SeparatorRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('SeparatorRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -12369,32 +13398,34 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='S showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='SeparatorRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='SeparatorRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='SeparatorRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='SeparatorRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % 
(eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='SeparatorRegionType'): - super(SeparatorRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='SeparatorRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='SeparatorRegionType'): + super(SeparatorRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='SeparatorRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) if self.colour is not None and 'colour' not in already_processed: already_processed.add('colour') outfile.write(' colour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.colour), input_name='colour')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='SeparatorRegionType', fromsubclass_=False, pretty_print=True): - super(SeparatorRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='SeparatorRegionType', mapping_=None, nsmap_=None): - element = super(SeparatorRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='SeparatorRegionType', fromsubclass_=False, pretty_print=True): + super(SeparatorRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='SeparatorRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(SeparatorRegionType, self).to_etree(parent_element, name_, 
mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.colour is not None: element.set('colour', self.gds_format_string(self.colour)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -12402,12 +13433,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -12418,9 +13449,9 @@ def buildAttributes(self, node, attrs, already_processed): already_processed.add('colour') self.colour = value self.validate_ColourSimpleType(self.colour) # validate type ColourSimpleType - super(SeparatorRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(SeparatorRegionType, self).buildChildren(child_, node, nodeName_, True) + super(SeparatorRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(SeparatorRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return 
hash(self.id) @@ -12439,26 +13470,39 @@ def set_orientation(self, orientation): class ChartRegionType(RegionType): - """Regions containing charts or graphs of any type, should + """ChartRegionType -- + Regions containing charts or graphs of any type, should be marked as chart regions. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - The type of chart in the region - An approximation of the number of colours - used in the region - The background colour of the region - Specifies whether the region also contains - text""" + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * type -- + The type of chart in the region + + * numColours -- + An approximation of the number of colours + used in the region + + * bgColour -- + The background colour of the region + + * embText -- + Specifies whether the region also contains + text + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:ChartTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('numColours', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('type_', 'pc:ChartTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('numColours', 'int', 0, 1, {'use': 'optional', 'name': 'numColours'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), + MemberSpec_('embText', 'boolean', 0, 1, {'use': 
'optional', 'name': 'embText'}), ] subclass = None superclass = RegionType @@ -12467,8 +13511,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(ChartRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("ChartRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.type_ = _cast(None, type_) @@ -12540,14 +13584,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(ChartRegionType, self).hasContent_() + super(ChartRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='ChartRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ChartRegionType', pretty_print=True): imported_ns_def_ = 
GenerateDSNamespaceDefs_.get('ChartRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -12562,16 +13606,16 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='C showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChartRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChartRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ChartRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ChartRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ChartRegionType'): - super(ChartRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChartRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ChartRegionType'): + super(ChartRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChartRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) @@ -12587,10 +13631,10 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.embText is not None and 'embText' not in already_processed: already_processed.add('embText') outfile.write(' embText="%s"' % 
self.gds_format_boolean(self.embText, input_name='embText')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='ChartRegionType', fromsubclass_=False, pretty_print=True): - super(ChartRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='ChartRegionType', mapping_=None, nsmap_=None): - element = super(ChartRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ChartRegionType', fromsubclass_=False, pretty_print=True): + super(ChartRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='ChartRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(ChartRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.type_ is not None: @@ -12603,6 +13647,8 @@ def to_etree(self, parent_element=None, name_='ChartRegionType', mapping_=None, element.set('embText', self.gds_format_boolean(self.embText)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -12610,12 +13656,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - 
self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -12644,9 +13690,9 @@ def buildAttributes(self, node, attrs, already_processed): self.embText = False else: raise_parse_error(node, 'Bad boolean attribute') - super(ChartRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(ChartRegionType, self).buildChildren(child_, node, nodeName_, True) + super(ChartRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(ChartRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -12665,30 +13711,49 @@ def set_orientation(self, orientation): class TableRegionType(RegionType): - """Tabular data in any form is represented with a table + """TableRegionType -- + Tabular data in any form is represented with a table region. Rows and columns may or may not have separator lines; these lines are not separator regions. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). 
- Range: -179.999,180 - The number of rows present in the table - The number of columns present in the table - The colour of the lines used in the region - The background colour of the region - Specifies the presence of line separators - Specifies whether the region also contains - text""" + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * rows -- + The number of rows present in the table + + * columns -- + The number of columns present in the table + + * lineColour -- + The colour of the lines used in the region + + * bgColour -- + The background colour of the region + + * lineSeparators -- + Specifies the presence of line separators + + * embText -- + Specifies whether the region also contains + text + + * Grid -- Table grid (visible or virtual grid lines) + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('rows', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('columns', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('lineColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('lineSeparators', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('rows', 'int', 0, 1, {'use': 'optional', 'name': 'rows'}), + MemberSpec_('columns', 'int', 0, 1, {'use': 'optional', 'name': 'columns'}), + MemberSpec_('lineColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'lineColour'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), + MemberSpec_('lineSeparators', 'boolean', 0, 1, {'use': 
'optional', 'name': 'lineSeparators'}), + MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}), MemberSpec_('Grid', 'GridType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'Grid', 'type': 'GridType'}, None), ] subclass = None @@ -12698,8 +13763,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(TableRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("TableRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.rows = _cast(int, rows) @@ -12776,15 +13841,15 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.Grid is not None or - super(TableRegionType, self).hasContent_() + super(TableRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='TableRegionType', pretty_print=True): + def export(self, outfile, level, 
namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TableRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('TableRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -12799,16 +13864,16 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='T showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TableRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TableRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TableRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TableRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TableRegionType'): - super(TableRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TableRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TableRegionType'): + super(TableRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TableRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) @@ -12830,8 +13895,8 @@ def exportAttributes(self, outfile, 
level, already_processed, namespaceprefix_=' if self.embText is not None and 'embText' not in already_processed: already_processed.add('embText') outfile.write(' embText="%s"' % self.gds_format_boolean(self.embText, input_name='embText')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='TableRegionType', fromsubclass_=False, pretty_print=True): - super(TableRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TableRegionType', fromsubclass_=False, pretty_print=True): + super(TableRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) if pretty_print: eol_ = '\n' else: @@ -12839,8 +13904,8 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', if self.Grid is not None: namespaceprefix_ = self.Grid_nsprefix_ + ':' if (UseCapturedNS_ and self.Grid_nsprefix_) else '' self.Grid.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Grid', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='TableRegionType', mapping_=None, nsmap_=None): - element = super(TableRegionType, self).to_etree(parent_element, name_, mapping_) + def to_etree(self, parent_element=None, name_='TableRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(TableRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.rows is not None: @@ -12857,9 +13922,11 @@ def to_etree(self, parent_element=None, name_='TableRegionType', mapping_=None, element.set('embText', self.gds_format_boolean(self.embText)) if self.Grid is not None: Grid_ = self.Grid - 
Grid_.to_etree(element, name_='Grid', mapping_=mapping_, nsmap_=nsmap_) + Grid_.to_etree(element, name_='Grid', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -12867,12 +13934,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -12914,14 +13981,14 @@ def buildAttributes(self, node, attrs, already_processed): self.embText = False else: raise_parse_error(node, 'Bad boolean attribute') - super(TableRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(TableRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'Grid': obj_ = GridType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) self.Grid = obj_ obj_.original_tagname_ = 'Grid' - super(TableRegionType, self).buildChildren(child_, node, nodeName_, True) + super(TableRegionType, 
self)._buildChildren(child_, node, nodeName_, True) def __hash__(self): return hash(self.id) def set_orientation(self, orientation): @@ -12939,24 +14006,35 @@ def set_orientation(self, orientation): class GraphicRegionType(RegionType): - """Regions containing simple graphics, such as a company + """GraphicRegionType -- + Regions containing simple graphics, such as a company logo, should be marked as graphic regions. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - The type of graphic in the region - An approximation of the number of colours - used in the region - Specifies whether the region also contains - text.""" + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * type -- + The type of graphic in the region + + * numColours -- + An approximation of the number of colours + used in the region + + * embText -- + Specifies whether the region also contains + text. 
+ + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:GraphicsTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('numColours', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('type_', 'pc:GraphicsTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('numColours', 'int', 0, 1, {'use': 'optional', 'name': 'numColours'}), + MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}), ] subclass = None superclass = RegionType @@ -12965,8 +14043,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(GraphicRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("GraphicRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.type_ = _cast(None, type_) @@ -13019,14 +14097,14 @@ def validate_GraphicsTypeSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not 
match xsd enumeration restriction on GraphicsTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(GraphicRegionType, self).hasContent_() + super(GraphicRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='GraphicRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphicRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('GraphicRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -13041,16 +14119,16 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='G showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphicRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphicRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphicRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphicRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphicRegionType'): - super(GraphicRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphicRegionType') + def _exportAttributes(self, 
outfile, level, already_processed, namespaceprefix_='', name_='GraphicRegionType'): + super(GraphicRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphicRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) @@ -13063,10 +14141,10 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.embText is not None and 'embText' not in already_processed: already_processed.add('embText') outfile.write(' embText="%s"' % self.gds_format_boolean(self.embText, input_name='embText')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='GraphicRegionType', fromsubclass_=False, pretty_print=True): - super(GraphicRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='GraphicRegionType', mapping_=None, nsmap_=None): - element = super(GraphicRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphicRegionType', fromsubclass_=False, pretty_print=True): + super(GraphicRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='GraphicRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(GraphicRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.type_ is not None: @@ -13077,6 +14155,8 @@ def to_etree(self, parent_element=None, 
name_='GraphicRegionType', mapping_=None element.set('embText', self.gds_format_boolean(self.embText)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -13084,12 +14164,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -13113,9 +14193,9 @@ def buildAttributes(self, node, attrs, already_processed): self.embText = False else: raise_parse_error(node, 'Bad boolean attribute') - super(GraphicRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(GraphicRegionType, self).buildChildren(child_, node, nodeName_, True) + super(GraphicRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(GraphicRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -13134,23 +14214,34 @@ def set_orientation(self, orientation): class LineDrawingRegionType(RegionType): - """A line drawing is a single colour illustration without + 
"""LineDrawingRegionType -- + A line drawing is a single colour illustration without solid areas. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - The pen (foreground) colour of the region - The background colour of the region - Specifies whether the region also contains - text""" + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * penColour -- + The pen (foreground) colour of the region + + * bgColour -- + The background colour of the region + + * embText -- + Specifies whether the region also contains + text + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('penColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('penColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'penColour'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), + MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}), ] subclass = None superclass = RegionType @@ -13159,8 +14250,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(LineDrawingRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, 
Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("LineDrawingRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.penColour = _cast(None, penColour) @@ -13213,14 +14304,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(LineDrawingRegionType, self).hasContent_() + super(LineDrawingRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='LineDrawingRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LineDrawingRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('LineDrawingRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -13235,16 +14326,16 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='L showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, 
already_processed, namespaceprefix_, name_='LineDrawingRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LineDrawingRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LineDrawingRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LineDrawingRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LineDrawingRegionType'): - super(LineDrawingRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LineDrawingRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LineDrawingRegionType'): + super(LineDrawingRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LineDrawingRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) @@ -13257,10 +14348,10 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.embText is not None and 'embText' not in already_processed: already_processed.add('embText') outfile.write(' embText="%s"' % self.gds_format_boolean(self.embText, input_name='embText')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='LineDrawingRegionType', fromsubclass_=False, pretty_print=True): - super(LineDrawingRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def 
to_etree(self, parent_element=None, name_='LineDrawingRegionType', mapping_=None, nsmap_=None): - element = super(LineDrawingRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LineDrawingRegionType', fromsubclass_=False, pretty_print=True): + super(LineDrawingRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='LineDrawingRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(LineDrawingRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.penColour is not None: @@ -13271,6 +14362,8 @@ def to_etree(self, parent_element=None, name_='LineDrawingRegionType', mapping_= element.set('embText', self.gds_format_boolean(self.embText)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -13278,12 +14371,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', 
node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -13308,9 +14401,9 @@ def buildAttributes(self, node, attrs, already_processed): self.embText = False else: raise_parse_error(node, 'Bad boolean attribute') - super(LineDrawingRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(LineDrawingRegionType, self).buildChildren(child_, node, nodeName_, True) + super(LineDrawingRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(LineDrawingRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def __hash__(self): return hash(self.id) @@ -13329,23 +14422,34 @@ def set_orientation(self, orientation): class ImageRegionType(RegionType): - """An image is considered to be more intricate and complex + """ImageRegionType -- + An image is considered to be more intricate and complex than a graphic. These can be photos or drawings. - The angle the rectangle encapsulating a region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - The colour bit depth required for the region - The background colour of the region - Specifies whether the region also contains - text""" + + * orientation -- + The angle the rectangle encapsulating a region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). 
+ Range: -179.999,180 + + * colourDepth -- + The colour bit depth required for the region + + * bgColour -- + The background colour of the region + + * embText -- + Specifies whether the region also contains + text + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('colourDepth', 'pc:ColourDepthSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('colourDepth', 'pc:ColourDepthSimpleType', 0, 1, {'use': 'optional', 'name': 'colourDepth'}), + MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}), + MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}), ] subclass = None superclass = RegionType @@ -13354,8 +14458,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(ImageRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("ImageRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) 
self.orientation_nsprefix_ = "pc" self.colourDepth = _cast(None, colourDepth) @@ -13421,14 +14525,14 @@ def validate_ColourSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( - super(ImageRegionType, self).hasContent_() + super(ImageRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='ImageRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ImageRegionType', pretty_print=True): imported_ns_def_ = GenerateDSNamespaceDefs_.get('ImageRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -13443,16 +14547,16 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='I showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ImageRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ImageRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ImageRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ImageRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, 
level, already_processed, namespaceprefix_='', name_='ImageRegionType'): - super(ImageRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ImageRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ImageRegionType'): + super(ImageRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ImageRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) @@ -13465,10 +14569,10 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.embText is not None and 'embText' not in already_processed: already_processed.add('embText') outfile.write(' embText="%s"' % self.gds_format_boolean(self.embText, input_name='embText')) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='ImageRegionType', fromsubclass_=False, pretty_print=True): - super(ImageRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='ImageRegionType', mapping_=None, nsmap_=None): - element = super(ImageRegionType, self).to_etree(parent_element, name_, mapping_) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ImageRegionType', fromsubclass_=False, pretty_print=True): + super(ImageRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def to_etree(self, parent_element=None, name_='ImageRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(ImageRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, 
nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.colourDepth is not None: @@ -13479,6 +14583,8 @@ def to_etree(self, parent_element=None, name_='ImageRegionType', mapping_=None, element.set('embText', self.gds_format_boolean(self.embText)) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -13486,12 +14592,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -13516,9 +14622,9 @@ def buildAttributes(self, node, attrs, already_processed): self.embText = False else: raise_parse_error(node, 'Bad boolean attribute') - super(ImageRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): - super(ImageRegionType, self).buildChildren(child_, node, nodeName_, True) + super(ImageRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): + super(ImageRegionType, self)._buildChildren(child_, node, nodeName_, True) pass def 
__hash__(self): return hash(self.id) @@ -13537,52 +14643,92 @@ def set_orientation(self, orientation): class TextRegionType(RegionType): - """Pure text is represented as a text region. This includes + """TextRegionType -- + Pure text is represented as a text region. This includes drop capitals, but practically ornate text may be considered as a graphic. - The angle the rectangle encapsulating the region - has to be rotated in clockwise direction - in order to correct the present skew - (negative values indicate anti-clockwise rotation). - (The rotated image can be further referenced - via “AlternativeImage”.) - Range: -179.999,180 - The nature of the text in the region - The degree of space in points between the lines of - text (line spacing) - The direction in which text within lines - should be read (order of words and characters), - in addition to “textLineOrder”. - The order of text lines within the block, - in addition to “readingDirection”. - The angle the baseline of text within the region - has to be rotated (relative to the rectangle - encapsulating the region) in clockwise direction - in order to correct the present skew, - in addition to “orientation” - (negative values indicate anti-clockwise rotation). - Range: -179.999,180 - Defines whether a region of text is indented or not - Text align - The primary language used in the region - The secondary language used in the region - The primary script used in the region - The secondary script used in the region""" + + * orientation -- + The angle the rectangle encapsulating the region + has to be rotated in clockwise direction + in order to correct the present skew + (negative values indicate anti-clockwise rotation). + (The rotated image can be further referenced + via + “ + AlternativeImage + ” + .) 
+ Range: -179.999,180 + + * type -- + The nature of the text in the region + + * leading -- + The degree of space in points between the lines of + text (line spacing) + + * readingDirection -- + The direction in which text within lines + should be read (order of words and characters), + in addition to + “ + textLineOrder + ” + . + + * textLineOrder -- + The order of text lines within the block, + in addition to + “ + readingDirection + ” + . + + * readingOrientation -- + The angle the baseline of text within the region + has to be rotated (relative to the rectangle + encapsulating the region) in clockwise direction + in order to correct the present skew, + in addition to + “ + orientation + ” + (negative values indicate anti-clockwise rotation). + Range: -179.999,180 + + * indented -- + Defines whether a region of text is indented or not + + * align -- Text align + * primaryLanguage -- + The primary language used in the region + + * secondaryLanguage -- + The secondary language used in the region + + * primaryScript -- + The primary script used in the region + + * secondaryScript -- + The secondary script used in the region + + """ __hash__ = GeneratedsSuper.__hash__ member_data_items_ = [ - MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('type_', 'pc:TextTypeSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('leading', 'int', 0, 1, {'use': 'optional'}), - MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('textLineOrder', 'pc:TextLineOrderSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('readingOrientation', 'float', 0, 1, {'use': 'optional'}), - MemberSpec_('indented', 'boolean', 0, 1, {'use': 'optional'}), - MemberSpec_('align', 'pc:AlignSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('secondaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional'}), - 
MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional'}), - MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional'}), + MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), + MemberSpec_('type_', 'pc:TextTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), + MemberSpec_('leading', 'int', 0, 1, {'use': 'optional', 'name': 'leading'}), + MemberSpec_('readingDirection', 'pc:ReadingDirectionSimpleType', 0, 1, {'use': 'optional', 'name': 'readingDirection'}), + MemberSpec_('textLineOrder', 'pc:TextLineOrderSimpleType', 0, 1, {'use': 'optional', 'name': 'textLineOrder'}), + MemberSpec_('readingOrientation', 'float', 0, 1, {'use': 'optional', 'name': 'readingOrientation'}), + MemberSpec_('indented', 'boolean', 0, 1, {'use': 'optional', 'name': 'indented'}), + MemberSpec_('align', 'pc:AlignSimpleType', 0, 1, {'use': 'optional', 'name': 'align'}), + MemberSpec_('primaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional', 'name': 'primaryLanguage'}), + MemberSpec_('secondaryLanguage', 'pc:LanguageSimpleType', 0, 1, {'use': 'optional', 'name': 'secondaryLanguage'}), + MemberSpec_('primaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'primaryScript'}), + MemberSpec_('secondaryScript', 'pc:ScriptSimpleType', 0, 1, {'use': 'optional', 'name': 'secondaryScript'}), + MemberSpec_('production', 'pc:ProductionSimpleType', 0, 1, {'use': 'optional', 'name': 'production'}), MemberSpec_('TextLine', 'TextLineType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'TextLine', 'type': 'TextLineType'}, None), MemberSpec_('TextEquiv', 'TextEquivType', 1, 1, {'maxOccurs': 'unbounded', 'minOccurs': '0', 'name': 'TextEquiv', 'type': 'TextEquivType'}, None), MemberSpec_('TextStyle', 'TextStyleType', 0, 1, {'maxOccurs': '1', 'minOccurs': '0', 'name': 'TextStyle', 'type': 'TextStyleType'}, 
None), @@ -13594,8 +14740,8 @@ def __init__(self, id=None, custom=None, comments=None, continuation=None, Alter self.gds_elementtree_node_ = None self.original_tagname_ = None self.parent_object_ = kwargs_.get('parent_object_') - self.ns_prefix_ = None - super(TextRegionType, self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) + self.ns_prefix_ = "pc" + super(globals().get("TextRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_) self.orientation = _cast(float, orientation) self.orientation_nsprefix_ = "pc" self.type_ = _cast(None, type_) @@ -13816,17 +14962,17 @@ def validate_ProductionSimpleType(self, value): lineno = self.gds_get_node_lineno_() self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ProductionSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} ) result = False - def hasContent_(self): + def has__content(self): if ( self.TextLine or self.TextEquiv or self.TextStyle is not None or - super(TextRegionType, self).hasContent_() + super(TextRegionType, self).has__content() ): return True else: return False - def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='TextRegionType', pretty_print=True): + def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextRegionType', pretty_print=True): imported_ns_def_ = 
GenerateDSNamespaceDefs_.get('TextRegionType') if imported_ns_def_ is not None: namespacedef_ = imported_ns_def_ @@ -13841,16 +14987,16 @@ def export(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='T showIndent(outfile, level, pretty_print) outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', )) already_processed = set() - self.exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextRegionType') - if self.hasContent_(): + self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextRegionType') + if self.has__content(): outfile.write('>%s' % (eol_, )) - self.exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextRegionType', pretty_print=pretty_print) + self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='TextRegionType', pretty_print=pretty_print) showIndent(outfile, level, pretty_print) outfile.write('%s' % (namespaceprefix_, name_, eol_)) else: outfile.write('/>%s' % (eol_, )) - def exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextRegionType'): - super(TextRegionType, self).exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextRegionType') + def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='TextRegionType'): + super(TextRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='TextRegionType') if self.orientation is not None and 'orientation' not in already_processed: already_processed.add('orientation') outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation')) @@ -13890,8 +15036,8 @@ def exportAttributes(self, outfile, level, already_processed, namespaceprefix_=' if self.production is not None and 'production' not in already_processed: already_processed.add('production') outfile.write(' production=%s' % 
(self.gds_encode(self.gds_format_string(quote_attrib(self.production), input_name='production')), )) - def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', name_='TextRegionType', fromsubclass_=False, pretty_print=True): - super(TextRegionType, self).exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) + def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextRegionType', fromsubclass_=False, pretty_print=True): + super(TextRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print) if pretty_print: eol_ = '\n' else: @@ -13905,8 +15051,8 @@ def exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='', if self.TextStyle is not None: namespaceprefix_ = self.TextStyle_nsprefix_ + ':' if (UseCapturedNS_ and self.TextStyle_nsprefix_) else '' self.TextStyle.export(outfile, level, namespaceprefix_, namespacedef_='', name_='TextStyle', pretty_print=pretty_print) - def to_etree(self, parent_element=None, name_='TextRegionType', mapping_=None, nsmap_=None): - element = super(TextRegionType, self).to_etree(parent_element, name_, mapping_) + def to_etree(self, parent_element=None, name_='TextRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None): + element = super(TextRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_) if self.orientation is not None: element.set('orientation', self.gds_format_float(self.orientation)) if self.type_ is not None: @@ -13934,14 +15080,16 @@ def to_etree(self, parent_element=None, name_='TextRegionType', mapping_=None, n if self.production is not None: element.set('production', self.gds_format_string(self.production)) for TextLine_ in self.TextLine: - TextLine_.to_etree(element, name_='TextLine', mapping_=mapping_, nsmap_=nsmap_) + 
TextLine_.to_etree(element, name_='TextLine', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) for TextEquiv_ in self.TextEquiv: - TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, nsmap_=nsmap_) + TextEquiv_.to_etree(element, name_='TextEquiv', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if self.TextStyle is not None: TextStyle_ = self.TextStyle - TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, nsmap_=nsmap_) + TextStyle_.to_etree(element, name_='TextStyle', mapping_=mapping_, reverse_mapping_=reverse_mapping_, nsmap_=nsmap_) if mapping_ is not None: mapping_[id(self)] = element + if reverse_mapping_ is not None: + reverse_mapping_[element] = self return element def build(self, node, gds_collector_=None): self.gds_collector_ = gds_collector_ @@ -13949,12 +15097,12 @@ def build(self, node, gds_collector_=None): self.gds_elementtree_node_ = node already_processed = set() self.ns_prefix_ = node.prefix - self.buildAttributes(node, node.attrib, already_processed) + self._buildAttributes(node, node.attrib, already_processed) for child in node: nodeName_ = Tag_pattern_.match(child.tag).groups()[-1] - self.buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) + self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_) return self - def buildAttributes(self, node, attrs, already_processed): + def _buildAttributes(self, node, attrs, already_processed): value = find_attr_value_('orientation', node) if value is not None and 'orientation' not in already_processed: already_processed.add('orientation') @@ -14023,8 +15171,8 @@ def buildAttributes(self, node, attrs, already_processed): already_processed.add('production') self.production = value self.validate_ProductionSimpleType(self.production) # validate type ProductionSimpleType - super(TextRegionType, self).buildAttributes(node, attrs, already_processed) - def buildChildren(self, child_, node, nodeName_, 
fromsubclass_=False, gds_collector_=None): + super(TextRegionType, self)._buildAttributes(node, attrs, already_processed) + def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None): if nodeName_ == 'TextLine': obj_ = TextLineType.factory(parent_object_=self) obj_.build(child_, gds_collector_=gds_collector_) @@ -14040,7 +15188,7 @@ def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collec obj_.build(child_, gds_collector_=gds_collector_) self.TextStyle = obj_ obj_.original_tagname_ = 'TextStyle' - super(TextRegionType, self).buildChildren(child_, node, nodeName_, True) + super(TextRegionType, self)._buildChildren(child_, node, nodeName_, True) def __hash__(self): return hash(self.id) def set_orientation(self, orientation): @@ -14057,6 +15205,11 @@ def set_orientation(self, orientation): # end class TextRegionType +# +# End data representation classes. +# + + GDSClassesMapping = { 'PcGts': PcGtsType, } @@ -14074,9 +15227,10 @@ def usage(): def get_root_tag(node): tag = Tag_pattern_.match(node.tag).groups()[-1] - rootClass = GDSClassesMapping.get(tag) + prefix_tag = TagNamePrefix + tag + rootClass = GDSClassesMapping.get(prefix_tag) if rootClass is None: - rootClass = globals().get(tag) + rootClass = globals().get(prefix_tag) return tag, rootClass @@ -14130,7 +15284,7 @@ def parse(inFileName, silence=False, print_warnings=True): def parseEtree(inFileName, silence=False, print_warnings=True, - mapping=None, nsmap=None): + mapping=None, reverse_mapping=None, nsmap=None): parser = None doc = parsexml_(inFileName, parser) gds_collector = GdsCollector_() @@ -14141,12 +15295,15 @@ def parseEtree(inFileName, silence=False, print_warnings=True, rootClass = PcGts rootObj = rootClass.factory() rootObj.build(rootNode, gds_collector_=gds_collector) - # Enable Python to collect the space used by the DOM. 
if mapping is None: mapping = {} + if reverse_mapping is None: + reverse_mapping = {} rootElement = rootObj.to_etree( - None, name_=rootTag, mapping_=mapping, nsmap_=nsmap) - reverse_mapping = rootObj.gds_reverse_node_mapping(mapping) + None, name_=rootTag, mapping_=mapping, + reverse_mapping_=reverse_mapping, nsmap_=nsmap) + reverse_node_mapping = rootObj.gds_reverse_node_mapping(mapping) + # Enable Python to collect the space used by the DOM. if not SaveElementTreeNode: doc = None rootNode = None @@ -14163,7 +15320,7 @@ def parseEtree(inFileName, silence=False, print_warnings=True, len(gds_collector.get_messages()), )) gds_collector.write_messages(sys.stderr) sys.stderr.write(separator) - return rootObj, rootElement, mapping, reverse_mapping + return rootObj, rootElement, mapping, reverse_node_mapping def parseString(inString, silence=False, print_warnings=True): @@ -14247,6 +15404,224 @@ def main(): RenameMappings_ = { } +# +# Mapping of namespaces to types defined in them +# and the file in which each is defined. +# simpleTypes are marked "ST" and complexTypes "CT". 
+NamespaceToDefMappings_ = {'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15': [('ColourSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('ReadingDirectionSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('TextLineOrderSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('TextTypeSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('PageTypeSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('ConfSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('LanguageSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('ScriptSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('ColourDepthSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('GraphicsTypeSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('ChartTypeSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('PointsType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('ProductionSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('AlignSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('GroupTypeSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('TextDataTypeSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('UnderlineStyleSimpleType', + 'src/ocrd_validators/page.xsd', + 'ST'), + ('PcGtsType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('MetadataType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('MetadataItemType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('LabelsType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('LabelType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('PageType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('TextRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('CoordsType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('TextLineType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('WordType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GlyphType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('TextEquivType', + 'src/ocrd_validators/page.xsd', + 'CT'), + 
('ImageRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('LineDrawingRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GraphicRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('TableRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GridType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GridPointsType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('ChartRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('SeparatorRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('MathsRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('ChemRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('MapRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('MusicRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('AdvertRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('NoiseRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('UnknownRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('CustomRegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('PrintSpaceType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('ReadingOrderType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('RegionRefIndexedType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('OrderedGroupIndexedType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('UnorderedGroupIndexedType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('RegionRefType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('OrderedGroupType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('UnorderedGroupType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('BorderType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('LayersType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('LayerType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('BaselineType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('RelationsType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('RelationType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('TextStyleType', + 
'src/ocrd_validators/page.xsd', + 'CT'), + ('RegionType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('AlternativeImageType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GraphemesType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GraphemeBaseType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GraphemeType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('NonPrintingCharType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('GraphemeGroupType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('UserDefinedType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('UserAttributeType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('TableCellRoleType', + 'src/ocrd_validators/page.xsd', + 'CT'), + ('RolesType', + 'src/ocrd_validators/page.xsd', + 'CT')]} + __all__ = [ "AdvertRegionType", "AlternativeImageType", diff --git a/src/ocrd_models/xpath_functions.py b/src/ocrd_models/xpath_functions.py new file mode 100644 index 0000000000..c204811cae --- /dev/null +++ b/src/ocrd_models/xpath_functions.py @@ -0,0 +1,51 @@ +from ocrd_utils import xywh_from_points + +pc_functions = [] + +def _export(func): + pc_functions.append(func) + return func + +@_export +def pc_pixelarea(nodes): + """ + Extract Coords/@points from all nodes, calculate the bounding + box, and accumulate areas. + """ + area = 0 + for node in nodes: + # FIXME: find out why we need to go to the parent here + node = node.parent.value + coords = node.find(f'{node.prefix}:Coords', node.nsmap) + if coords is None: + continue + points = coords.attrib['points'] + xywh = xywh_from_points(points) + area += xywh['w'] * xywh['h'] + return area + +@_export +def pc_textequiv(nodes): + """ + Extract TextEquiv/Unicode from all nodes, then concatenate + (interspersed with spaces or newlines). 
+ """ + text = '' + for node in nodes: + # FIXME: find out why we need to go to the parent here + node = node.parent.value + if text and node.tag.endswith('Region'): + text += '\n' + if text and node.tag.endswith('Line'): + text += '\n' + if text and node.tag.endswith('Word'): + text += ' ' + equiv = node.find(f'{node.prefix}:TextEquiv', node.nsmap) + if equiv is None: + continue + string = equiv.find(f'{node.prefix}:Unicode', node.nsmap) + if string is None: + continue + text += str(string.text) + return text + diff --git a/src/ocrd_network/cli/client.py b/src/ocrd_network/cli/client.py index 9c7f15c88f..350cf64b90 100644 --- a/src/ocrd_network/cli/client.py +++ b/src/ocrd_network/cli/client.py @@ -2,6 +2,7 @@ from json import dumps from typing import List, Optional, Tuple from ocrd.decorators.parameter_option import parameter_option, parameter_override_option +from ocrd_network.constants import JobState from ocrd_utils import DEFAULT_METS_BASENAME from ocrd_utils.introspect import set_json_key_value_overrides from ocrd_utils.str import parse_json_string_or_file @@ -104,8 +105,10 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str): @click.option('--result-queue-name') @click.option('--callback-url') @click.option('--agent-type', default='worker') -@click.option('-b', '--block', default=False, +@click.option('-b', '--block', default=False, is_flag=True, help='If set, the client will block till job timeout, fail or success.') +@click.option('-p', '--print-state', default=False, is_flag=True, + help='If set, the client will print job states by each iteration.') def send_processing_job_request( address: Optional[str], processor_name: str, @@ -120,7 +123,8 @@ def send_processing_job_request( # TODO: This is temporally available to toggle # between the ProcessingWorker/ProcessorServer agent_type: Optional[str], - block: Optional[bool] + block: Optional[bool], + print_state: Optional[bool] ): """ Submit a processing job to the processing 
server. @@ -146,7 +150,7 @@ def send_processing_job_request( assert processing_job_id print(f"Processing job id: {processing_job_id}") if block: - client.poll_job_status(job_id=processing_job_id) + client.poll_job_status(job_id=processing_job_id, print_state=print_state) @client_cli.group('workflow') @@ -176,24 +180,39 @@ def check_workflow_job_status(address: Optional[str], workflow_job_id: str): 'the "OCRD_NETWORK_SERVER_ADDR_PROCESSING" env variable is used by default') @click.option('-m', '--path-to-mets', required=True) @click.option('-w', '--path-to-workflow', required=True) -@click.option('-b', '--block', default=False, +@click.option('--page-wise/--no-page-wise', is_flag=True, default=False, help="Whether to generate per-page jobs") +@click.option('-b', '--block', default=False, is_flag=True, help='If set, the client will block till job timeout, fail or success.') +@click.option('-p', '--print-state', default=False, is_flag=True, + help='If set, the client will print job states by each iteration.') def send_workflow_job_request( address: Optional[str], path_to_mets: str, path_to_workflow: str, - block: Optional[bool] + page_wise: bool, + block: bool, + print_state: bool ): """ Submit a workflow job to the processing server. 
""" client = Client(server_addr_processing=address) - workflow_job_id = client.send_workflow_job_request(path_to_wf=path_to_workflow, path_to_mets=path_to_mets) + workflow_job_id = client.send_workflow_job_request( + path_to_wf=path_to_workflow, + path_to_mets=path_to_mets, + page_wise=page_wise, + ) assert workflow_job_id print(f"Workflow job id: {workflow_job_id}") if block: - client.poll_workflow_status(job_id=workflow_job_id) - + print(f"Polling state of workflow job {workflow_job_id}") + state = client.poll_workflow_status(job_id=workflow_job_id, print_state=print_state) + if state != JobState.success: + print(f"Workflow failed with {state}") + exit(1) + else: + print(f"Workflow succeeded") + exit(0) @client_cli.group('workspace') def workspace_cli(): diff --git a/src/ocrd_network/client.py b/src/ocrd_network/client.py index 8ec8e541ea..bb7cf4dbf2 100644 --- a/src/ocrd_network/client.py +++ b/src/ocrd_network/client.py @@ -46,18 +46,21 @@ def check_job_status(self, job_id: str): def check_workflow_status(self, workflow_job_id: str): return get_ps_workflow_job_status(self.server_addr_processing, workflow_job_id=workflow_job_id) - def poll_job_status(self, job_id: str) -> str: + def poll_job_status(self, job_id: str, print_state: bool = False) -> str: return poll_job_status_till_timeout_fail_or_success( - ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait) + ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait, + print_state=print_state) - def poll_workflow_status(self, job_id: str) -> str: + def poll_workflow_status(self, job_id: str, print_state: bool = False) -> str: return poll_wf_status_till_timeout_fail_or_success( - ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait) + ps_server_host=self.server_addr_processing, job_id=job_id, tries=self.polling_tries, wait=self.polling_wait, + 
print_state=print_state) def send_processing_job_request(self, processor_name: str, req_params: dict) -> str: return post_ps_processing_request( ps_server_host=self.server_addr_processing, processor=processor_name, job_input=req_params) - def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str): + def send_workflow_job_request(self, path_to_wf: str, path_to_mets: str, page_wise: bool = False): return post_ps_workflow_request( - ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets) + ps_server_host=self.server_addr_processing, path_to_wf=path_to_wf, path_to_mets=path_to_mets, + page_wise=page_wise) diff --git a/src/ocrd_network/client_utils.py b/src/ocrd_network/client_utils.py index 9b924c16a4..4eaf4ea95b 100644 --- a/src/ocrd_network/client_utils.py +++ b/src/ocrd_network/client_utils.py @@ -1,9 +1,10 @@ +import json from requests import get as request_get, post as request_post from time import sleep from .constants import JobState, NETWORK_PROTOCOLS -def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int): +def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries: int, wait: int, print_state: bool = False) -> JobState: if job_type not in ["workflow", "processor"]: raise ValueError(f"Unknown job type '{job_type}', expected 'workflow' or 'processor'") job_state = JobState.unset @@ -13,18 +14,22 @@ def _poll_endpoint_status(ps_server_host: str, job_id: str, job_type: str, tries job_state = get_ps_processing_job_status(ps_server_host, job_id) if job_type == "workflow": job_state = get_ps_workflow_job_status(ps_server_host, job_id) + if print_state: + print(f"State of the {job_type} job {job_id}: {job_state}") if job_state == JobState.success or job_state == JobState.failed: break tries -= 1 return job_state -def poll_job_status_till_timeout_fail_or_success(ps_server_host: str, job_id: str, tries: int, wait: int) -> JobState: - return 
_poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait) +def poll_job_status_till_timeout_fail_or_success( + ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState: + return _poll_endpoint_status(ps_server_host, job_id, "processor", tries, wait, print_state) -def poll_wf_status_till_timeout_fail_or_success(ps_server_host: str, job_id: str, tries: int, wait: int) -> JobState: - return _poll_endpoint_status(ps_server_host, job_id, "workflow", tries, wait) +def poll_wf_status_till_timeout_fail_or_success( + ps_server_host: str, job_id: str, tries: int, wait: int, print_state: bool = False) -> JobState: + return _poll_endpoint_status(ps_server_host, job_id, "workflow", tries, wait, print_state) def get_ps_deployed_processors(ps_server_host: str): @@ -47,22 +52,21 @@ def get_ps_processing_job_log(ps_server_host: str, processing_job_id: str): return response -def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> str: +def get_ps_processing_job_status(ps_server_host: str, processing_job_id: str) -> JobState: request_url = f"{ps_server_host}/processor/job/{processing_job_id}" response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"}) assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}" job_state = response.json()["state"] assert job_state - return job_state - + return getattr(JobState, job_state.lower()) -def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> str: +def get_ps_workflow_job_status(ps_server_host: str, workflow_job_id: str) -> JobState: request_url = f"{ps_server_host}/workflow/job-simple/{workflow_job_id}" response = request_get(url=request_url, headers={"accept": "application/json; charset=utf-8"}) assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}" job_state = response.json()["state"] assert job_state - return job_state + return 
getattr(JobState, job_state.lower()) def post_ps_processing_request(ps_server_host: str, processor: str, job_input: dict) -> str: @@ -78,9 +82,13 @@ def post_ps_processing_request(ps_server_host: str, processor: str, job_input: d return processing_job_id -# TODO: Can be extended to include other parameters such as page_wise -def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets: str) -> str: - request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise=True" +def post_ps_workflow_request( + ps_server_host: str, + path_to_wf: str, + path_to_mets: str, + page_wise: bool = False, +) -> str: + request_url = f"{ps_server_host}/workflow/run?mets_path={path_to_mets}&page_wise={'True' if page_wise else 'False'}" response = request_post( url=request_url, headers={"accept": "application/json; charset=utf-8"}, @@ -88,8 +96,11 @@ def post_ps_workflow_request(ps_server_host: str, path_to_wf: str, path_to_mets: ) # print(response.json()) # print(response.__dict__) + json_resp_raw = response.text + # print(f'post_ps_workflow_request >> {response.status_code}') + # print(f'post_ps_workflow_request >> {json_resp_raw}') assert response.status_code == 200, f"Processing server: {request_url}, {response.status_code}" - wf_job_id = response.json()["job_id"] + wf_job_id = json.loads(json_resp_raw)["job_id"] assert wf_job_id return wf_job_id diff --git a/src/ocrd_network/processing_server.py b/src/ocrd_network/processing_server.py index 34c22e5cf6..31eeca5299 100644 --- a/src/ocrd_network/processing_server.py +++ b/src/ocrd_network/processing_server.py @@ -1,7 +1,7 @@ from datetime import datetime from os import getpid from pathlib import Path -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union from uvicorn import run as uvicorn_run from fastapi import APIRouter, FastAPI, File, HTTPException, Request, status, UploadFile @@ -48,6 +48,7 @@ get_workflow_content, get_from_database_workspace, 
get_from_database_workflow_job, + kill_mets_server_zombies, parse_workflow_tasks, raise_http_exception, request_processor_server_tool_json, @@ -78,7 +79,6 @@ class ProcessingServer(FastAPI): """ def __init__(self, config_path: str, host: str, port: int) -> None: - initLogging() self.title = "OCR-D Processing Server" super().__init__( title=self.title, @@ -86,6 +86,7 @@ def __init__(self, config_path: str, host: str, port: int) -> None: on_shutdown=[self.on_shutdown], description="OCR-D Processing Server" ) + initLogging() self.log = getLogger("ocrd_network.processing_server") log_file = get_processing_server_logging_file_path(pid=getpid()) configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a") @@ -155,7 +156,7 @@ def start(self) -> None: queue_names = self.deployer.find_matching_network_agents( worker_only=True, str_names_only=True, unique_only=True ) - self.log.debug(f"Creating message queues on RabbitMQ instance url: {self.rabbitmq_url}") + self.log.info(f"Creating message queues on RabbitMQ instance url: {self.rabbitmq_url}") create_message_queues(logger=self.log, rmq_publisher=self.rmq_publisher, queue_names=queue_names) self.deployer.deploy_network_agents(mongodb_url=self.mongodb_url, rabbitmq_url=self.rabbitmq_url) @@ -167,6 +168,7 @@ def start(self) -> None: uvicorn_run(self, host=self.hostname, port=int(self.port)) async def on_startup(self): + self.log.info(f"Initializing the Database on: {self.mongodb_url}") await initiate_database(db_url=self.mongodb_url) async def on_shutdown(self) -> None: @@ -200,6 +202,14 @@ def add_api_routes_others(self): tags=[ServerApiTags.WORKSPACE], summary="Forward a TCP request to UDS mets server" ) + others_router.add_api_route( + path="/kill_mets_server_zombies", + endpoint=self.kill_mets_server_zombies, + methods=["DELETE"], + tags=[ServerApiTags.WORKFLOW, ServerApiTags.PROCESSING], + status_code=status.HTTP_200_OK, + summary="!! Workaround Do Not Use Unless You Have A Reason !! 
Kill all METS servers on this machine that have been created more than 60 minutes ago." + ) self.include_router(others_router) def add_api_routes_processing(self): @@ -320,7 +330,7 @@ async def forward_tcp_request_to_uds_mets_server(self, request: Request) -> Dict """Forward mets-server-request A processor calls a mets related method like add_file with ClientSideOcrdMets. This sends - a request to this endpoint. This request contains all infomation neccessary to make a call + a request to this endpoint. This request contains all information necessary to make a call to the uds-mets-server. This information is used by `MetsServerProxy` to make a the call to the local (local for the processing-server) reachable the uds-mets-server. """ @@ -574,26 +584,20 @@ async def _cancel_cached_dependent_jobs(self, workspace_key: str, job_id: str) - ) async def _consume_cached_jobs_of_workspace( - self, workspace_key: str, mets_server_url: str + self, workspace_key: str, mets_server_url: str, path_to_mets: str ) -> List[PYJobInput]: - - # Check whether the internal queue for the workspace key still exists - if workspace_key not in self.cache_processing_requests.processing_requests: - self.log.debug(f"No internal queue available for workspace with key: {workspace_key}") - return [] - # decrease the internal cache counter by 1 request_counter = self.cache_processing_requests.update_request_counter( workspace_key=workspace_key, by_value=-1 ) self.log.debug(f"Internal processing job cache counter value: {request_counter}") - if not len(self.cache_processing_requests.processing_requests[workspace_key]): + if (workspace_key not in self.cache_processing_requests.processing_requests or + not len(self.cache_processing_requests.processing_requests[workspace_key])): if request_counter <= 0: # Shut down the Mets Server for the workspace_key since no # more internal callbacks are expected for that workspace self.log.debug(f"Stopping the mets server: {mets_server_url}") - - 
self.deployer.stop_uds_mets_server(mets_server_url=mets_server_url) + self.deployer.stop_uds_mets_server(mets_server_url=mets_server_url, path_to_mets=path_to_mets) try: # The queue is empty - delete it @@ -609,6 +613,10 @@ async def _consume_cached_jobs_of_workspace( else: self.log.debug(f"Internal request cache is empty but waiting for {request_counter} result callbacks.") return [] + # Check whether the internal queue for the workspace key still exists + if workspace_key not in self.cache_processing_requests.processing_requests: + self.log.debug(f"No internal queue available for workspace with key: {workspace_key}") + return [] consumed_requests = await self.cache_processing_requests.consume_cached_requests(workspace_key=workspace_key) return consumed_requests @@ -643,7 +651,7 @@ async def remove_job_from_request_cache(self, result_message: PYResultMessage): raise_http_exception(self.log, status.HTTP_404_NOT_FOUND, message, error) consumed_cached_jobs = await self._consume_cached_jobs_of_workspace( - workspace_key=workspace_key, mets_server_url=mets_server_url + workspace_key=workspace_key, mets_server_url=mets_server_url, path_to_mets=path_to_mets ) await self.push_cached_jobs_to_agents(processing_jobs=consumed_cached_jobs) @@ -817,6 +825,10 @@ async def get_workflow_info(self, workflow_job_id) -> Dict: response = self._produce_workflow_status_response(processing_jobs=jobs) return response + async def kill_mets_server_zombies(self, minutes_ago : Optional[int] = None, dry_run : Optional[bool] = None) -> List[int]: + pids_killed = kill_mets_server_zombies(minutes_ago=minutes_ago, dry_run=dry_run) + return pids_killed + async def get_workflow_info_simple(self, workflow_job_id) -> Dict[str, JobState]: """ Simplified version of the `get_workflow_info` that returns a single state for the entire workflow. 
diff --git a/src/ocrd_network/processing_worker.py b/src/ocrd_network/processing_worker.py index a352ea5fde..302100743d 100644 --- a/src/ocrd_network/processing_worker.py +++ b/src/ocrd_network/processing_worker.py @@ -9,12 +9,12 @@ """ from datetime import datetime -from os import getpid +from os import getpid, getppid from pika import BasicProperties from pika.adapters.blocking_connection import BlockingChannel from pika.spec import Basic -from ocrd_utils import getLogger +from ocrd_utils import getLogger, initLogging from .constants import JobState from .database import sync_initiate_database, sync_db_get_workspace, sync_db_update_processing_job, verify_database_uri from .logging_utils import ( @@ -35,14 +35,16 @@ class ProcessingWorker: def __init__(self, rabbitmq_addr, mongodb_addr, processor_name, ocrd_tool: dict, processor_class=None) -> None: + initLogging() self.log = getLogger(f'ocrd_network.processing_worker') log_file = get_processing_worker_logging_file_path(processor_name=processor_name, pid=getpid()) configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a") try: verify_database_uri(mongodb_addr) - self.log.debug(f'Verified MongoDB URL: {mongodb_addr}') + self.log.info(f'Verified MongoDB URL: {mongodb_addr}') self.rmq_data = verify_and_parse_mq_uri(rabbitmq_addr) + self.log.info(f'Verified RabbitMQ URL: {rabbitmq_addr}') except ValueError as error: msg = f"Failed to parse data, error: {error}" self.log.exception(msg) @@ -61,6 +63,7 @@ def __init__(self, rabbitmq_addr, mongodb_addr, processor_name, ocrd_tool: dict, # Gets assigned when the `connect_publisher` is called on the worker object # Used to publish OcrdResultMessage type message to the queue with name {processor_name}-result self.rmq_publisher = None + self.log.info(f"Initialized processing worker: {processor_name}") def connect_consumer(self): self.rmq_consumer = connect_rabbitmq_consumer(self.log, self.rmq_data) @@ -240,7 +243,7 @@ def publish_result_to_all(self, 
processing_message: OcrdProcessingMessage, resul # post the result message (callback to a user defined endpoint) post_to_callback_url(self.log, callback_url, result_message) if internal_callback_url: - self.log.info(f"Publishing result to internal callback url (Processing Server): {callback_url}") + self.log.info(f"Publishing result to internal callback url (Processing Server): {internal_callback_url}") # If the internal callback_url field is set, # post the result message (callback to Processing Server endpoint) post_to_callback_url(self.log, internal_callback_url, result_message) diff --git a/src/ocrd_network/processor_server.py b/src/ocrd_network/processor_server.py index 5aed89d72c..60674afbf6 100644 --- a/src/ocrd_network/processor_server.py +++ b/src/ocrd_network/processor_server.py @@ -42,13 +42,13 @@ class ProcessorServer(FastAPI): def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class=None): if not (processor_name or processor_class): raise ValueError("Either 'processor_name' or 'processor_class' must be provided") - initLogging() super().__init__( on_startup=[self.on_startup], on_shutdown=[self.on_shutdown], title=f"Network agent - Processor Server", description="Network agent - Processor Server" ) + initLogging() self.log = getLogger("ocrd_network.processor_server") log_file = get_processor_server_logging_file_path(processor_name=processor_name, pid=getpid()) configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a") @@ -69,6 +69,7 @@ def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class= self.processor_name = self.ocrd_tool["executable"] self.add_api_routes_processing() + self.log.info(f"Initialized processor server: {processor_name}") async def on_startup(self): await initiate_database(db_url=self.db_url) diff --git a/src/ocrd_network/rabbitmq_utils/connector.py b/src/ocrd_network/rabbitmq_utils/connector.py index 893d55a219..8fbbc84ab9 100644 --- 
a/src/ocrd_network/rabbitmq_utils/connector.py +++ b/src/ocrd_network/rabbitmq_utils/connector.py @@ -6,6 +6,7 @@ from typing import Any, Optional, Union from pika import BasicProperties, BlockingConnection, ConnectionParameters, PlainCredentials from pika.adapters.blocking_connection import BlockingChannel +from ocrd_utils import config from .constants import ( DEFAULT_EXCHANGER_NAME, DEFAULT_EXCHANGER_TYPE, @@ -69,8 +70,7 @@ def open_blocking_connection( port=port, virtual_host=vhost, credentials=credentials, - # TODO: The heartbeat should not be disabled (0)! - heartbeat=0 + heartbeat=config.OCRD_NETWORK_RABBITMQ_HEARTBEAT ), ) return blocking_connection diff --git a/src/ocrd_network/runtime_data/deployer.py b/src/ocrd_network/runtime_data/deployer.py index b956904d07..919d5b97ce 100644 --- a/src/ocrd_network/runtime_data/deployer.py +++ b/src/ocrd_network/runtime_data/deployer.py @@ -8,7 +8,7 @@ """ from __future__ import annotations from pathlib import Path -from subprocess import Popen, run as subprocess_run +import psutil from time import sleep from typing import Dict, List, Union @@ -30,6 +30,8 @@ def __init__(self, config_path: str) -> None: self.data_hosts: List[DataHost] = parse_hosts_data(ps_config["hosts"]) self.internal_callback_url = ps_config.get("internal_callback_url", None) self.mets_servers: Dict = {} # {"mets_server_url": "mets_server_pid"} + # This is required to store UDS urls that are multiplexed through the TCP proxy and are not preserved anywhere + self.mets_servers_paths: Dict = {} # {"ws_dir_path": "mets_server_url"} self.use_tcp_mets = ps_config.get("use_tcp_mets", False) # TODO: Reconsider this. 
@@ -146,25 +148,33 @@ def start_uds_mets_server(self, ws_dir_path: str) -> Path: if is_mets_server_running(mets_server_url=str(mets_server_url)): self.log.debug(f"The UDS mets server for {ws_dir_path} is already started: {mets_server_url}") return mets_server_url + elif Path(mets_server_url).is_socket(): + self.log.warning( + f"The UDS mets server for {ws_dir_path} is not running but the socket file exists: {mets_server_url}." + "Removing to avoid any weird behavior before starting the server.") + Path(mets_server_url).unlink() self.log.info(f"Starting UDS mets server: {mets_server_url}") - pid = OcrdMetsServer.create_process(mets_server_url=mets_server_url, ws_dir_path=ws_dir_path, log_file=log_file) - self.mets_servers[mets_server_url] = pid + pid = OcrdMetsServer.create_process(mets_server_url=str(mets_server_url), ws_dir_path=str(ws_dir_path), log_file=str(log_file)) + self.mets_servers[str(mets_server_url)] = pid + self.mets_servers_paths[str(ws_dir_path)] = str(mets_server_url) return mets_server_url - def stop_uds_mets_server(self, mets_server_url: str, stop_with_pid: bool = False) -> None: + def stop_uds_mets_server(self, mets_server_url: str, path_to_mets: str) -> None: self.log.info(f"Stopping UDS mets server: {mets_server_url}") - if stop_with_pid: - if Path(mets_server_url) not in self.mets_servers: - message = f"UDS Mets server not found at URL: {mets_server_url}" - self.log.exception(message) - raise Exception(message) - mets_server_pid = self.mets_servers[Path(mets_server_url)] - OcrdMetsServer.kill_process(mets_server_pid=mets_server_pid) - return - # TODO: Reconsider this again - # Not having this sleep here causes connection errors - # on the last request processed by the processing worker. - # Sometimes 3 seconds is enough, sometimes not. 
- sleep(5) - stop_mets_server(mets_server_url=mets_server_url) + self.log.info(f"Path to the mets file: {path_to_mets}") + self.log.debug(f"mets_server: {self.mets_servers}") + self.log.debug(f"mets_server_paths: {self.mets_servers_paths}") + workspace_path = str(Path(path_to_mets).parent) + mets_server_url_uds = self.mets_servers_paths[workspace_path] + mets_server_pid = self.mets_servers[mets_server_url_uds] + self.log.info(f"Terminating mets server with pid: {mets_server_pid}") + p = psutil.Process(mets_server_pid) + stop_mets_server(self.log, mets_server_url=mets_server_url, ws_dir_path=workspace_path) + if p.is_running(): + p.wait() + self.log.info(f"Terminated mets server with pid: {mets_server_pid}") + else: + self.log.info(f"Mets server with pid: {mets_server_pid} has already terminated.") + del self.mets_servers_paths[workspace_path] + del self.mets_servers[mets_server_url_uds] return diff --git a/src/ocrd_network/server_cache.py b/src/ocrd_network/server_cache.py index b57f3fd235..179a76139d 100644 --- a/src/ocrd_network/server_cache.py +++ b/src/ocrd_network/server_cache.py @@ -31,7 +31,7 @@ def check_if_locked_pages_for_output_file_grps( self, workspace_key: str, output_file_grps: List[str], page_ids: List[str] ) -> bool: if not self.locked_pages.get(workspace_key, None): - self.log.debug(f"No entry found in the locked pages cache for workspace key: {workspace_key}") + self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}") return False debug_message = f"Caching the received request due to locked output file grp pages." 
for file_group in output_file_grps: @@ -46,46 +46,45 @@ def check_if_locked_pages_for_output_file_grps( def get_locked_pages(self, workspace_key: str) -> Dict[str, List[str]]: if not self.locked_pages.get(workspace_key, None): - self.log.debug(f"No locked pages available for workspace key: {workspace_key}") + self.log.info(f"No locked pages available for workspace key: {workspace_key}") return {} return self.locked_pages[workspace_key] def lock_pages(self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]) -> None: if not self.locked_pages.get(workspace_key, None): - self.log.debug(f"No entry found in the locked pages cache for workspace key: {workspace_key}") - self.log.debug(f"Creating an entry in the locked pages cache for workspace key: {workspace_key}") + self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}") + self.log.info(f"Creating an entry in the locked pages cache for workspace key: {workspace_key}") self.locked_pages[workspace_key] = {} for file_group in output_file_grps: if file_group not in self.locked_pages[workspace_key]: - self.log.debug(f"Creating an empty list for output file grp: {file_group}") + self.log.info(f"Creating an empty list for output file grp: {file_group}") self.locked_pages[workspace_key][file_group] = [] # The page id list is not empty - only some pages are in the request if page_ids: - self.log.debug(f"Locking pages for '{file_group}': {page_ids}") + self.log.info(f"Locking pages for '{file_group}': {page_ids}") self.locked_pages[workspace_key][file_group].extend(page_ids) - self.log.debug(f"Locked pages of '{file_group}': " - f"{self.locked_pages[workspace_key][file_group]}") + self.log.info(f"Locked pages of '{file_group}': {self.locked_pages[workspace_key][file_group]}") else: # Lock all pages with a single value - self.log.debug(f"Locking pages for '{file_group}': {self.placeholder_all_pages}") + self.log.info(f"Locking pages for '{file_group}': 
{self.placeholder_all_pages}") self.locked_pages[workspace_key][file_group].append(self.placeholder_all_pages) def unlock_pages(self, workspace_key: str, output_file_grps: List[str], page_ids: List[str]) -> None: if not self.locked_pages.get(workspace_key, None): - self.log.debug(f"No entry found in the locked pages cache for workspace key: {workspace_key}") + self.log.info(f"No entry found in the locked pages cache for workspace key: {workspace_key}") return for file_group in output_file_grps: if file_group in self.locked_pages[workspace_key]: if page_ids: # Unlock the previously locked pages - self.log.debug(f"Unlocking pages of '{file_group}': {page_ids}") + self.log.info(f"Unlocking pages of '{file_group}': {page_ids}") self.locked_pages[workspace_key][file_group] = \ [x for x in self.locked_pages[workspace_key][file_group] if x not in page_ids] - self.log.debug(f"Remaining locked pages of '{file_group}': " - f"{self.locked_pages[workspace_key][file_group]}") + self.log.info(f"Remaining locked pages of '{file_group}': " + f"{self.locked_pages[workspace_key][file_group]}") else: # Remove the single variable used to indicate all pages are locked - self.log.debug(f"Unlocking all pages for: {file_group}") + self.log.info(f"Unlocking all pages for: {file_group}") self.locked_pages[workspace_key][file_group].remove(self.placeholder_all_pages) @@ -127,11 +126,11 @@ def __print_job_input_debug_message(self, job_input: PYJobInput): debug_message += f", page ids: {job_input.page_id}" debug_message += f", job id: {job_input.job_id}" debug_message += f", job depends on: {job_input.depends_on}" - self.log.debug(debug_message) + self.log.info(debug_message) async def consume_cached_requests(self, workspace_key: str) -> List[PYJobInput]: if not self.has_workspace_cached_requests(workspace_key=workspace_key): - self.log.debug(f"No jobs to be consumed for workspace key: {workspace_key}") + self.log.info(f"No jobs to be consumed for workspace key: {workspace_key}") return [] 
found_consume_requests = [] for current_element in self.processing_requests[workspace_key]: @@ -165,25 +164,27 @@ def update_request_counter(self, workspace_key: str, by_value: int) -> int: # If a record counter of this workspace key does not exist # in the requests counter cache yet, create one and assign 0 if not self.processing_counter.get(workspace_key, None): - self.log.debug(f"Creating an internal request counter for workspace key: {workspace_key}") + self.log.info(f"Creating an internal request counter for workspace key: {workspace_key}") self.processing_counter[workspace_key] = 0 self.processing_counter[workspace_key] = self.processing_counter[workspace_key] + by_value + self.log.info(f"The new request counter of {workspace_key}: {self.processing_counter[workspace_key]}") return self.processing_counter[workspace_key] def cache_request(self, workspace_key: str, data: PYJobInput): # If a record queue of this workspace key does not exist in the requests cache if not self.processing_requests.get(workspace_key, None): - self.log.debug(f"Creating an internal request queue for workspace_key: {workspace_key}") + self.log.info(f"Creating an internal request queue for workspace_key: {workspace_key}") self.processing_requests[workspace_key] = [] self.__print_job_input_debug_message(job_input=data) # Add the processing request to the end of the internal queue + self.log.info(f"Caching a processing request of {workspace_key}: {data.job_id}") self.processing_requests[workspace_key].append(data) async def cancel_dependent_jobs(self, workspace_key: str, processing_job_id: str) -> List[PYJobInput]: if not self.has_workspace_cached_requests(workspace_key=workspace_key): - self.log.debug(f"No jobs to be cancelled for workspace key: {workspace_key}") + self.log.info(f"No jobs to be cancelled for workspace key: {workspace_key}") return [] - self.log.debug(f"Cancelling jobs dependent on job id: {processing_job_id}") + self.log.info(f"Cancelling jobs dependent on job id: 
{processing_job_id}") found_cancel_requests = [] for i, current_element in enumerate(self.processing_requests[workspace_key]): if processing_job_id in current_element.depends_on: @@ -192,7 +193,7 @@ async def cancel_dependent_jobs(self, workspace_key: str, processing_job_id: str for cancel_element in found_cancel_requests: try: self.processing_requests[workspace_key].remove(cancel_element) - self.log.debug(f"For job id: '{processing_job_id}', cancelling job id: '{cancel_element.job_id}'") + self.log.info(f"For job id: '{processing_job_id}', cancelling job id: '{cancel_element.job_id}'") cancelled_jobs.append(cancel_element) await db_update_processing_job(job_id=cancel_element.job_id, state=JobState.cancelled) # Recursively cancel dependent jobs for the cancelled job @@ -225,9 +226,11 @@ async def sync_is_caching_required(self, job_dependencies: List[str]) -> bool: def has_workspace_cached_requests(self, workspace_key: str) -> bool: if not self.processing_requests.get(workspace_key, None): - self.log.debug(f"In processing requests cache, no workspace key found: {workspace_key}") + self.log.info(f"In processing requests cache, no workspace key found: {workspace_key}") return False if not len(self.processing_requests[workspace_key]): - self.log.debug(f"The processing requests cache is empty for workspace key: {workspace_key}") + self.log.info(f"The processing requests cache is empty for workspace key: {workspace_key}") return False + self.log.info(f"The processing requests cache has {len(self.processing_requests[workspace_key])} " + f"entries for workspace key: {workspace_key} ") return True diff --git a/src/ocrd_network/server_utils.py b/src/ocrd_network/server_utils.py index 9d8628170c..6e485f261f 100644 --- a/src/ocrd_network/server_utils.py +++ b/src/ocrd_network/server_utils.py @@ -1,12 +1,18 @@ +import os +import re +import signal +from pathlib import Path +from json import dumps, loads +from urllib.parse import urljoin +from typing import Dict, List, Optional, 
Union +from time import time + from fastapi import HTTPException, status, UploadFile from fastapi.responses import FileResponse from httpx import AsyncClient, Timeout -from json import dumps, loads from logging import Logger -from pathlib import Path from requests import get as requests_get -from typing import Dict, List, Union -from urllib.parse import urljoin +from requests_unixsocket import sys from ocrd.resolver import Resolver from ocrd.task_sequence import ProcessorTask @@ -241,3 +247,33 @@ def validate_first_task_input_file_groups_existence(logger: Logger, mets_path: s if group not in available_groups: message = f"Input file group '{group}' of the first processor not found: {input_file_grps}" raise_http_exception(logger, status.HTTP_422_UNPROCESSABLE_ENTITY, message) + + +def kill_mets_server_zombies(minutes_ago : Optional[int], dry_run : Optional[bool]) -> List[int]: + if minutes_ago == None: + minutes_ago = 90 + if dry_run == None: + dry_run = False + + now = time() + cmdline_pat = r'.*ocrd workspace -U.*server start $' + ret = [] + for procdir in sorted(Path('/proc').glob('*'), key=os.path.getctime): + if not procdir.is_dir(): + continue + cmdline_file = procdir.joinpath('cmdline') + if not cmdline_file.is_file(): + continue + ctime_ago = int((now - procdir.stat().st_ctime) / 60) + if ctime_ago < minutes_ago: + continue + cmdline = cmdline_file.read_text().replace('\x00', ' ') + if re.match(cmdline_pat, cmdline): + pid = int(procdir.name) + ret.append(pid) + print(f'METS Server with PID {pid} was created {ctime_ago} minutes ago, more than {minutes_ago}, so killing (cmdline="{cmdline})', file=sys.stderr) + if dry_run: + print(f'[dry_run is active] kill {pid}') + else: + os.kill(pid, signal.SIGTERM) + return ret diff --git a/src/ocrd_network/tcp_to_uds_mets_proxy.py b/src/ocrd_network/tcp_to_uds_mets_proxy.py index 176f4f1442..3f335435ab 100644 --- a/src/ocrd_network/tcp_to_uds_mets_proxy.py +++ b/src/ocrd_network/tcp_to_uds_mets_proxy.py @@ -1,5 +1,5 @@ 
from requests_unixsocket import Session as requests_unixsocket_session -from .utils import get_uds_path +from .utils import get_uds_path, convert_url_to_uds_format from typing import Dict from ocrd_utils import getLogger @@ -31,9 +31,13 @@ def forward_tcp_request(self, request_body) -> Dict: if method_type not in SUPPORTED_METHOD_TYPES: raise NotImplementedError(f"Method type: {method_type} not recognized") ws_socket_file = str(get_uds_path(ws_dir_path=ws_dir_path)) - ws_unix_socket_url = f'http+unix://{ws_socket_file.replace("/", "%2F")}' + ws_unix_socket_url = convert_url_to_uds_format(ws_socket_file) uds_request_url = f"{ws_unix_socket_url}/{request_url}" + self.log.info(f"Forwarding TCP mets server request to UDS url: {uds_request_url}") + self.log.info(f"Forwarding method type {method_type}, request data: {request_data}, " + f"expected response type: {response_type}") + if not request_data: response = self.session.request(method_type, uds_request_url) elif "params" in request_data: @@ -45,12 +49,11 @@ def forward_tcp_request(self, request_body) -> Dict: else: raise ValueError("Expecting request_data to be empty or containing single key: params," f"form, or class but not {request_data.keys}") - + if response_type == "empty": + return {} if not response: self.log.error(f"Uds-Mets-Server gives unexpected error. 
Response: {response.__dict__}") return {"error": response.text} - elif response_type == "empty": - return {} elif response_type == "text": return {"text": response.text} elif response_type == "class" or response_type == "dict": diff --git a/src/ocrd_network/utils.py b/src/ocrd_network/utils.py index a2f563de43..5abe2104fd 100644 --- a/src/ocrd_network/utils.py +++ b/src/ocrd_network/utils.py @@ -4,6 +4,7 @@ from functools import wraps from hashlib import md5 from json import loads +from logging import Logger from pathlib import Path from re import compile as re_compile, split as re_split from requests import get as requests_get, Session as Session_TCP @@ -151,22 +152,25 @@ def is_mets_server_running(mets_server_url: str, ws_dir_path: str = None) -> boo return False -def stop_mets_server(mets_server_url: str, ws_dir_path: str = None) -> bool: +def stop_mets_server(logger: Logger, mets_server_url: str, ws_dir_path: str) -> bool: protocol = "tcp" if (mets_server_url.startswith("http://") or mets_server_url.startswith("https://")) else "uds" - session = Session_TCP() if protocol == "tcp" else Session_UDS() - if protocol == "uds": - mets_server_url = convert_url_to_uds_format(mets_server_url) - try: - if 'tcp_mets' in mets_server_url: - if not ws_dir_path: - return False - response = session.post(url=f"{mets_server_url}", json=MpxReq.stop(ws_dir_path)) - else: - response = session.delete(url=f"{mets_server_url}/") - except Exception: - return False - return response.status_code == 200 - + # If the mets server URL is the proxy endpoint + if protocol == "tcp" and "tcp_mets" in mets_server_url: + # Convert the mets server url to UDS format + ws_socket_file = str(get_uds_path(ws_dir_path)) + mets_server_url = convert_url_to_uds_format(ws_socket_file) + protocol = "uds" + if protocol == "tcp": + request_json = MpxReq.stop(ws_dir_path) + logger.info(f"Sending POST request to: {mets_server_url}, request_json: {request_json}") + response = 
Session_TCP().post(url=f"{mets_server_url}", json=request_json) + return response.status_code == 200 + elif protocol == "uds": + logger.info(f"Sending DELETE request to: {mets_server_url}/") + response = Session_UDS().delete(url=f"{mets_server_url}/") + return response.status_code == 200 + else: + ValueError(f"Unexpected protocol type: {protocol}") def get_uds_path(ws_dir_path: str) -> Path: return Path(config.OCRD_NETWORK_SOCKETS_ROOT_DIR, f"{safe_filename(ws_dir_path)}.sock") diff --git a/src/ocrd_page_user_methods.py b/src/ocrd_page_user_methods.py index fe22dd89ab..9cec0b30ad 100644 --- a/src/ocrd_page_user_methods.py +++ b/src/ocrd_page_user_methods.py @@ -104,7 +104,7 @@ def _add_method(class_re, method_name, file_name=None): _add_method(r'^(OrderedGroupType|OrderedGroupIndexedType)$', 'clear_AllIndexed'), _add_method(r'^(OrderedGroupType|OrderedGroupIndexedType)$', 'extend_AllIndexed'), _add_method(r'^(OrderedGroupType|OrderedGroupIndexedType)$', 'sort_AllIndexed'), - _add_method(r'^(OrderedGroupType|OrderedGroupIndexedType)$', 'exportChildren', 'exportChildren_GroupType'), + _add_method(r'^(OrderedGroupType|OrderedGroupIndexedType)$', 'exportChildren', '_exportChildren_GroupType'), _add_method(r'^(UnorderedGroupType|UnorderedGroupIndexedType)$', 'get_UnorderedGroupChildren'), _add_method(r'^(PcGtsType|PageType)$', 'id'), _add_method(r'^(PageType)$', 'get_AllRegions'), diff --git a/src/ocrd_page_user_methods/exportChildren_GroupType.py b/src/ocrd_page_user_methods/_exportChildren_GroupType.py similarity index 65% rename from src/ocrd_page_user_methods/exportChildren_GroupType.py rename to src/ocrd_page_user_methods/_exportChildren_GroupType.py index 924ee63146..9dea9c422e 100644 --- a/src/ocrd_page_user_methods/exportChildren_GroupType.py +++ b/src/ocrd_page_user_methods/_exportChildren_GroupType.py @@ -1,9 +1,14 @@ # pylint: disable=line-too-long,invalid-name,missing-module-docstring,missing-function-docstring -def exportChildren(self, outfile, level, 
namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments - namespaceprefix_ = 'pc:' +def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True): # pylint: disable=unused-argument,too-many-arguments + if pretty_print: + eol_ = '\n' + else: + eol_ = '' if self.UserDefined is not None: + namespaceprefix_ = self.UserDefined_nsprefix_ + ':' if (UseCapturedNS_ and self.UserDefined_nsprefix_) else '' self.UserDefined.export(outfile, level, namespaceprefix_, namespacedef_='', name_='UserDefined', pretty_print=pretty_print) for Labels_ in self.Labels: + namespaceprefix_ = self.Labels_nsprefix_ + ':' if (UseCapturedNS_ and self.Labels_nsprefix_) else '' Labels_.export(outfile, level, namespaceprefix_, namespacedef_='', name_='Labels', pretty_print=pretty_print) cleaned = [] def replaceWithRRI(group): @@ -21,4 +26,4 @@ def replaceWithRRI(group): else: cleaned.append(entry) for entry in cleaned: - entry.export(outfile, level, namespaceprefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print) + entry.export(outfile, level, entry.ns_prefix_, namespacedef_='', name_=entry.__class__.__name__[:-4], pretty_print=pretty_print) diff --git a/src/ocrd_utils/config.py b/src/ocrd_utils/config.py index 36399870e2..022a84d600 100644 --- a/src/ocrd_utils/config.py +++ b/src/ocrd_utils/config.py @@ -21,7 +21,7 @@ def _parser_boolean(val): class OcrdEnvVariable(): - def __init__(self, name, description, parser=str, validator=lambda val: True, default=[False, None]): + def __init__(self, name, description, parser=str, validator=lambda _: True, default=[False, None]): """ An environment variable for use in OCR-D. 
@@ -47,10 +47,19 @@ def __str__(self): return f'{self.name}: {self.description}' def describe(self, wrap_text=True, indent_text=True): + """ + Output help information on a config option. + + If ``option.description`` is a multiline string with complex formatting + (e.g. markdown lists), replace empty lines with ``\b`` and set + ``wrap_text`` to ``False``. + """ desc = self.description if self.has_default: default = self.default() if callable(self.default) else self.default - desc += f' (Default: "{default}")' + if not desc.endswith('\n'): + desc += ' ' + desc += f'(Default: "{default}")' ret = '' ret = f'{self.name}\n' if wrap_text: @@ -146,11 +155,11 @@ def raw_value(self, name): description="""\ Whether to enable gathering runtime statistics on the `ocrd.profile` logger (comma-separated): - +\b - `CPU`: yields CPU and wall-time, - `RSS`: also yields peak memory (resident set size) - `PSS`: also yields peak memory (proportional set size) - +\b """, validator=lambda val : all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')), default=(True, '')) @@ -183,11 +192,12 @@ def _ocrd_download_timeout_parser(val): config.add("OCRD_MISSING_INPUT", description="""\ -How to deal with missing input files (for some fileGrp/pageId) during processing: - +How to deal with missing input files +(for some fileGrp/pageId) during processing: +\b - `SKIP`: ignore and proceed with next page's input - `ABORT`: throw :py:class:`.MissingInputFile` - +\b """, default=(True, 'SKIP'), validator=lambda val: val in ['SKIP', 'ABORT'], @@ -195,12 +205,13 @@ def _ocrd_download_timeout_parser(val): config.add("OCRD_MISSING_OUTPUT", description="""\ -How to deal with missing output files (for some fileGrp/pageId) during processing: - +How to deal with missing output files +(for some fileGrp/pageId) during processing: +\b - `SKIP`: ignore and proceed processing next page - `COPY`: fall back to copying input PAGE to output fileGrp for page - `ABORT`: re-throw whatever caused processing to fail - 
+\b """, default=(True, 'SKIP'), validator=lambda val: val in ['SKIP', 'COPY', 'ABORT'], @@ -213,12 +224,13 @@ def _ocrd_download_timeout_parser(val): config.add("OCRD_EXISTING_OUTPUT", description="""\ -How to deal with already existing output files (for some fileGrp/pageId) during processing: - +How to deal with already existing output files +(for some fileGrp/pageId) during processing: +\b - `SKIP`: ignore and proceed processing next page - `OVERWRITE`: force writing result to output fileGrp for page - `ABORT`: re-throw :py:class:`FileExistsError` - +\b """, default=(True, 'SKIP'), validator=lambda val: val in ['SKIP', 'OVERWRITE', 'ABORT'], @@ -231,7 +243,7 @@ def _ocrd_download_timeout_parser(val): config.add("OCRD_NETWORK_CLIENT_POLLING_SLEEP", description="How many seconds to sleep before trying again.", parser=int, - default=(True, 30)) + default=(True, 10)) config.add("OCRD_NETWORK_CLIENT_POLLING_TIMEOUT", description="Timeout for a blocking ocrd network client (in seconds).", @@ -247,9 +259,19 @@ def _ocrd_download_timeout_parser(val): default=(True, '')) config.add("OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS", - description="Number of attempts for a RabbitMQ client to connect before failing.", + description="Number of attempts for a RabbitMQ client to connect before failing.", + parser=int, + default=(True, 3)) + +config.add( + name="OCRD_NETWORK_RABBITMQ_HEARTBEAT", + description=""" + Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value + proposed by broker. Use 0 to deactivate heartbeat. 
+ """, parser=int, - default=(True, 3)) + default=(True, 0) +) config.add(name="OCRD_NETWORK_SOCKETS_ROOT_DIR", description="The root directory where all mets server related socket files are created", diff --git a/src/ocrd_utils/logging.py b/src/ocrd_utils/logging.py index 181805118d..52b01883f1 100644 --- a/src/ocrd_utils/logging.py +++ b/src/ocrd_utils/logging.py @@ -46,13 +46,8 @@ 'setOverrideLogLevel', ] -# These are the loggers we add handlers to -ROOT_OCRD_LOGGERS = [ - 'ocrd', - 'ocrd_network' -] - LOGGING_DEFAULTS = { + '': logging.WARNING, 'ocrd': logging.INFO, 'ocrd_network': logging.INFO, # 'ocrd.resolver': logging.INFO, @@ -113,18 +108,15 @@ def setOverrideLogLevel(lvl, silent=not config.OCRD_LOGGING_DEBUG): lvl (string): Log level name. silent (boolean): Whether to log the override call """ - if not _initialized_flag: - initLogging(silent=silent) - ocrd_logger = logging.getLogger('ocrd') - - if lvl is None: - if not silent: - print('[LOGGING] Reset log level override', file=sys.stderr) - ocrd_logger.setLevel(logging.NOTSET) - else: - if not silent: - print(f'[LOGGING] Overriding ocrd log level to {lvl}', file=sys.stderr) - ocrd_logger.setLevel(lvl) + if lvl is not None: + lvl = getLevelName(lvl) + if not _initialized_flag: + initLogging(silent=silent) + # affect all configured loggers + for logger_name in logging.root.manager.loggerDict: + if not silent: + print(f'[LOGGING] Overriding {logger_name} log level to {lvl}', file=sys.stderr) + logging.getLogger(logger_name).setLevel(lvl) def get_logging_config_files(): """ @@ -158,20 +150,11 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L - silent (bool): Whether to log logging behavior by printing to stderr """ global _initialized_flag - if _initialized_flag and not force_reinit: - return - # disableLogging() - - # https://docs.python.org/3/library/logging.html#logging.disable - # If logging.disable(logging.NOTSET) is called, it effectively removes this - # overriding 
level, so that logging output again depends on the effective - # levels of individual loggers. - logging.disable(logging.NOTSET) - - # remove all handlers for the ocrd root loggers - for logger_name in ROOT_OCRD_LOGGERS: - for handler in logging.getLogger(logger_name).handlers[:]: - logging.getLogger(logger_name).removeHandler(handler) + if _initialized_flag: + if force_reinit: + disableLogging(silent=silent) + else: + return config_file = None if not builtin_only: @@ -190,8 +173,8 @@ def initLogging(builtin_only=False, force_reinit=False, silent=not config.OCRD_L ocrd_handler = logging.StreamHandler(stream=sys.stderr) ocrd_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT)) ocrd_handler.setLevel(logging.DEBUG) - for logger_name in ROOT_OCRD_LOGGERS: - logging.getLogger(logger_name).addHandler(ocrd_handler) + root_logger = logging.getLogger('') + root_logger.addHandler(ocrd_handler) for logger_name, logger_level in LOGGING_DEFAULTS.items(): logging.getLogger(logger_name).setLevel(logger_level) _initialized_flag = True @@ -207,24 +190,16 @@ def disableLogging(silent=not config.OCRD_LOGGING_DEBUG): if _initialized_flag and not silent: print("[LOGGING] Disabling logging", file=sys.stderr) _initialized_flag = False - # logging.basicConfig(level=logging.CRITICAL) - # logging.disable(logging.ERROR) - # remove all handlers for the ocrd logger - for logger_name in ROOT_OCRD_LOGGERS + ['']: - for handler in logging.getLogger(logger_name).handlers[:]: - logging.getLogger(logger_name).removeHandler(handler) - for logger_name in LOGGING_DEFAULTS: - logging.getLogger(logger_name).setLevel(logging.NOTSET) + # remove all handlers we might have added (via initLogging on builtin or file config) + for logger_name in logging.root.manager.loggerDict: + if not silent: + print(f'[LOGGING] Resetting {logger_name} log level and handlers') + logger = logging.getLogger(logger_name) + logger.setLevel(logging.NOTSET) + for handler in logger.handlers[:]: + 
logger.removeHandler(handler) + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) # Python default log level is WARNING logging.root.setLevel(logging.WARNING) -# Initializing stream handlers at module level -# would cause message output in all runtime contexts, -# including those which are already run for std output -# (--dump-json, --version, ocrd-tool, bashlib etc). -# So this needs to be an opt-in from the CLIs/decorators: -#initLogging() -# Also, we even have to block log output for libraries -# (like matplotlib/tensorflow) which set up logging -# themselves already: -disableLogging() diff --git a/src/ocrd_utils/ocrd_logging.conf b/src/ocrd_utils/ocrd_logging.conf index 5cf161398e..41e6d5af7a 100644 --- a/src/ocrd_utils/ocrd_logging.conf +++ b/src/ocrd_utils/ocrd_logging.conf @@ -34,7 +34,7 @@ keys=defaultFormatter,detailedFormatter # default logger "root" using consoleHandler # [logger_root] -level=INFO +level=WARNING handlers=consoleHandler,fileHandler @@ -56,22 +56,22 @@ handlers=consoleHandler,fileHandler # ocrd loggers [logger_ocrd] level=INFO -handlers=consoleHandler,fileHandler +handlers= qualname=ocrd -propagate=0 [logger_ocrd_network] level=INFO -handlers=consoleHandler,processingServerHandler +#handlers=consoleHandler,processingServerHandler +handlers=processingServerHandler qualname=ocrd_network -propagate=0 +#propagate=0 # # logger tensorflow # [logger_ocrd_tensorflow] level=ERROR -handlers=consoleHandler +handlers= qualname=tensorflow # @@ -79,7 +79,7 @@ qualname=tensorflow # [logger_ocrd_shapely_geos] level=ERROR -handlers=consoleHandler +handlers= qualname=shapely.geos @@ -88,7 +88,7 @@ qualname=shapely.geos # [logger_ocrd_PIL] level=INFO -handlers=consoleHandler +handlers= qualname=PIL # @@ -96,34 +96,32 @@ qualname=PIL # [logger_paramiko] level=INFO -handlers=consoleHandler +handlers= qualname=paramiko -propagate=0 [logger_paramiko_transport] level=INFO -handlers=consoleHandler +handlers= qualname=paramiko.transport 
-propagate=0 # # uvicorn loggers # [logger_uvicorn] level=INFO -handlers=consoleHandler +handlers= qualname=uvicorn [logger_uvicorn_access] level=WARN -handlers=consoleHandler +handlers= qualname=uvicorn.access [logger_uvicorn_error] level=INFO -handlers=consoleHandler +handlers= qualname=uvicorn.error [logger_multipart] level=INFO -handlers=consoleHandler +handlers= qualname=multipart diff --git a/tests/base.py b/tests/base.py index 53f393e08d..9eb1f20db8 100644 --- a/tests/base.py +++ b/tests/base.py @@ -26,8 +26,6 @@ class TestCase(VanillaTestCase): def setUp(self): chdir(dirname(realpath(__file__)) + '/..') - disableLogging() - initLogging(builtin_only=True) class CapturingTestCase(TestCase): """ diff --git a/tests/cli/test_log.py b/tests/cli/test_log.py index c63d78c318..3d81e8266b 100644 --- a/tests/cli/test_log.py +++ b/tests/cli/test_log.py @@ -6,8 +6,8 @@ from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory from ocrd.decorators import ocrd_loglevel -from ocrd_utils import setOverrideLogLevel, logging, disableLogging -import logging as python_logging +from ocrd_utils import disableLogging, initLogging +import logging @click.group() @ocrd_loglevel @@ -18,14 +18,19 @@ def mock_ocrd_cli(log_level): class TestLogCli(TestCase): def _get_log_output(self, *args): - disableLogging() code, out, err = self.invoke_cli(mock_ocrd_cli, args) print({'code': code, 'out': out, 'err': err}) return err + def setUp(self): + super().setUp() + initLogging() + def tearDown(self): if 'OCRD_TOOL_NAME' in ENV: del(ENV['OCRD_TOOL_NAME']) + super().tearDown() + disableLogging() def test_loglevel(self): assert 'DEBUG ocrd.log_cli - foo' not in self._get_log_output('log', 'debug', 'foo') diff --git a/tests/model/test_exif.py b/tests/model/test_exif.py index f6771fb8ee..18c5e4c467 100644 --- a/tests/model/test_exif.py +++ b/tests/model/test_exif.py @@ -24,7 +24,13 @@ ('leptonica_samples/data/OCR-D-IMG/OCR-D-IMG_1555_007.jpg', 944, 1472, 1, 1, 1, 'inches', 
'RGB', None), ('kant_aufklaerung_1784-jp2/data/OCR-D-IMG/INPUT_0020.jp2', - 1457, 2084, 1, 1, 1, 'inches', 'RGB', None) + 1457, 2084, 1, 1, 1, 'inches', 'RGB', None), + # tolerate multi-frame TIFF: + ('gutachten/data/IMG/IMG_1.tif', + 2088, 2634, 300, 300, 300, 'inches', 'RGB', 'raw'), + # multi-frame TIFF with metric pixel density (is actually YCBCR not RGB but Pillow thinks otherwise...) + ('indian-ferns/data/OCR-D-IMG/0004.tif', + 2626, 3620, 28, 28, 28, 'cm', 'RGB', 'jpeg'), ]) def test_ocrd_exif(path, width, height, xResolution, yResolution, resolution, resolutionUnit, photometricInterpretation, compression): """Check EXIF attributes for different input formats diff --git a/tests/network/config.py b/tests/network/config.py index e22cc6ce9d..611ad63821 100644 --- a/tests/network/config.py +++ b/tests/network/config.py @@ -89,11 +89,19 @@ test_config.add( name="OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS", + description="Number of attempts for a RabbitMQ client to connect before failing", + parser=int, + default=(True, 3) +) + +test_config.add( + name="OCRD_NETWORK_RABBITMQ_HEARTBEAT", description=""" - Number of attempts for a RabbitMQ client to connect before failing + Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value + proposed by broker. Use 0 to deactivate heartbeat. 
""", parser=int, - default=(True, 3) + default=(True, 0) ) test_config.add( diff --git a/tests/network/test_modules_mets_server_proxy.py b/tests/network/test_modules_mets_server_proxy.py index 8b8c0d35f7..f19d7e415e 100644 --- a/tests/network/test_modules_mets_server_proxy.py +++ b/tests/network/test_modules_mets_server_proxy.py @@ -119,7 +119,7 @@ def test_find_files(start_uds_mets_server): {"file_grp": test_file_group} ) response_dict = MetsServerProxy().forward_tcp_request(request_body=request_body) - assert len(response_dict["files"]) == 3, "Expected to find exatly 3 matching files" + assert len(response_dict["files"]) == 3, "Expected to find exactly 3 matching files" request_body = MpxReq.find_files( TEST_WORKSPACE_DIR, {"file_grp": test_non_existing_file_group} diff --git a/tests/processor/test_processor.py b/tests/processor/test_processor.py index 33a9548811..06c129c3ca 100644 --- a/tests/processor/test_processor.py +++ b/tests/processor/test_processor.py @@ -27,21 +27,21 @@ class TestProcessor(TestCase): + def run(self, result=None): + with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as workdir: + with pushd_popd(workdir): + self.resolver = Resolver() + self.workspace = self.resolver.workspace_from_url('mets.xml') + super().run(result=result) + def setUp(self): super().setUp() - # make sure we get an isolated temporary copy of the testdata each time - # as long as we are not using pytest but unittest, we need to manage contexts - # (enterContext is only supported starting with py311) - with ExitStack() as stack: - self.resolver = Resolver() - self.workdir = stack.enter_context(copy_of_directory(assets.path_to('SBB0000F29300010000/data'))) - stack.enter_context(pushd_popd(self.workdir)) - self.workspace = self.resolver.workspace_from_url('mets.xml') - self.addCleanup(stack.pop_all().close) + initLogging() def tearDown(self): super().tearDown() config.reset_defaults() + disableLogging() def test_incomplete_processor(self): proc = 
IncompleteProcessor(None) @@ -277,7 +277,6 @@ def test_run_output_timeout(self): assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) config.OCRD_EXISTING_OUTPUT = 'OVERWRITE' config.OCRD_PROCESSING_PAGE_TIMEOUT = 1 - from concurrent.futures import TimeoutError with pytest.raises(TimeoutError) as exc: run_processor(DummyProcessorWithOutputSleep, workspace=ws, input_file_grp="OCR-D-IMG", @@ -424,6 +423,7 @@ def ocrd_tool(self): def test_run_output_metsserver(start_mets_server): mets_server_url, ws = start_mets_server + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == 0 # do not raise for number of failures: config.OCRD_MAX_MISSING_OUTPUTS = -1 run_processor(DummyProcessorWithOutputSleep, workspace=ws, @@ -447,22 +447,33 @@ def test_run_output_metsserver(start_mets_server): parameter={"sleep": 0}, mets_server_url=mets_server_url) assert "already exists" in str(exc.value) + config.reset_defaults() # 2s (+ 2s tolerance) instead of 3*3s (+ 2s tolerance) -@pytest.mark.timeout(4) +# fixme: pytest-timeout does not shut down / finalize the fixture properly +# (regardless of method or func_only), so the next test in the suite +# does not execute ("previous item was not torn down properly") +# so we must instead wait for completion and assert on the time spent... 
+#@pytest.mark.timeout(timeout=4, func_only=True, method="signal") def test_run_output_parallel(start_mets_server): + import time mets_server_url, ws = start_mets_server + assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == 0 # do not raise for single-page timeout config.OCRD_PROCESSING_PAGE_TIMEOUT = -1 # do not raise for number of failures: config.OCRD_MAX_MISSING_OUTPUTS = -1 config.OCRD_MAX_PARALLEL_PAGES = 3 + start_time = time.time() run_processor(DummyProcessorWithOutputSleep, workspace=ws, input_file_grp="OCR-D-IMG", output_file_grp="OCR-D-OUT", parameter={"sleep": 2}, mets_server_url=mets_server_url) + run_time = time.time() - start_time + assert run_time < 3, f"run_processor took {run_time}s" assert len(ws.mets.find_all_files(fileGrp="OCR-D-OUT")) == len(ws.mets.find_all_files(fileGrp="OCR-D-IMG")) + config.reset_defaults() if __name__ == "__main__": main(__file__) diff --git a/tests/test_decorators.py b/tests/test_decorators.py index c36577020a..561fdc762d 100644 --- a/tests/test_decorators.py +++ b/tests/test_decorators.py @@ -41,22 +41,20 @@ def cli_dummy_processor(*args, **kwargs): class TestDecorators(TestCase): - def setUp(self): - super().setUp() - disableLogging() - def tearDown(self): super().tearDown() config.reset_defaults() + disableLogging() def test_minimal(self): - exit_code, out, err = self.invoke_cli(cli_with_ocrd_cli_options, ['-l', 'DEBUG']) - print(out, err) - assert not exit_code + initLogging() + code, out, err = self.invoke_cli(cli_with_ocrd_cli_options, ['-l', 'DEBUG']) + assert not code, (out, err) def test_loglevel_invalid(self): - code, _, err = self.invoke_cli(cli_with_ocrd_loglevel, ['--log-level', 'foo']) - assert code + initLogging() + code, out, err = self.invoke_cli(cli_with_ocrd_loglevel, ['--log-level', 'foo']) + assert code, (out, err) import click if int(click.__version__[0]) < 8: assert 'invalid choice: foo' in err @@ -67,7 +65,6 @@ def test_loglevel_override(self): if get_logging_config_files(): 
pytest.skip(f"ocrd_logging.conf found at {get_logging_config_files()}, skipping logging test") import logging - disableLogging() assert logging.getLogger('').getEffectiveLevel() == logging.WARNING assert logging.getLogger('ocrd').getEffectiveLevel() == logging.WARNING initLogging() diff --git a/tests/test_logging.py b/tests/test_logging.py index c2b6913b10..091fc25bee 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -26,16 +26,22 @@ class TestLogging(TestCase): def setUp(self): pass # do not chdir + def tearDown(self): + super().tearDown() + disableLogging() + def test_loglevel_inheritance(self): initLogging(builtin_only=True) ocrd_logger = logging.getLogger('ocrd') assert ocrd_logger.getEffectiveLevel() == logging.INFO some_logger = getLogger('ocrd.foo') + assert some_logger.level == logging.NOTSET assert some_logger.getEffectiveLevel() == logging.INFO setOverrideLogLevel('ERROR') assert ocrd_logger.getEffectiveLevel() == logging.ERROR assert some_logger.getEffectiveLevel() == logging.ERROR another_logger = getLogger('ocrd.bar') + assert another_logger.level == logging.NOTSET assert another_logger.getEffectiveLevel() == logging.ERROR def test_getLevelName(self): diff --git a/tests/test_logging_conf.py b/tests/test_logging_conf.py index f8e0e9e894..0717674103 100644 --- a/tests/test_logging_conf.py +++ b/tests/test_logging_conf.py @@ -21,74 +21,67 @@ # sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../ocrd') TEST_ROOT = pathlib.Path(os.path.dirname(os.path.abspath(__file__))).parent -def resetLogging(): - disableLogging() - initLogging() - - @pytest.fixture(name="logging_conf") -def _fixture_logging_conf(tmpdir): +def _fixture_logging_conf(tmpdir, capfd): path_logging_conf_orig = os.path.join( str(TEST_ROOT), 'src', 'ocrd_utils', 'ocrd_logging.conf') path_logging_conf_dest = os.path.join(str(tmpdir), 'ocrd_logging.conf') shutil.copy(path_logging_conf_orig, path_logging_conf_dest) - return str(tmpdir) + with pushd_popd(tmpdir): + 
with capfd.disabled(): + initLogging() + yield str(tmpdir) + disableLogging() -def test_configured_dateformat(logging_conf, capsys): +def test_configured_dateformat(logging_conf, capfd): """Ensure example ocrd_logging.conf is valid and produces desired record format""" # arrange - with pushd_popd(logging_conf): - resetLogging() - test_logger = getLogger('') + test_logger = getLogger('ocrd') - # act - test_logger.info("test logger initialized") + # act + test_logger.info("test logger initialized") - log_info_output = capsys.readouterr().err - must_not_match = r"^\d{4}-\d{2}-\d{2}.*" - assert not re.match(must_not_match, log_info_output) - match_pattern = r"^\d{2}:\d{2}:\d{2}.*" - assert re.match(match_pattern, log_info_output) + log_info_output = capfd.readouterr().err + must_not_match = r"^\d{4}-\d{2}-\d{2}.*" + assert not re.match(must_not_match, log_info_output) + match_pattern = r"^\d{2}:\d{2}:\d{2}.*" + assert re.match(match_pattern, log_info_output), log_info_output -def test_configured_tensorflow_logger_present(logging_conf, capsys): +def test_configured_tensorflow_logger_present(logging_conf, capfd): """Ensure example ocrd_logging.conf is valid and contains logger tensorflow""" # arrange - os.chdir(logging_conf) - resetLogging() logger_under_test = getLogger('tensorflow') # act info logger_under_test.info("tensorflow logger initialized") - log_info_output = capsys.readouterr().err + log_info_output = capfd.readouterr().err assert not log_info_output # act error logger_under_test.error("tensorflow has error") - log_error_output = capsys.readouterr().err + log_error_output = capfd.readouterr().err assert log_error_output -def test_configured_shapely_logger_present(logging_conf, capsys): +def test_configured_shapely_logger_present(logging_conf, capfd): """Ensure example ocrd_logging.conf is valid and contains logger shapely.geos""" # arrange - os.chdir(logging_conf) - resetLogging() logger_under_test = getLogger('shapely.geos') # act info 
logger_under_test.info("shapely.geos logger initialized") - log_info_output = capsys.readouterr().err + log_info_output = capfd.readouterr().err assert not log_info_output # act error logger_under_test.error("shapely alert") - log_error_output = capsys.readouterr().err + log_error_output = capfd.readouterr().err assert log_error_output if __name__ == '__main__': diff --git a/tests/test_mets_server.py b/tests/test_mets_server.py index dc94d6c560..3bb96535c0 100644 --- a/tests/test_mets_server.py +++ b/tests/test_mets_server.py @@ -22,20 +22,17 @@ from requests.exceptions import ConnectionError from ocrd import Resolver, OcrdMetsServer, Workspace -from ocrd_utils import pushd_popd, MIMETYPE_PAGE, initLogging, setOverrideLogLevel +from ocrd_utils import pushd_popd, MIMETYPE_PAGE, initLogging, setOverrideLogLevel, disableLogging, getLogger TRANSPORTS = ['/tmp/ocrd-mets-server.sock', 'http://127.0.0.1:12345'] -initLogging() -setOverrideLogLevel(10) - @fixture(scope='function', name='start_mets_server', params=TRANSPORTS) def fixture_start_mets_server(request, tmpdir) -> Iterable[Tuple[str, Workspace]]: - tmpdir = str(tmpdir) - def _start_mets_server(*args, **kwargs): - mets_server = OcrdMetsServer(*args, **kwargs) - mets_server.startup() + initLogging() + #setOverrideLogLevel(10) + logger = getLogger('ocrd') + tmpdir = str(tmpdir) mets_server_url = request.param if mets_server_url == TRANSPORTS[0]: @@ -47,13 +44,26 @@ def _start_mets_server(*args, **kwargs): copytree(assets.path_to('SBB0000F29300010000/data'), tmpdir) workspace = Workspace(Resolver(), tmpdir) - p = Process(target=_start_mets_server, kwargs={'workspace': workspace, 'url': request.param}) + class MetsServerProcess(Process): + def __init__(self, *args, **kwargs): + self.server = OcrdMetsServer(*args, **kwargs) + super().__init__() + def run(self): + self.server.startup() + def terminate(self): + self.server.workspace.save_mets() + super().terminate() + p = MetsServerProcess(workspace=workspace, 
url=request.param) p.start() + logger.info("started METS Server") sleep(1) # sleep to start up server workspace_server = Workspace(Resolver(), tmpdir, mets_server_url=mets_server_url) yield mets_server_url, workspace_server p.terminate() + p.join() + logger.info("terminated METS Server") rmtree(tmpdir, ignore_errors=True) + disableLogging() def add_file_server(x, force=False): mets_server_url, directory, i = x diff --git a/tests/test_resolver.py b/tests/test_resolver.py index c2575b6086..97d2ee6658 100644 --- a/tests/test_resolver.py +++ b/tests/test_resolver.py @@ -118,7 +118,7 @@ def test_workspace_from_url_kant_with_resources(mock_request, tmp_path): @patch.object(Session, "get") def test_workspace_from_url_kant_with_resources_existing_local(mock_request, tmp_path): """ - Fail with clobber_mets=False, succeeed with clobber_mets=True + Fail with clobber_mets=False, succeed with clobber_mets=True """ # arrange diff --git a/tests/test_resource_manager.py b/tests/test_resource_manager.py index 653167e10a..286f6ea6b0 100644 --- a/tests/test_resource_manager.py +++ b/tests/test_resource_manager.py @@ -80,7 +80,7 @@ def test_resources_manager_from_environment(tmp_path, monkeypatch): assert mgr.userdir == tmp_path -def test_resources_manager_config_explicite(tmp_path): +def test_resources_manager_config_explicit(tmp_path): # act from ocrd.resource_manager import OcrdResourceManager