diff --git a/.circleci/config.yml b/.circleci/config.yml index 1e64ed6..7ba22cf 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ jobs: - run: apt-get update && apt-get install -y --no-install-recommends make wget unzip - checkout - run: make deps-ubuntu - - run: make install VIRTUAL_ENV=/usr/local + - run: make install - run: ocrd resmgr download -n https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/german_print/tessdata_best/german_print_0.877_1254744_7309067.traineddata ocrd-tesserocr-recognize german_print.traineddata diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b05c3a5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +test/ +dist/ +build/ diff --git a/.gitignore b/.gitignore index eff4653..404b10a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ *~ test/data* +.pytest_cache +__pycache__ +*.py[cod] +*.egg-info +/build +/dist diff --git a/Dockerfile b/Dockerfile index 4cede7d..2e272d7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,22 +3,29 @@ FROM $DOCKER_BASE_IMAGE ARG VCS_REF ARG BUILD_DATE LABEL \ - maintainer="https://github.com/bertsky/workflow-configuration/issues" \ + maintainer="https://ocr-d.de/en/contact" \ org.label-schema.vcs-ref=$VCS_REF \ org.label-schema.vcs-url="https://github.com/bertsky/workflow-configuration" \ - org.label-schema.build-date=$BUILD_DATE + org.label-schema.build-date=$BUILD_DATE \ + org.opencontainers.image.vendor="DFG-Funded Initiative for Optical Character Recognition Development" \ + org.opencontainers.image.title="workflow-configuration" \ + org.opencontainers.image.description="" \ + org.opencontainers.image.source="https://github.com/bertsky/workflow-configuration" \ + org.opencontainers.image.documentation="https://github.com/bertsky/workflow-configuration/blob/${VCS_REF}/README.md" \ + org.opencontainers.image.revision=$VCS_REF \ + org.opencontainers.image.created=$BUILD_DATE \ + org.opencontainers.image.base.name=ocrd/core SHELL ["/bin/bash", 
"-c"] -WORKDIR /build/workflow-configuration +WORKDIR /build/module +COPY . . COPY ocrd-tool.json . -COPY ocrd-make ocrd-import ocrd-page-transform xsl-transform . -COPY Makefile *.mk . -COPY *.xsl . -COPY README.md . +# prepackage ocrd-tool.json as ocrd-all-tool.json +RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json +# install everything and reduce image size RUN make deps-ubuntu -RUN make install VIRTUAL_ENV=/usr/local -RUN rm -fr /build/workflow-configuration +RUN make install && rm -fr /build/module WORKDIR /data VOLUME ["/data"] diff --git a/Makefile b/Makefile index 4004368..89eeec4 100644 --- a/Makefile +++ b/Makefile @@ -1,35 +1,21 @@ # OCR-D workflow configuration installation makefile # # Install workflow configurations persistently by running: -# `make install` -# (in the git repo), which will copy workflow.mk (as `Makefile`), all -# preconfigured makefiles, and some shell scripts into -# a fixed target directory under the VIRTUAL_ENV prefix). -# -# For installation via shell-script: -VIRTUAL_ENV ?= $(CURDIR)/local -# copy `ocrd-make` here: -BINDIR = $(abspath $(VIRTUAL_ENV))/bin -# copy the makefiles here: -SHAREDIR = $(abspath $(VIRTUAL_ENV))/share/workflow-configuration - -# we need associative arrays, process substitution etc. -# also, fail on failed intermediates as well: -SHELL = bash -o pipefail - -CONFIGURATION := $(abspath $(firstword $(MAKEFILE_LIST))) +# `make install` or `pip install .` (in the git repo), +# which will copy all distribution files (`Makefile` and all +# preconfigured makefiles `*.mk`, as well as some Python +# scripts and XSL transforms `*.xsl`) into the Python +# site directory. +# Using venv is recommended. 
-CONFIGDIR := $(dir $(CONFIGURATION)) +PYTHON = python3 +PIP = pip3 +export PYTHONIOENCODING=utf8 -EXISTING_MAKEFILES = $(patsubst $(CONFIGDIR)/%,%,$(wildcard $(CONFIGDIR)/*.mk)) -EXISTING_TRANSFORMS = $(patsubst $(CONFIGDIR)/%,%,$(wildcard $(CONFIGDIR)/*.xsl)) +SHELL = bash -o pipefail -ifeq ($(filter workflow.mk,$(EXISTING_MAKEFILES)),) -$(error "Found no .mk makefiles in source directory $(CONFIGDIR)") -endif -ifeq ($(EXISTING_TRANSFORMS),) -$(error "Found no .xsl transforms in source directory $(CONFIGDIR)") -endif +DOCKER_BASE_IMAGE = docker.io/ocrd/core:v3.1.0 +DOCKER_TAG = bertsky/workflow-configuration help: @echo "Installing OCR-D workflow configurations:" @@ -39,51 +25,46 @@ help: @echo @echo " Targets:" @echo " * help (this message)" - @echo " * test (run test suite)" - @echo " * deps-ubuntu (install extra system packages needed here, beyond ocrd and processors)" - @echo " * install (copy $(SHPROGS) and configuration makefiles to" - @echo " * VIRTUAL_ENV=$(VIRTUAL_ENV)" - @echo " * from repository workdir)" - @echo " * uninstall (remove $(SHPROGS) and configuration makefiles from" - @echo " * VIRTUAL_ENV=$(VIRTUAL_ENV))" + @echo " * deps-ubuntu (install system packages needed here, beyond ocrd and processors)" + @echo " * deps (install Python packages needed here)" + @echo " * install (install this package via $(PIP))" + @echo " * build (build source and binary distribution)" + @echo " * uninstall (remove this package via $(PIP))" @echo " * %.mk (any filename with suffix .mk not existing yet: spawn new makefile from pattern)" @echo " * test (run test suite)" @echo @echo " Variables:" @echo - @echo " * VIRTUAL_ENV: directory prefix to use for installation" + @echo " * PYTHON (name of Python version binary [$(PYTHON)])" + @echo " * PIP (name of Python pip version binary [$(PIP)])" .PHONY: help deps-ubuntu: apt-get -y install parallel xmlstarlet bc sed libdbd-sqlite3-perl -XSLPROGS =$(EXISTING_TRANSFORMS:%.xsl=%) -SHPROGS = ocrd-make ocrd-import ocrd-page-transform 
-PROGS = $(SHPROGS) $(XSLPROGS) -install-bin: $(PROGS:%=$(BINDIR)/%) | $(BINDIR) - -$(SHPROGS:%=$(BINDIR)/%): $(BINDIR)/%: % - sed 's,^SHAREDIR=.*,SHAREDIR="$(SHAREDIR)",' < $< > $@ - chmod +x $@ +deps: requirements.txt + $(PIP) install -r $< -$(XSLPROGS:%=$(BINDIR)/%): %: xsl-transform - sed 's,^SHAREDIR=.*,SHAREDIR="$(SHAREDIR)",' < $< > $@ - chmod +x $@ +install: + $(PIP) install . -$(BINDIR) $(SHAREDIR): - @mkdir -p $@ +install-dev: + $(PIP) install -e . -install: install-bin | $(SHAREDIR) - cp -Lf $(EXISTING_MAKEFILES) $(EXISTING_TRANSFORMS) ocrd-tool.json $(SHAREDIR) - mv $(SHAREDIR)/workflow.mk $(SHAREDIR)/Makefile +build: + $(PIP) install build + $(PYTHON) -m build . uninstall: - $(RM) $(PROGS:%=$(BINDIR)/%) - $(RM) -r $(SHAREDIR) + $(PIP) uninstall workflow_configuration +TEST_WORKFLOW = -f all-tess-MODEL.mk MODEL=german_print \ + -f transform.mk TROPTIONS="-P xsl page-extract-text.xsl \ + -P xslt-params '-s level=line' -P mimetype text/plain" \ + -f cat-files.mk define testrecipe = -function testfun { pushd `mktemp -d` && cp -pr $(abspath $^) . && /usr/bin/time ocrd-make -f all-tess-MODEL.mk MODEL=german_print LOGLEVEL=ERROR $(^F) "$$@" && $(RM) -r $$DIRSTACK; }; testfun +function testfun { pushd `mktemp -d` && cp -pr $(abspath $^) . && /usr/bin/time ocrd-make $(TEST_WORKFLOW) LOGLEVEL=ERROR $(^F) "$$@" && cat $(^F:%=%.*.log) && $(RM) -r $$DIRSTACK; }; testfun endef test: test/data1 test/data2 $(testrecipe) @@ -106,16 +87,15 @@ test/data2: ocrd workspace -d $@ rename-group ORIGINAL OCR-D-IMG ocrd workspace -d $@ prune-files -DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda-torch:v2.69.0 -DOCKER_TAG ?= bertsky/workflow-configuration docker: docker build \ + -t $(DOCKER_TAG) \ --build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \ - --build-arg VCS_REF=$$(git rev-parse --short HEAD) \ - --build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \ - -t $(DOCKER_TAG) . 
+ --build-arg VCS_REF=$$(git rev-parse --short HEAD) \ + --build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \ + . -.PHONY: deps-ubuntu install install-bin uninstall test docker +.PHONY: deps-ubuntu deps install install-dev build uninstall test docker # spawn a new configuration define skeleton = @@ -123,24 +103,21 @@ define skeleton = # # Install by copying (or symlinking) makefiles into a directory # where all OCR-D workspaces (unpacked BagIts) reside and running -# `make` there (or including files from there). +# `ocrd-make` there (or including files from there). # # Call via: -# `make -f WORKFLOW-CONFIG.mk` +# `ocrd-make -f WORKFLOW-CONFIG.mk` # # To rebuild partially, you must pass -W to `make`: -# `make -f WORKFLOW-CONFIG.mk -W FILEGRP` +# `ocrd-make -f WORKFLOW-CONFIG.mk -W FILEGRP` # # To build in parallel, use `-j [CPUS] [-l [LOADLEVEL]]` etc. # # To get general help: -# `make -f WORKFLOW-CONFIG.mk help` +# `ocrd-make --help` # # To get a description of the workflow: -# `make -f WORKFLOW-CONFIG.mk info` - -### -# From here on, custom configuration begins. +# `ocrd-make -f WORKFLOW-CONFIG.mk info` INPUT = OCR-D-IMG @@ -150,7 +127,7 @@ $$(INPUT): OUTPUT = OCR-D-OUT $$(OUTPUT): $$(INPUT) $$(OUTPUT): TOOL = ocrd-dummy -$$(OUTPUT): PARAMS = +$$(OUTPUT): OPTIONS = info: @echo "This is a dummy configuration that creates a copy $$(OUTPUT) of the input fileGrp $$(INPUT)" @@ -159,11 +136,6 @@ info: .DEFAULT_GOAL = $$(OUTPUT) -# Down here, custom configuration ends. 
-### - -SELFDIR := $$(dir $$(abspath $$(firstword $$(MAKEFILE_LIST)))) -include $$(SELFDIR)/Makefile endef export skeleton @@ -171,12 +143,9 @@ export skeleton %.mk: @echo >$@ "$$skeleton" + # do not search for implicit rules here: %/Makefile: ; Makefile: ; -local.mk: ; ocrd-tool.json: ; -$(CONFIGURATION): ; -$(EXISTING_MAKEFILES): ; -$(EXISTING_TRANSFORMS): ; -$(PROGS): ; +local.mk: ; diff --git a/README.md b/README.md index 9fe2c76..41e7f65 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ ## OCR-D workflow configurations based on makefiles -This provides an attempt at running [OCR-D](https://ocr-d.de) workflows configured and controlled via makefiles using [GNU bash](http://www.gnu.org/software/bash), [GNU make](http://www.gnu.org/software/make/) and [GNU parallel](http://www.gnu.org/software/parallel). +This provides an attempt at running [OCR-D](https://ocr-d.de) workflows +configured and controlled via makefiles using [GNU bash](http://www.gnu.org/software/bash), +[GNU make](http://www.gnu.org/software/make/) and [GNU parallel](http://www.gnu.org/software/parallel). Makefilization offers the following _advantages_: @@ -63,9 +65,14 @@ Or equivalently, install the following packages: - `xmlstarlet` - `bc` and `sed` -Additionally, you must of course install [ocrd](https://github.com/OCR-D/core) itself along with its dependencies in the current shell environment. Moreover, depending on the specific configurations you want to use (i.e. the processors it contains), additional modules must be installed. See [OCR-D setup guide](https://ocr-d.de/en/setup) for instructions. +Additionally, you must of course install [ocrd](https://github.com/OCR-D/core) itself +along with its dependencies in the current Python virtual environment (venv). Moreover, +depending on the specific configurations you want to use (i.e. the processors it contains), +additional modules must be installed. See [OCR-D setup guide](https://ocr-d.de/en/setup) +for instructions. 
-(Yes, `workflow-configuration` is already part of [ocrd_all](https://github.com/OCR-D/ocrd_all), which is also available on [Dockerhub](https://hub.docker.com/r/ocrd/all).) +(Yes, `workflow-configuration` is already part of [ocrd_all](https://github.com/OCR-D/ocrd_all), +which is also available on [Dockerhub](https://hub.docker.com/r/ocrd/all).) ### Installation @@ -74,18 +81,13 @@ Run: make install -... if you are in a (Python) virtual environment. Otherwise specify the installation prefix directory via environment variable `VIRTUAL_ENV`. +... if you are in a (Python) virtual environment, which is recommended. -Assuming `$VIRTUAL_ENV/bin` is in your `PATH`, you can then call: - - cd WORKSPACE && make [OPTIONS] -f WORKFLOW-CONFIG.mk - make -C WORKSPACE [OPTIONS] -f WORKFLOW-CONFIG.mk - -... for processing single workspace directory, or ... +You can then call: ocrd-make [OPTIONS] -f WORKFLOW-CONFIG.mk WORKSPACE... -... for processing multiple workspaces at once (with the same interface as above). +... for processing any number of workspace directories. Where: @@ -95,10 +97,12 @@ Where: Calling workflows is possible from anywhere in your filesystem, but for the `WORKFLOW_CONFIG.mk` you may need to: -- either provide the `*.mk` configurations in the source directory at installation time (to ensure they are installed under the installation prefix and can always be found by file name only) +- either provide the `*.mk` configurations in the source directory at installation time + (to ensure they are installed under the site prefix and can always be found by file name) - or provide full paths at runtime (by absolute path name, or relative to the CWD). -(The previous version of `ocrd-make` tried to copy or symlink all makefiles to the runtime directory. You can still use those, but should remove the old `Makefile`.) +(The previous version of `ocrd-make` tried to copy or symlink all makefiles to the runtime directory. 
+ You can still use those, but should remove the old `Makefile`.) ### Docker Image @@ -107,7 +111,8 @@ Instead of the above native installation steps, you can use the prebuilt image f docker pull bertsky/workflow-configuration docker run -V /path/to/data:/data bertsky/workflow-configuration ocrd-make ... -For general guidance on using Docker with OCR-D, see [User Guide](https://ocr-d.de/en/user_guide#translating-native-commands-to-docker-calls). +For general guidance on using Docker with OCR-D, see +[User Guide](https://ocr-d.de/en/user_guide#translating-native-commands-to-docker-calls). ### Usage @@ -126,21 +131,41 @@ To get help for the import tool:
-Usage: ocrd-import [OPTIONS] [DIRECTORY] - -with options: - -i|--ignore keep going after unknown file types - -s|--skip SUFFIX ignore file names ending in given SUFFIX (repeatable) - -R|--regex EXPR only include paths matching given EXPR (repeatable) - -C|--no-convert do not attempt to convert image file types - -r|--render DPI when converting PDFs, render at DPI pixel density - -P|--nonnum-ids do not use numeric pageIds but basename patterns - -B|--basename only use basename for IDs - -Create OCR-D workspace meta-data (mets.xml) in DIRECTORY (or /home/xbert/unsortiert/arbeit/heyer/tools/ocrd_tesserocr), importing... -* all image files (with known file extension or convertible via ImageMagick) under fileGrp OCR-D-IMG -* all .xml files (if they validate as PAGE-XML) under fileGrp OCR-D-SEG-PAGE -...but failing otherwise. +Usage: ocrd-import [OPTIONS] WORKSPACE_DIR + + Create OCR-D workspace meta-data (mets.xml) in WORKSPACE_DIR (or $PWD), importing... + * all image files (with known file extension or convertible via ImageMagick) under fileGrp `image_group` + * all .xml files (if they validate as PAGE-XML) under fileGrp `pagexml_group` + * all .xml files (if they validate as ALTO-XML) under fileGrp `altoxml_group` + ...but failing otherwise (unless `ignore` is set) + +Options: + -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] + Log level + -i, --ignore keep going after unknown file types + -s, --skip SUFFIX ignore file names ending in given SUFFIX + (repeatable) + -R, --regex EXPR only include paths matching given EXPR + (repeatable) + -C, --no-convert do not attempt to convert image file types + -r, --render DPI when converting PDFs, render at DPI pixel + density [default: 300] + -P, --nonnum-ids do not use numeric pageIds but basename + patterns + -B, --basename only use basename for IDs + -n, --dry-run only show resulting METS to stdout via pager + -I, --image-group TEXT fileGrp to place detected or converted + images into [default: OCR-D-IMG] + -X, 
--pagexml-group TEXT fileGrp to place detected PAGE-XML into + [default: OCR-D-PAGE] + -A, --altoxml-group TEXT fileGrp to place detected ALTO-XML into + [default: OCR-D-ALTO] + -G, --directory-groups instead of assigning files to `image_group` + or `pagexml_group`, and trying to convert + everything else to images, create a group + for every subdirectory and auto-detect its + MIME types + -h, --help Show this message and exit.@@ -162,6 +187,8 @@ To perform various tasks via XSLT on PAGE-XML files (these all share the same op page-remove-metadataitem # remove all MetadataItem entries page-remove-dead-regionrefs # remove non-existing regionRefs page-remove-empty-readingorder # remove empty ReadingOrder or groups + page-remove-empty-text-regions # remove empty TextRegion entries + page-remove-empty-lines # remove empty TextLine entries page-remove-all-regions # remove all *Region (and TextLine and Word and Glyph) entries page-remove-text-regions # remove all TextRegion (and TextLine and Word and Glyph) entries page-remove-regions # remove all *Region (and TextLine and Word and Glyph) entries of $type @@ -189,21 +216,26 @@ To perform various tasks via XSLT on PAGE-XML files (these all share the same op
Usage: NAME [OPTIONS] [FILE] -with options: - -s name=value set param NAME to string literal VALUE (repeatable) - -p name=value set param NAME to XPath expression VALUE (repeatable) - -i|--inplace overwrite input file with result of transformation - -P|--pretty pretty-print output (line breaks with indentation) - -d|--diff show diff between input and output - -D|--dump just print the transformation stylesheet (XSL) - -h|--help just show this message - -Open PAGE-XML file FILE (or stdin) and apply the XSL transformation "NAME.xsl" -Write the result to stdout, unless... - -i / --inplace is given - in which case the result is written back to the - file silently, or - -d / --diff is given - in which case the result will be compared to the - input and a patch shown on stdout. + Open PAGE file XMLFILE (or stdin) and apply the XSL transformation "page-add-nsprefix-pc.xsl" + Write the result to stdout, unless... + -i / --inplace is given - in which case the result is written back to the + file silently, or + -d / --diff is given - in which case the result will be compared to the + input and a patch shown on stdout. + +Options: + -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] + Log level + -s, --string-param NAME=VALUE set param NAME to string literal VALUE + -p, --xpath-param NAME=VALUE set param NAME to XPath expression VALUE + -i, --inplace overwrite input file with result of + transformation + -P, --pretty pretty-print output (line breaks with + indentation + -d, --diff show diff between input and output via pager + -D, --dump just print the transformation stylesheet + (XSL) + -h, --help Show this message and exit.@@ -232,50 +264,59 @@ use `ocrd-page-transform` and pass the filename of the transformation as paramet
-Usage: ocrd-page-transform [OPTIONS]
+Usage: ocrd-page-transform [worker|server] [OPTIONS]
apply arbitrary XSL transformation file for PAGE-XML
- > Processor base class and helper functions. A processor is a tool
- > that implements the uniform OCR-D command-line interface for run-
- > time data processing. That is, it executes a single workflow step,
- > or a combination of workflow steps, on the workspace (represented by
- > local METS). It reads input files for all or requested physical
- > pages of the input fileGrp(s), and writes output files for them into
- > the output fileGrp(s). It may take a number of optional or
- > mandatory parameters. Process the :py:attr:`workspace` from the
- > given :py:attr:`input_file_grp` to the given
- > :py:attr:`output_file_grp` for the given :py:attr:`page_id` under
- > the given :py:attr:`parameter`.
-
- > (This contains the main functionality and needs to be overridden by
- > subclasses.)
+ > Transform pages with the given XSLT.
-Options:
+ > Open the input PAGE element hierarchy and process it with the XSLT
+ > processor parsed from the `xsl` resource file, passing `xslt-params`
+ > as XSLT parameters (if any).
+
+ > Generate a new PAGE object from the resulting hierarchy, finally
+ > serialise and add it as new output file.
+
+Subcommands:
+ worker Start a processing worker rather than do local processing
+ server Start a processor server rather than do local processing
+
+Options for processing:
+ -m, --mets URL-PATH URL or file path of METS to process [./mets.xml]
+ -w, --working-dir PATH Working directory of local workspace [dirname(URL-PATH)]
-I, --input-file-grp USE File group(s) used as input
-O, --output-file-grp USE File group(s) used as output
- -g, --page-id ID Physical page ID(s) to process
+ -g, --page-id ID Physical page ID(s) to process instead of full document []
--overwrite Remove existing output pages/images
- (with --page-id, remove only those)
+ (with "--page-id", remove only those).
+ Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE
+ --debug Abort on any errors with full stack trace.
+ Short-hand for OCRD_MISSING_OUTPUT=ABORT
--profile Enable profiling
- --profile-file Write cProfile stats to this file. Implies --profile
+ --profile-file PROF-PATH Write cProfile stats to PROF-PATH. Implies "--profile"
-p, --parameter JSON-PATH Parameters, either verbatim JSON string
or JSON file path
-P, --param-override KEY VAL Override a single JSON object key-value pair,
taking precedence over --parameter
- -m, --mets URL-PATH URL or file path of METS to process
- -w, --working-dir PATH Working directory of local workspace
+ -U, --mets-server-url URL URL of a METS Server for parallel incremental access to METS
+ If URL starts with http:// start an HTTP server there,
+ otherwise URL is a path to an on-demand-created unix socket
-l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE]
- Log level
+ Override log level globally [INFO]
+ --log-filename LOG-PATH File to redirect stderr logging to (overriding ocrd_logging.conf).
+
+Options for information:
-C, --show-resource RESNAME Dump the content of processor resource RESNAME
-L, --list-resources List names of processor resources
- -J, --dump-json Dump tool description as JSON and exit
- -h, --help This help message
+ -J, --dump-json Dump tool description as JSON
+ -D, --dump-module-dir Show the 'module' resource location path for this processor
+ -h, --help Show this message
-V, --version Show version
Parameters:
"xsl" [string - REQUIRED]
- File path of the XSL transformation script
+ File path of the XSL transformation script (see `ocrd resmgr` for
+ prepackaged and user-installed files available by file name)
"xslt-params" [string - ""]
Assignment of XSL transformation parameter values, given as in
`xmlstarlet` (which differentiates between `-s name=value` for
@@ -284,9 +325,10 @@ Parameters:
"pretty-print" [number - 0]
Reformat with line breaks and this many spaces of indentation after
XSL transformation (unless zero).
- "mimetype" [string - "application/vnd.prima.page+xml"]
+ "mimetype" [string - "application/vnd.prima.page+xml"]
MIME type to register the output files under (should correspond to
`xsl` result)
+
@@ -308,21 +350,26 @@ likewise wrapped as standalone CLIs `mets-...`:
Usage: NAME [OPTIONS] [FILE] -with options: - -s name=value set param NAME to string literal VALUE (repeatable) - -p name=value set param NAME to XPath expression VALUE (repeatable) - -i|--inplace overwrite input file with result of transformation - -P|--pretty pretty-print output (line breaks with indentation) - -d|--diff show diff between input and output - -D|--dump just print the transformation stylesheet (XSL) - -h|--help just show this message - -Open METS-XML file FILE (or stdin) and apply the XSL transformation "NAME.xsl" -Write the result to stdout, unless... - -i / --inplace is given - in which case the result is written back to the - file silently, or - -d / --diff is given - in which case the result will be compared to the - input and a patch shown on stdout. + Open METS file XMLFILE (or stdin) and apply the XSL transformation "mets-copy-agents.xsl" + Write the result to stdout, unless... + -i / --inplace is given - in which case the result is written back to the + file silently, or + -d / --diff is given - in which case the result will be compared to the + input and a patch shown on stdout. + +Options: + -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] + Log level + -s, --string-param NAME=VALUE set param NAME to string literal VALUE + -p, --xpath-param NAME=VALUE set param NAME to XPath expression VALUE + -i, --inplace overwrite input file with result of + transformation + -P, --pretty pretty-print output (line breaks with + indentation) + -d, --diff show diff between input and output via pager + -D, --dump just print the transformation stylesheet + (XSL) + -h, --help Show this message and exit.@@ -338,14 +385,11 @@ To run a configuration... (Yes, you can have to look inside and browse its rules!) 3. Execute: - cd WORKSPACE && make [OPTIONS] -f WORKFLOW-CONFIG.mk # or - make -C WORKSPACE [OPTIONS] -f WORKFLOW-CONFIG.mk - - ... for processing single workspace directory, or ... 
- ocrd-make [OPTIONS] -f WORKFLOW-CONFIG.mk all - (The special target `all` (which is also the default goal) will search for all workspaces in the current directory recursively.) You can also run on a subset of workspaces by passing these as goals on the command line... + (The special target `all` (which is also the default goal) will search for all workspaces + in the current directory recursively.) You can also run on a subset of workspaces + by passing these as goals on the command line... ocrd-make -f WORKFLOW-CONFIG.mk PATH/TO/WORKSPACE1 PATH/TO/WORKSPACE2 ... @@ -403,22 +447,22 @@ Options -j and -l are intercepted.) To get help: - [ocrd-]make help + ocrd-make help To get a short description of the chosen configuration: - [ocrd-]make -f CONFIGURATION.mk info + ocrd-make -f CONFIGURATION.mk info To see the command sequence that would be executed for the chosen configuration (in the format of `ocrd process`): - [ocrd-]make -f CONFIGURATION.mk show + ocrd-make -f CONFIGURATION.mk show To run a workflow server for the command sequence that would be executed for the chosen configuration (to be controlled via `ocrd workflow client` or HTTP): - [ocrd-]make -f CONFIGURATION.mk server + ocrd-make -f CONFIGURATION.mk server To spawn a new configuration file, in the directory of the source repository, do: @@ -443,11 +487,6 @@ You can also use that pattern to specify any fileGrp other than the `.DEFAULT_GO ocrd-make -f CONFIGURATION.mk .DEFAULT_GOAL=OCR-D-SEG-LINE all -If you run `make` in the workspace directly instead of having `ocrd-make` do it recursively, then no `all` target exists and you can directly set the target fileGrp to replace `.DEFAULT_GOAL`: - - make -C WORKSPACE -f CONFIGURATION.mk -W OCR-D-BIN - make -C WORKSPACE -f CONFIGURATION.mk OCR-D-SEG-LINE - There are 6 **special variables** and 1 **additional option**: ##### LOGLEVEL @@ -455,14 +494,12 @@ There are 6 **special variables** and 1 **additional option**: To override the default (or configured) log 
levels for all processors and libraries, use `LOGLEVEL`. For example, to get debugging everywhere, do: ocrd-make -f CONFIGURATION.mk all LOGLEVEL=DEBUG - make -C WORKSPACE -f CONFIGURATION.mk LOGLEVEL=DEBUG ##### PAGES To process only a subset of pages in all fileGrps, set `PAGES`. For example, to only consider pages `PHYS_0005` through `PHYS_0007`, do: ocrd-make -f CONFIGURATION.mk all PAGES=PHYS_0005..PHYS_0007 - make -C WORKSPACE -f CONFIGURATION.mk PAGES=PHYS_0005..PHYS_0007 The variable gets interpreted as the usual [--page-id parameter](https://ocr-d.de/en/spec/cli#-g---page-id-id) by processors, so it supports range expressions, comma-separated lists and regular expressions. @@ -470,7 +507,6 @@ range expressions, comma-separated lists and regular expressions. If the METS provides physical page labels (`@ORDER` or `@ORDERLABEL`), then these work as well: ocrd-make -f CONFIGURATION.mk all PAGES=5..7 - make -C WORKSPACE -f CONFIGURATION.mk PAGES=5..7 ##### TIMEOUT @@ -582,7 +618,6 @@ Next, edit the file to your needs: Write rules using file groups as prerequisite #### Recommendations -- Keep the comments and the `include Makefile` directive in the file. - Change/customize at least the `info` target, and the `INPUT` and `OUTPUT` name/rule. - Copy/paste rules from the existing configurations. - Define variables with the names of all target/prerequisite file groups, so rules and dependent targets can re-use them (and the names can be easily changed later). @@ -632,8 +667,6 @@ EVAL: TOOL = ocrd-cor-asv-ann-evaluate # we must override the default goal to be our desired overall target: .DEFAULT_GOAL = EVAL -# ALWAYS necessary: -include Makefile ``` ### Testing diff --git a/all-tess-MODEL.mk b/all-tess-MODEL.mk deleted file mode 100644 index 5963286..0000000 --- a/all-tess-MODEL.mk +++ /dev/null @@ -1,45 +0,0 @@ -# This file can run a workflow on a single workspace (non-recursively). 
-# -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside and running -# `make` there (or including files from there). -# -# Call via: -# `make -f WORKFLOW-CONFIG.mk` -# -# To rebuild partially, you must pass -W to `make`: -# `make -f WORKFLOW-CONFIG.mk -W FILEGRP` -# -# To build in parallel, use `-j [CPUS] [-l [LOADLEVEL]]` etc. -# -# To get general help: -# `make -f WORKFLOW-CONFIG.mk help` -# -# To get a description of the workflow: -# `make -f WORKFLOW-CONFIG.mk info` - -### -# From here on, custom configuration begins. - -INPUT = OCR-D-IMG - -$(INPUT): - ocrd workspace find -G $@ --download - -OUTPUT = OCR-D-OCR-TESS -$(OUTPUT): $(INPUT) -$(OUTPUT): TOOL = ocrd-tesserocr-recognize -$(OUTPUT): PARAMS = "segmentation_level": "region", "model": "$(or $(MODEL),Fraktur+Latin)", "shrink_polygons": true #, "auto_model": true - -info: - @echo "This is a simple workflow with Tesseract segmentation+recognition" - @echo "from $(INPUT) to $(OUTPUT) with recognition model MODEL=$(MODEL)" - -.PHONY: info - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile diff --git a/all-tess-frak2021.mk b/all-tess-frak2021.mk deleted file mode 100644 index 7a6f343..0000000 --- a/all-tess-frak2021.mk +++ /dev/null @@ -1,45 +0,0 @@ -# This file can run a workflow on a single workspace (non-recursively). -# -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside and running -# `make` there (or including files from there). -# -# Call via: -# `make -f WORKFLOW-CONFIG.mk` -# -# To rebuild partially, you must pass -W to `make`: -# `make -f WORKFLOW-CONFIG.mk -W FILEGRP` -# -# To build in parallel, use `-j [CPUS] [-l [LOADLEVEL]]` etc. 
-# -# To get general help: -# `make -f WORKFLOW-CONFIG.mk help` -# -# To get a description of the workflow: -# `make -f WORKFLOW-CONFIG.mk info` - -### -# From here on, custom configuration begins. - -INPUT = OCR-D-IMG - -$(INPUT): - ocrd workspace find -G $@ --download - -OUTPUT = OCR-D-OCR-TESS-ALL-FRAK2021 -$(OUTPUT): $(INPUT) -$(OUTPUT): TOOL = ocrd-tesserocr-recognize -$(OUTPUT): PARAMS = "segmentation_level": "region", "model": "frak2021+GT4HistOCR+frk+deu-frak+deu+Fraktur+Latin", "shrink_polygons": true #, "auto_model": true - -info: - @echo "This is a simple workflow with Tesseract segmentation+recognition" - @echo "from $(INPUT) to $(OUTPUT) with various Fraktur models" - -.PHONY: info - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile diff --git a/gt-binarize-page-olena-sauvola-clip-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-sauvola-clip-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index 2a6a9f0..0000000 --- a/gt-binarize-page-olena-sauvola-clip-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,89 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. - -info: - @echo "Read GT line segmentation," - @echo "then binarize pages," - @echo "then clip regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." 
- -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -CLIP = $(BIN)-CLIP - -$(CLIP): $(BIN) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -RESEG = $(CLIP)-RESEG - -$(RESEG): $(CLIP) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = 
$(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-sauvola-denoise-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-sauvola-denoise-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index 9196bc7..0000000 --- a/gt-binarize-page-olena-sauvola-denoise-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,95 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. - -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise pages," - @echo "then clip regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." 
- -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -CLIP = $(DEN)-CLIP - -$(CLIP): $(DEN) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -RESEG = $(CLIP)-RESEG - -$(RESEG): $(CLIP) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - 
-$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract-extract-lines.mk b/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract-extract-lines.mk deleted file mode 100644 index 8dd03cb..0000000 --- a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract-extract-lines.mk +++ /dev/null @@ -1,139 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. - -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise+deskew pages," - @echo "then clip+deskew regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally extract line images and line texts" - @echo "(both the GT and OCR versions) into one directory," - @echo "with conventional filename suffixes for OCR/post-correction training." 
- -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -FLIP = $(DEN)-DESKEW-tesseract - -$(FLIP): $(DEN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -FLIP2 = $(CLIP)-DESKEW-tesseract - -$(FLIP2): $(CLIP) -$(FLIP2): TOOL = ocrd-tesserocr-deskew -$(FLIP2): PARAMS = "operation_level": "region" - -DESK2 = $(FLIP2)-DESKEW-ocropy - -$(DESK2): $(FLIP2) -$(DESK2): TOOL = ocrd-cis-ocropy-deskew -$(DESK2): PARAMS = "level-of-operation": "region" - -RESEG = $(DESK2)-RESEG - -$(RESEG): $(DESK2) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) 
$(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -LINES = $(patsubst %,OCR-D-IMG-LINES-%,$(DEW) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8)) - -$(LINES): OCR-D-IMG-LINES-%: % -$(LINES): TOOL = ocrd-segment-extract-lines -$(LINES): PARAMS = "transparency": true - -OUTPUT = OCR-D-IMG-LINES - -$(OUTPUT): $(LINES) - @mkdir -p $(OUTPUT) - set -e; \ - ln -frs $* $@; \ - for grp in $(filter-out $<,$^); do \ - suffix=$(<:OCR-D-IMG-LINES-$(INPUT)-%=%); \ - ocr=$${grp%-$$suffix}; \ - ocr=$${ocr#OCR-D-IMG-LINES-}; \ - for file in $$grp/*.gt.txt; do \ - newfile=$${file/$$grp\/$$grp/$@\/$<}; \ - newfile=$${newfile/.gt.txt/.$$ocr.txt}; \ - ln -frs $$file $$newfile; \ - done \ - done || { rm -fr $(OUTPUT); exit 1; } - -.DEFAULT_GOAL = $(OUTPUT) - -.PHONY: $(OUTPUT) - -# Down here, custom configuration ends. 
-### - -include Makefile - diff --git a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index 5dca162..0000000 --- a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,119 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. - -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise+deskew pages," - @echo "then clip+deskew regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." 
- -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -FLIP = $(DEN)-DESKEW-tesseract - -$(FLIP): $(DEN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -FLIP2 = $(CLIP)-DESKEW-tesseract - -$(FLIP2): $(CLIP) -$(FLIP2): TOOL = ocrd-tesserocr-deskew -$(FLIP2): PARAMS = "operation_level": "region" - -DESK2 = $(FLIP2)-DESKEW-ocropy - -$(DESK2): $(FLIP2) -$(DESK2): TOOL = ocrd-cis-ocropy-deskew -$(DESK2): PARAMS = "level-of-operation": "region" - -RESEG = $(DESK2)-RESEG - -$(RESEG): $(DESK2) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) 
$(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index 49dacab..0000000 --- a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,107 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. 
- -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise+deskew pages," - @echo "then clip regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." - -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -FLIP = $(DEN)-DESKEW-tesseract - -$(FLIP): $(DEN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -RESEG = $(CLIP)-RESEG - -$(RESEG): $(CLIP) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", 
"model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-shrink-extract-regions.mk b/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-shrink-extract-regions.mk deleted file mode 100644 index 25c6037..0000000 --- a/gt-binarize-page-olena-sauvola-denoise-ocropy-deskew-page-ocropy-clip-shrink-extract-regions.mk +++ /dev/null @@ -1,85 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` -# To get help on available goals: -# `make help` - -### -# From here on, custom configuration begins. 
- -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise+deskew pages," - @echo "then clip regions," - @echo "then shrink regions into the hull polygon of its lines," - @echo "and finally extract page images and region coordinates" - @echo "(including meta-data) into one directory," - @echo "with corresponding filename suffixes for segmentation training." - -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -FLIP = $(DEN)-DESKEW-tesseract - -$(FLIP): $(DEN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -RESEG = OCR-D-SEG-LINE - -$(RESEG): $(CLIP) -$(RESEG): TOOL = ocrd-cis-ocropy-segment -$(RESEG): PARAMS = "spread": 2.4 - -TIGHT = OCR-D-SEG-BLOCK - -$(TIGHT): $(RESEG) -$(TIGHT): TOOL = ocrd-segment-repair -$(TIGHT): PARAMS = "sanitize": true - -OUTPUT = OCR-D-IMG-REGIONS - -$(OUTPUT): $(TIGHT) -$(OUTPUT): TOOL = ocrd-segment-extract-regions -$(OUTPUT): PARAMS = "transparency": true - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. 
-### - -include Makefile - diff --git a/gt-binarize-page-olena-sauvola-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-sauvola-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index a6fed2e..0000000 --- a/gt-binarize-page-olena-sauvola-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,113 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. - -info: - @echo "Read GT line segmentation," - @echo "then binarize+deskew pages," - @echo "then clip+deskew regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." 
- -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -FLIP = $(BIN)-DESKEW-tesseract - -$(FLIP): $(BIN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -FLIP2 = $(CLIP)-DESKEW-tesseract - -$(FLIP2): $(CLIP) -$(FLIP2): TOOL = ocrd-tesserocr-deskew -$(FLIP2): PARAMS = "operation_level": "region", "min_orientation_confidence": 1.5 - -DESK2 = $(FLIP2)-DESKEW-ocropy - -$(DESK2): $(FLIP2) -$(DESK2): TOOL = ocrd-cis-ocropy-deskew -$(DESK2): PARAMS = "level-of-operation": "page", "maxskew": 5 - -RESEG = $(DESK2)-RESEG - -$(RESEG): $(DESK2) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", 
"overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-sauvola-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-sauvola-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index aad8ff6..0000000 --- a/gt-binarize-page-olena-sauvola-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,101 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. 
- -info: - @echo "Read GT line segmentation," - @echo "then binarize+deskew pages," - @echo "then clip regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." - -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-sauvola - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "sauvola-ms-split" - -FLIP = $(BIN)-DESKEW-tesseract - -$(FLIP): $(BIN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -RESEG = $(CLIP)-RESEG - -$(RESEG): $(CLIP) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": 
true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-wolf-denoise-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-wolf-denoise-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index eea6c21..0000000 --- a/gt-binarize-page-olena-wolf-denoise-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,95 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. 
- -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise pages," - @echo "then clip regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." - -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-wolf - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "wolf" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -CLIP = $(DEN)-CLIP - -$(CLIP): $(DEN) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -RESEG = $(CLIP)-RESEG - -$(RESEG): $(CLIP) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" 
-$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-wolf-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-wolf-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index efb8562..0000000 --- a/gt-binarize-page-olena-wolf-denoise-ocropy-deskew-page-ocropy-clip-deskew-region-tesseract-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,119 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. 
- -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise+deskew pages," - @echo "then clip+deskew regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." - -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-wolf - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "wolf" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -FLIP = $(DEN)-DESKEW-tesseract - -$(FLIP): $(DEN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -FLIP2 = $(CLIP)-DESKEW-tesseract - -$(FLIP2): $(CLIP) -$(FLIP2): TOOL = ocrd-tesserocr-deskew -$(FLIP2): PARAMS = "operation_level": "region", "min_orientation_confidence": 1.5 - -DESK2 = $(FLIP2)-DESKEW-ocropy - -$(DESK2): $(FLIP2) -$(DESK2): TOOL = ocrd-cis-ocropy-deskew -$(DESK2): PARAMS = "level-of-operation": "region" - -RESEG = $(DESK2)-RESEG - -$(RESEG): $(DESK2) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = 
$(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt-binarize-page-olena-wolf-denoise-ocropy-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk b/gt-binarize-page-olena-wolf-denoise-ocropy-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk deleted file mode 100644 index c4a0cf5..0000000 --- a/gt-binarize-page-olena-wolf-denoise-ocropy-deskew-page-ocropy-clip-resegment-dewarp-ocr-ocropy-tesseract.mk +++ /dev/null @@ -1,107 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. 
Then -# chdir to that location. - -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` - -### -# From here on, custom configuration begins. - -info: - @echo "Read GT line segmentation," - @echo "then binarize+denoise+deskew pages," - @echo "then clip regions," - @echo "then resegment+dewarp lines," - @echo "then recognize lines with various Ocropus+Tesseract models," - @echo "and finally evaluate OCR quality by measuring" - @echo "character error rates on line texts w.r.t. GT." - -INPUT = OCR-D-GT-SEG-LINE - -$(INPUT): - ocrd workspace find -G $@ --download - ocrd workspace find -G OCR-D-IMG --download # just in case - -BIN = $(INPUT)-BINPAGE-wolf - -$(BIN): $(INPUT) -$(BIN): TOOL = ocrd-olena-binarize -$(BIN): PARAMS = "impl": "wolf" - -DEN = $(BIN)-DENOISE-ocropy - -$(DEN): $(BIN) -$(DEN): TOOL = ocrd-cis-ocropy-denoise -$(DEN): PARAMS = "level-of-operation": "page", "noise_maxsize": 3.0 - -FLIP = $(DEN)-DESKEW-tesseract - -$(FLIP): $(DEN) -$(FLIP): TOOL = ocrd-tesserocr-deskew -$(FLIP): PARAMS = "operation_level": "page" - -DESK = $(FLIP)-DESKEW-ocropy - -$(DESK): $(FLIP) -$(DESK): TOOL = ocrd-cis-ocropy-deskew -$(DESK): PARAMS = "level-of-operation": "page", "maxskew": 5 - -CLIP = $(DESK)-CLIP - -$(CLIP): $(DESK) -$(CLIP): TOOL = ocrd-cis-ocropy-clip - -RESEG = $(CLIP)-RESEG - -$(RESEG): $(CLIP) -$(RESEG): TOOL = ocrd-cis-ocropy-resegment - -DEW = $(RESEG)-DEWARP - -$(DEW): $(RESEG) -$(DEW): TOOL = ocrd-cis-ocropy-dewarp - -OCR1 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-fraktur-%) -OCR2 = $(DEW:$(INPUT)-%=OCR-D-OCR-OCRO-frakturjze-%) -OCR3 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-%) -OCR4 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-Fraktur-Latin-%) -OCR5 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-%) -OCR6 = $(DEW:$(INPUT)-%=OCR-D-OCR-TESS-frk-deu-%) -OCR7 = 
$(DEW:$(INPUT)-%=OCR-D-OCR-TESS-gt4histocr-%) -OCR8 = $(DEW:$(INPUT)-%=OCR-D-OCR-CALA-gt4histocr-%) - -$(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8): $(DEW) - -$(OCR1) $(OCR2): TOOL = ocrd-cis-ocropy-recognize -$(OCR1): PARAMS = "textequiv_level": "glyph", "model": "fraktur.pyrnn" -$(OCR2): PARAMS = "textequiv_level": "glyph", "model": "fraktur-jze.pyrnn" - -$(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7): TOOL = ocrd-tesserocr-recognize -$(OCR3): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur" -$(OCR4): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "script/Fraktur+script/Latin" -$(OCR5): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk" -$(OCR6): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "frk+deu" -$(OCR7): PARAMS = "textequiv_level" : "glyph", "overwrite_words": true, "model" : "GT4HistOCR_2000000+GT4HistOCR_300000+GT4HistOCR_100000" - -$(OCR8): TOOL = ocrd-calamari-recognize -$(OCR8): GPU = 1 -$(OCR8): PARAMS = "checkpoint" : "$(VIRTUAL_ENV)/share/calamari/GT4HistOCR/*.ckpt.json" - -OUTPUT = $(DEW)-OCR - -$(OUTPUT): $(INPUT) $(OCR1) $(OCR2) $(OCR3) $(OCR4) $(OCR5) $(OCR6) $(OCR7) $(OCR8) -$(OUTPUT): TOOL = ocrd-cor-asv-ann-evaluate -$(OUTPUT): PARAMS = "metric" : "historic_latin" - -.DEFAULT_GOAL = $(OUTPUT) - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/gt.mk b/gt.mk deleted file mode 100644 index a85f544..0000000 --- a/gt.mk +++ /dev/null @@ -1,30 +0,0 @@ -# Install by copying (or symlinking) makefiles into a directory -# where all OCR-D workspaces (unpacked BagIts) reside. Then -# chdir to that location. 
- -# Call via: -# `make -f WORKFLOW-CONFIG.mk WORKSPACE-DIRS` or -# `make -f WORKFLOW-CONFIG.mk all` or just -# `make -f WORKFLOW-CONFIG.mk` -# To rebuild partially, you must pass -W to recursive make: -# `make -f WORKFLOW-CONFIG.mk EXTRA_MAKEFLAGS="-W FILEGRP"` -# To get help on available goals: -# `make help` - -### -# From here on, custom configuration begins. - -GT_FILEGRPS = $(shell test -f mets.xml && ocrd workspace list-group | fgrep -x -e OCR-D-IMG -e OCR-D-GT-SEG-PAGE -e OCR-D-GT-SEG-BLOCK -e OCR-D-GT-SEG-LINE) - -all: $(GT_FILEGRPS) - -$(GT_FILEGRPS): - ocrd workspace find -G $@ --download - -.PHONY: all - -# Down here, custom configuration ends. -### - -include Makefile - diff --git a/ocrd-import b/ocrd-import deleted file mode 100755 index c765e2d..0000000 --- a/ocrd-import +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env bash - -function cleanup { - set +e - rm -f mets.sock 2>/dev/null -} -trap cleanup EXIT - -# FIXME: bash says under BUGS "There may be only one active coprocess at a time." which causes spurious warnings here. -# (We therefore temporarily silence stderr to silence the execute_coproc warnings.) -exec 4>&2 -{ -coproc critical { ocrd log -n ocrd-import critical - >& 4 2>& 4; } -coproc error { ocrd log -n ocrd-import error - >& 4 2>& 4; } -coproc warning { ocrd log -n ocrd-import warning - >& 4 2>& 4; } -coproc info { ocrd log -n ocrd-import info - >& 4 2>& 4; } -coproc debug { ocrd log -n ocrd-import debug - >& 4 2>& 4; } -} 2>/dev/null - -function critical { echo "$1" >& ${critical[1]}; } -function error { echo "$1" >& ${error[1]}; } -function warning { echo "$1" >& ${warning[1]}; } -function info { echo "$1" >& ${info[1]}; } -function debug { echo "$1" >& ${debug[1]}; } - -((BASH_VERSINFO<4 || BASH_VERSINFO==4 && BASH_VERSINFO[1]<4)) && critical "bash $BASH_VERSION is too old. 
Please install 4.4 or newer" && exit 2 - -ignore=0 -skip=() -regex=() -convert=1 -dpi=300 -numpageid=1 -onlybasename=0 -while (($#)); do - case "${1:--h}" in - -h|-[-]help) - cat <