Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
- run: apt-get update && apt-get install -y --no-install-recommends make wget unzip
- checkout
- run: make deps-ubuntu
- run: make install VIRTUAL_ENV=/usr/local
- run: make install
- run: ocrd resmgr download -n
https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/german_print/tessdata_best/german_print_0.877_1254744_7309067.traineddata
ocrd-tesserocr-recognize german_print.traineddata
Expand Down
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
test/
dist/
build/
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
*~
test/data*

.pytest_cache
__pycache__
*.py[cod]
*.egg-info
/build
/dist
25 changes: 16 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,29 @@ FROM $DOCKER_BASE_IMAGE
ARG VCS_REF
ARG BUILD_DATE
LABEL \
maintainer="https://github.com/bertsky/workflow-configuration/issues" \
maintainer="https://ocr-d.de/en/contact" \
org.label-schema.vcs-ref=$VCS_REF \
org.label-schema.vcs-url="https://github.com/bertsky/workflow-configuration" \
org.label-schema.build-date=$BUILD_DATE
org.label-schema.build-date=$BUILD_DATE \
org.opencontainers.image.vendor="DFG-Funded Initiative for Optical Character Recognition Development" \
org.opencontainers.image.title="workflow-configuration" \
org.opencontainers.image.description="" \
org.opencontainers.image.source="https://github.com/bertsky/workflow-configuration" \
org.opencontainers.image.documentation="https://github.com/bertsky/workflow-configuration/blob/${VCS_REF}/README.md" \
org.opencontainers.image.revision=$VCS_REF \
org.opencontainers.image.created=$BUILD_DATE \
org.opencontainers.image.base.name=ocrd/core

SHELL ["/bin/bash", "-c"]
WORKDIR /build/workflow-configuration
WORKDIR /build/module

COPY . .
COPY ocrd-tool.json .
COPY ocrd-make ocrd-import ocrd-page-transform xsl-transform .
COPY Makefile *.mk .
COPY *.xsl .
COPY README.md .
# prepackage ocrd-tool.json as ocrd-all-tool.json
RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json
# install everything and reduce image size
RUN make deps-ubuntu
RUN make install VIRTUAL_ENV=/usr/local
RUN rm -fr /build/workflow-configuration
RUN make install && rm -fr /build/module

WORKDIR /data
VOLUME ["/data"]
125 changes: 47 additions & 78 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,35 +1,21 @@
# OCR-D workflow configuration installation makefile
#
# Install workflow configurations persistently by running:
# `make install`
# (in the git repo), which will copy workflow.mk (as `Makefile`), all
# preconfigured makefiles, and some shell scripts into
# a fixed target directory under the VIRTUAL_ENV prefix).
#
# For installation via shell-script:
VIRTUAL_ENV ?= $(CURDIR)/local
# copy `ocrd-make` here:
BINDIR = $(abspath $(VIRTUAL_ENV))/bin
# copy the makefiles here:
SHAREDIR = $(abspath $(VIRTUAL_ENV))/share/workflow-configuration

# we need associative arrays, process substitution etc.
# also, fail on failed intermediates as well:
SHELL = bash -o pipefail

CONFIGURATION := $(abspath $(firstword $(MAKEFILE_LIST)))
# `make install` or `pip install .` (in the git repo),
# which will copy all distribution files (`Makefile` and all
# preconfigured makefiles `*.mk`, as well as some Python
# scripts and XSL transforms `*.xsl`) into the Python
# site directory.
# Using venv is recommended.

CONFIGDIR := $(dir $(CONFIGURATION))
PYTHON = python3
PIP = pip3
PYTHONIOENCODING=utf8

EXISTING_MAKEFILES = $(patsubst $(CONFIGDIR)/%,%,$(wildcard $(CONFIGDIR)/*.mk))
EXISTING_TRANSFORMS = $(patsubst $(CONFIGDIR)/%,%,$(wildcard $(CONFIGDIR)/*.xsl))
SHELL = bash -o pipefail

ifeq ($(filter workflow.mk,$(EXISTING_MAKEFILES)),)
$(error "Found no .mk makefiles in source directory $(CONFIGDIR)")
endif
ifeq ($(EXISTING_TRANSFORMS),)
$(error "Found no .xsl transforms in source directory $(CONFIGDIR)")
endif
DOCKER_BASE_IMAGE = docker.io/ocrd/core:v3.1.0
DOCKER_TAG = bertsky/workflow-configuration

help:
@echo "Installing OCR-D workflow configurations:"
Expand All @@ -39,51 +25,46 @@ help:
@echo
@echo " Targets:"
@echo " * help (this message)"
@echo " * test (run test suite)"
@echo " * deps-ubuntu (install extra system packages needed here, beyond ocrd and processors)"
@echo " * install (copy $(SHPROGS) and configuration makefiles to"
@echo " * VIRTUAL_ENV=$(VIRTUAL_ENV)"
@echo " * from repository workdir)"
@echo " * uninstall (remove $(SHPROGS) and configuration makefiles from"
@echo " * VIRTUAL_ENV=$(VIRTUAL_ENV))"
@echo " * deps-ubuntu (install system packages needed here, beyond ocrd and processors)"
@echo " * deps (install Python packages needed here)"
@echo " * install (install this package via $(PIP)"
@echo " build (build source and binary distribution)"
@echo " * uninstall (remove this package via $(PIP)"
@echo " * %.mk (any filename with suffix .mk not existing yet: spawn new makefile from pattern)"
@echo " * test (run test suite)"
@echo
@echo " Variables:"
@echo
@echo " * VIRTUAL_ENV: directory prefix to use for installation"
@echo " * PYTHON (name of Python version binary [$(PYTHON)])"
@echo " * PIP (name of Python pip version binary [$(PIP)])"

.PHONY: help

deps-ubuntu:
apt-get -y install parallel xmlstarlet bc sed libdbd-sqlite3-perl

XSLPROGS =$(EXISTING_TRANSFORMS:%.xsl=%)
SHPROGS = ocrd-make ocrd-import ocrd-page-transform
PROGS = $(SHPROGS) $(XSLPROGS)
install-bin: $(PROGS:%=$(BINDIR)/%) | $(BINDIR)

$(SHPROGS:%=$(BINDIR)/%): $(BINDIR)/%: %
sed 's,^SHAREDIR=.*,SHAREDIR="$(SHAREDIR)",' < $< > $@
chmod +x $@
deps: requirements.txt
$(PIP) install -r $<

$(XSLPROGS:%=$(BINDIR)/%): %: xsl-transform
sed 's,^SHAREDIR=.*,SHAREDIR="$(SHAREDIR)",' < $< > $@
chmod +x $@
install:
$(PIP) install .

$(BINDIR) $(SHAREDIR):
@mkdir -p $@
install-dev:
$(PIP) install -e .

install: install-bin | $(SHAREDIR)
cp -Lf $(EXISTING_MAKEFILES) $(EXISTING_TRANSFORMS) ocrd-tool.json $(SHAREDIR)
mv $(SHAREDIR)/workflow.mk $(SHAREDIR)/Makefile
build:
$(PIP) install build
$(PYTHON) -m build .

uninstall:
$(RM) $(PROGS:%=$(BINDIR)/%)
$(RM) -r $(SHAREDIR)
$(PIP) uninstall workflow_configuration

TEST_WORKFLOW = -f all-tess-MODEL.mk MODEL=german_print \
-f transform.mk TROPTIONS="-P xsl page-extract-text.xsl \
-P xslt-params '-s level=line' -P mimetype text/plain" \
-f cat-files.mk
define testrecipe =
function testfun { pushd `mktemp -d` && cp -pr $(abspath $^) . && /usr/bin/time ocrd-make -f all-tess-MODEL.mk MODEL=german_print LOGLEVEL=ERROR $(^F) "$$@" && $(RM) -r $$DIRSTACK; }; testfun
function testfun { pushd `mktemp -d` && cp -pr $(abspath $^) . && /usr/bin/time ocrd-make $(TEST_WORKFLOW) LOGLEVEL=ERROR $(^F) "$$@" && cat $(^F:%=%.*.log) && $(RM) -r $$DIRSTACK; }; testfun
endef
test: test/data1 test/data2
$(testrecipe)
Expand All @@ -106,41 +87,37 @@ test/data2:
ocrd workspace -d $@ rename-group ORIGINAL OCR-D-IMG
ocrd workspace -d $@ prune-files

DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda-torch:v2.69.0
DOCKER_TAG ?= bertsky/workflow-configuration
docker:
docker build \
-t $(DOCKER_TAG) \
--build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \
--build-arg VCS_REF=$$(git rev-parse --short HEAD) \
--build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
-t $(DOCKER_TAG) .
--build-arg VCS_REF=$(git rev-parse --short HEAD) \
--build-arg BUILD_DATE=$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
.

.PHONY: deps-ubuntu install install-bin uninstall test docker
.PHONY: deps-ubuntu deps install install-dev build uninstall test docker

# spawn a new configuration
define skeleton =
# This file can run a workflow on a single workspace (non-recursively).
#
# Install by copying (or symlinking) makefiles into a directory
# where all OCR-D workspaces (unpacked BagIts) reside and running
# `make` there (or including files from there).
# `ocrd-make` there (or including files from there).
#
# Call via:
# `make -f WORKFLOW-CONFIG.mk`
# `ocrd-make -f WORKFLOW-CONFIG.mk`
#
# To rebuild partially, you must pass -W to `make`:
# `make -f WORKFLOW-CONFIG.mk -W FILEGRP`
# `ocrd-make -f WORKFLOW-CONFIG.mk -W FILEGRP`
#
# To build in parallel, use `-j [CPUS] [-l [LOADLEVEL]]` etc.
#
# To get general help:
# `make -f WORKFLOW-CONFIG.mk help`
# `ocrd-make --help`
#
# To get a description of the workflow:
# `make -f WORKFLOW-CONFIG.mk info`

###
# From here on, custom configuration begins.
# `ocrd-make -f WORKFLOW-CONFIG.mk info`

INPUT = OCR-D-IMG

Expand All @@ -150,7 +127,7 @@ $$(INPUT):
OUTPUT = OCR-D-OUT
$$(OUTPUT): $$(INPUT)
$$(OUTPUT): TOOL = ocrd-dummy
$$(OUTPUT): PARAMS =
$$(OUTPUT): OPTIONS =

info:
@echo "This is a dummy configuration that creates a copy $$(OUTPUT) of the input fileGrp $$(INPUT)"
Expand All @@ -159,24 +136,16 @@ info:

.DEFAULT_GOAL = $$(OUTPUT)

# Down here, custom configuration ends.
###

SELFDIR := $$(dir $$(abspath $$(firstword $$(MAKEFILE_LIST))))
include $$(SELFDIR)/Makefile
endef

export skeleton

%.mk:
@echo >$@ "$$skeleton"


# do not search for implicit rules here:
%/Makefile: ;
Makefile: ;
local.mk: ;
ocrd-tool.json: ;
$(CONFIGURATION): ;
$(EXISTING_MAKEFILES): ;
$(EXISTING_TRANSFORMS): ;
$(PROGS): ;
local.mk: ;
Loading