Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Run the test suite under coverage on every push, once per Python version in the matrix.
name: Pytest CI

on: [push]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        make install-dev
        make deps-test
    - name: Test with pytest
      # PYTEST_ARGS overrides the Makefile default; --junitxml writes pytest.xml
      run: make coverage PYTEST_ARGS="-vv --workspace=all --junitxml=pytest.xml"
    - name: Get coverage results
      run: |
        coverage report --format=markdown >> "$GITHUB_STEP_SUMMARY"
        coverage xml
    - name: Store coverage results
      uses: actions/upload-artifact@v4
      with:
        name: coverage-report_${{ matrix.python-version }}
        # keep the XML coverage report next to the JUnit results
        # (coverage.xml is the default output name of `coverage xml`)
        path: |
          pytest.xml
          coverage.xml
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
tests/assets/
/*.zip

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# OCR-D/assets checked out as a submodule under repo/assets
# (the Makefile target of the same name syncs and updates it)
[submodule "repo/assets"]
path = repo/assets
url = https://github.com/OCR-D/assets
41 changes: 31 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,41 @@ FROM $DOCKER_BASE_IMAGE
# Build-time metadata supplied via --build-arg (see the Makefile's docker target)
ARG VCS_REF
ARG BUILD_DATE
# Image metadata, given both as label-schema and as OCI image annotations.
# Every line but the last must end in a backslash, otherwise LABEL stops early.
LABEL \
    maintainer="https://ocr-d.de/en/contact" \
    org.label-schema.vcs-ref=$VCS_REF \
    org.label-schema.vcs-url="https://github.com/bertsky/docstruct" \
    org.label-schema.build-date=$BUILD_DATE \
    org.opencontainers.image.vendor="DFG-Funded Initiative for Optical Character Recognition Development" \
    org.opencontainers.image.title="docstruct" \
    org.opencontainers.image.description="Document structure detection from PAGE to METS" \
    org.opencontainers.image.source="https://github.com/bertsky/docstruct" \
    org.opencontainers.image.documentation="https://github.com/bertsky/docstruct/blob/${VCS_REF}/README.md" \
    org.opencontainers.image.revision=$VCS_REF \
    org.opencontainers.image.created=$BUILD_DATE \
    org.opencontainers.image.base.name=ocrd/core

# Non-interactive package installation and UTF-8 everywhere
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONIOENCODING=utf8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# avoid HOME/.local/share (hard to predict USER here)
# so let XDG_DATA_HOME coincide with fixed system location
# (can still be overridden by derived stages)
ENV XDG_DATA_HOME=/usr/local/share
# avoid the need for an extra volume for persistent resource user db
# (i.e. XDG_CONFIG_HOME/ocrd/resources.yml)
ENV XDG_CONFIG_HOME=/usr/local/share/ocrd-resources

# Build from a temporary source directory; it is deleted again in the same
# layer that installs the package, so the sources do not bloat the image.
WORKDIR /build/docstruct
COPY . .
COPY ocrd-tool.json .
# prepackage ocrd-tool.json as ocrd-all-tool.json
RUN ocrd ocrd-tool ocrd-tool.json dump-tools > $(dirname $(ocrd bashlib filename))/ocrd-all-tool.json
# install everything and reduce image size
RUN make install && rm -rf /build/docstruct
# smoke test
RUN ocrd-docstruct -h

# Default working directory, declared as a volume
WORKDIR /data
VOLUME ["/data"]
64 changes: 59 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,31 +1,85 @@
# Interpreter and installer commands; both can be overridden on the command
# line (the coverage target re-invokes make with PYTHON="coverage run").
PYTHON = python3
PIP = pip3
# Exported so the recipes actually see it: a plain make variable is not placed
# in the environment of the commands make runs.
export PYTHONIOENCODING = utf8
# No quotes around the default: make keeps quotes as part of the value, and
# pytest would then receive the whole option string as one single argument.
PYTEST_ARGS ?= -vv --workspace=all

# Base image and tag used by the docker target
DOCKER_BASE_IMAGE = docker.io/ocrd/core:v3.3.0
DOCKER_TAG = ocrd/docstruct

# Print an overview of the available targets and the user-tunable variables
help:
	@echo
	@echo "  Targets"
	@echo
	@echo "    deps          Install only Python deps via pip"
	@echo "    deps-test     Install Python deps for testing via pip"
	@echo "    install       Install full Python package via pip"
	@echo "    install-dev   Install in editable mode"
	@echo "    build         Build binary and source Python package"
	@echo "    docker        Build a Docker image $(DOCKER_TAG) from $(DOCKER_BASE_IMAGE)"
	@echo "    test          Run tests via Pytest"
	@echo "    coverage      Run tests via Pytest under coverage and print a report"
	@echo "    repo/assets   Clone OCR-D/assets to ./repo/assets"
	@echo "    tests/assets  Copy to ./tests/assets"
	@echo ""
	@echo "  Variables"
	@echo ""
	@echo "    DOCKER_TAG    Docker container tag ($(DOCKER_TAG))"
	@echo "    PYTEST_ARGS   Additional runtime options for pytest ($(PYTEST_ARGS))"
	@echo "                  (See --help, esp. custom option --workspace)"

# Install the runtime requirements only
deps:
	$(PIP) install --requirement requirements.txt

# Install the requirements of the test suite
deps-test:
	$(PIP) install --requirement requirements-test.txt

# Install the package itself (regular, non-editable)
install:
	$(PIP) install .

# Install the package in editable mode
install-dev:
	$(PIP) install --editable .

# Install the build tooling, then build the package distributions
build:
	$(PIP) install build wheel
	$(PYTHON) -m build .

# Run the tests with pytest; the test assets are set up first
test: tests/assets
	$(PYTHON) -m pytest tests --durations=0 $(PYTEST_ARGS)

# Run the tests under coverage: reset old data, re-enter the test target with
# "coverage run" in place of the interpreter, then report incl. missing lines
coverage:
	coverage erase
	$(MAKE) PYTHON="coverage run" test
	coverage report --show-missing

#
# Assets
#

# Update OCR-D/assets submodule
# always-update is declared phony and is a prerequisite of repo/assets, so the
# recipe runs on every invocation instead of being skipped once the directory
# exists. The submodule URL is synced first; the actual update (followed by a
# touch of the directory) only happens if some line printed by
# `git submodule status` does not start with a space.
# NOTE(review): per git-submodule docs a leading space should mean "checked out
# at the recorded commit" - confirm.
# NOTE(review): `repos` is declared phony but no target of that name is visible here.
.PHONY: repos always-update tests/assets
repo/assets: always-update
git submodule sync --recursive $@
if git submodule status --recursive $@ | grep -qv '^ '; then \
git submodule update --init --recursive $@ && \
touch $@; \
fi

# Download the test data archive from the OCR-D/gt_structure_text v1.5.0 release.
# Fetch under a temporary name and rename only after wget succeeded, so an
# interrupted or failed download never leaves a truncated file behind that
# make would consider up to date.
benner_herrnhuterey04_1748.ocrd.zip:
	wget -O $@.tmp https://github.com/OCR-D/gt_structure_text/releases/download/v1.5.0/$@
	mv -f $@.tmp $@

# Setup test assets: copy the data directory of the assets submodule, then
# unpack every *.zip prerequisite into a subdirectory named after the archive.
# repo/assets is listed first in a single prerequisite list, so $< refers to
# it without depending on how make merges prerequisites of several rules.
# Each generated spill command ends in "&&" and the chain is closed by "true",
# so more than one zip still yields a valid command line that stops on failure.
tests/assets: repo/assets benner_herrnhuterey04_1748.ocrd.zip
	mkdir -p $@
	cp -a $</data/* $@
	$(foreach BAG,$(filter %.zip,$^),ocrd zip spill -d $@/$(basename $(BAG)) $(BAG) &&) true

# Build the Docker image from the configured base image, passing the current
# short commit hash and a UTC timestamp as build arguments
docker:
	docker build \
	--tag $(DOCKER_TAG) \
	--build-arg BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ") \
	--build-arg VCS_REF=$$(git rev-parse --short HEAD) \
	--build-arg DOCKER_BASE_IMAGE=$(DOCKER_BASE_IMAGE) \
	.

# Targets that name commands rather than files
.PHONY: help \
        deps deps-test \
        install install-dev build \
        test coverage \
        docker
4 changes: 0 additions & 4 deletions docstruct/config.py

This file was deleted.

4 changes: 3 additions & 1 deletion docstruct/ocrd-tool.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
{
"version": "0.0.1",
"version": "0.0.2",
"git_url": "https://github.com/bertsky/docstruct",
"tools": {
"ocrd-docstruct": {
"executable": "ocrd-docstruct",
"categories": ["Layout analysis"],
"description": "Parsing page-level text regions with headings and reading order, create a dummy logical structMap",
"steps": ["layout/analysis"],
"input_file_grp_cardinality": 1,
"output_file_grp_cardinality": 0,
"parameters": {
"mode": {
"type": "string",
Expand Down
Loading