Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added core/__init__.py
Empty file.
153 changes: 115 additions & 38 deletions core/document.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
import io
import os
import shutil
import tempfile
import traceback
from pathlib import Path
from typing import Optional
from zipfile import PyZipFile
from zipfile import BadZipFile, PyZipFile

import cssselect2
from defusedxml import ElementTree

from .constants import UNITS
from .resources import res_add_font, res_add_multimedia
from .resources import (
Images,
MultiMedias,
res_add_drawparams,
res_add_font,
res_add_multimedia,
)
from .surface import *
from pathlib import Path
import tempfile
import shutil


class OFDFile(object):
Expand All @@ -28,6 +34,8 @@ class OFDFile(object):

def __init__(self, fobj):
self.zf = fobj if isinstance(fobj, PyZipFile) else PyZipFile(fobj)
if getattr(fobj, "filename", None):
self.zf.filename = getattr(fobj, "filename")
# for info in self._zf.infolist():
# print(info)
self.node_tree = self.read_node("OFD.xml")
Expand All @@ -51,31 +59,27 @@ def draw_document(self, doc_num=0, destination: Optional[str] = None):
paths = []
for i, page in enumerate(document.pages):
surface = Surface(page, os.path.split(self.zf.filename)[-1].strip(".ofd"))
paths.append(
surface.draw(page, destination / Path(f"{surface.filename}_{i}.png"))
)
shutil.rmtree(self.document.work_folder, ignore_errors=True)
paths.append(surface.draw(page, destination / Path(f"{surface.filename}_{i}.png")))
return paths


class OFDDocument(object):
def __init__(self, _zf, node, n=0):
self.pages = []
self._zf = _zf
self.work_folder = tempfile.mkdtemp()
self.name = f"Doc_{n}"
self.node = node
self.physical_box = [
float(i)
for i in node["CommonData"]["PageArea"]["PhysicalBox"].text.split(" ")
]
try:
self.physical_box = [
float(i) for i in node["CommonData"]["PageArea"]["PhysicalBox"].text.split(" ")
]
except:
self.physical_box = [0.0, 0.0, 210.0, 140.0]
self._parse_res()
# print('Resources:', Fonts, Images)
# assert len(node['CommonData']['TemplatePage']) == len(node['Pages']['Page'])
if isinstance(node["Pages"]["Page"], list):
sorted_pages = sorted(
node["Pages"]["Page"], key=lambda x: int(x.attr["ID"])
)
sorted_pages = sorted(node["Pages"]["Page"], key=lambda x: int(x.attr["ID"]))
else:
sorted_pages = [node["Pages"]["Page"]]
sorted_tpls = []
Expand All @@ -89,33 +93,71 @@ def __init__(self, _zf, node, n=0):

seal_node = None
if f"{self.name}/Signs/Sign_0/SignedValue.dat" in _zf.namelist():
seal_file = OFDFile(
io.BytesIO(_zf.read(f"{self.name}/Signs/Sign_0/SignedValue.dat"))
)
seal_node = seal_file.document.pages[0].page_node
try:
seal_file = OFDFile(
io.BytesIO(_zf.read(f"{self.name}/Signs/Sign_0/SignedValue.dat"))
)
seal_node = seal_file.document.pages[0].page_node
except BadZipFile as _:
print(f"BadZipFile: {self.name}/Signs/Sign_0/SignedValue.dat")

for i, p in enumerate(sorted_pages):
document = _zf.read(self.name + "/" + sorted_pages[i].attr["BaseLoc"])
tree = ElementTree.fromstring(document)
root = cssselect2.ElementWrapper.from_xml_root(tree)
page_node = Node(root)
annots = None
if "Annotations" in self.node:
annots = self.get_node_tree(self.name + "/" + self.node["Annotations"].text)

for i, p in enumerate(sorted_pages):
page_id = p.attr["ID"]
page_node = self.get_node_tree(self.name + "/" + sorted_pages[i].attr["BaseLoc"])
annot_node = None
if annots:
if isinstance(annots["Page"], list):
annot_page = next(
iter([page for page in annots["Page"] if page.attr["PageID"] == page_id]),
None,
)
if annot_page:
annot_node = self.get_node_tree(
self.name + "/Annots/" + annot_page["FileLoc"].text
)
elif isinstance(annots["Page"], Node) and annots["Page"].attr["PageID"] == page_id:
annot_node = self.get_node_tree(
self.name + "/Annots/" + annots["Page"]["FileLoc"].text
)
tpl_node = None
if i < len(sorted_tpls):
document = _zf.read(self.name + "/" + sorted_tpls[i].attr["BaseLoc"])
tree = ElementTree.fromstring(document)
root = cssselect2.ElementWrapper.from_xml_root(tree)
tpl_node = Node(root)
try:
# get tpl_node from ID
tpl = [
tpl
for tpl in sorted_tpls
if page_node["Template"].attr["TemplateID"] == tpl.attr["ID"]
][0]
tpl_node = self.get_node_tree(self.name + "/" + tpl.attr["BaseLoc"])
except:
pass
# fallback using sorted one.
if tpl_node is None and i < len(sorted_tpls):
tpl_node = self.get_node_tree(self.name + "/" + sorted_tpls[i].attr["BaseLoc"])

self.pages.append(
OFDPage(
self,
f"Page_{i}",
page_id,
page_node,
tpl_node,
seal_node if i == 0 else None,
annot_node=annot_node,
)
)

def get_node_tree(self, location):
    """Parse the XML archive member at ``location`` into a ``Node`` tree.

    Args:
        location: Member name inside the archive held in ``self._zf``.

    Returns:
        A ``Node`` wrapping the parsed document root, or ``None`` when the
        archive has no member called ``location``.
    """
    try:
        # ZipFile.read raises KeyError for an unknown member, so there is no
        # need to build and scan the full name list on every lookup.
        payload = self._zf.read(location)
    except KeyError:
        return None
    tree = ElementTree.fromstring(payload)
    root = cssselect2.ElementWrapper.from_xml_root(tree)
    return Node(root)

def _parse_res(self):
if "DocumentRes" in self.node["CommonData"]:
node = Node.from_zp_location(
Expand All @@ -132,7 +174,7 @@ def _parse_res(self):
def _parse_res_node(self, node):
if node.tag in RESOURCE_TAGS:
try:
RESOURCE_TAGS[node.tag](node, self._zf, self.work_folder)
RESOURCE_TAGS[node.tag](node, self._zf)
except Exception as e:
# Error in point parsing, do nothing
print_node_recursive(node)
Expand All @@ -145,17 +187,27 @@ def _parse_res_node(self, node):


class OFDPage(object):
    """A single page of an OFD document together with the trees used to draw it."""

    def __init__(
        self,
        parent: "OFDDocument",
        name,
        page_id,
        page_node,
        tpl_node,
        seal_node=None,
        annot_node=None,
    ):
        """Store the page's node trees and work out its physical box.

        Args:
            parent: Owning document; its ``name`` and ``physical_box`` are read.
            name: Page label, appended to the document name to form ``self.name``.
            page_id: Identifier of the page as given by the caller.
            page_node: Parsed page content tree. It is queried with ``in`` and
                ``[]`` for an optional ``Area``/``PhysicalBox`` entry.
            tpl_node: Template tree for this page, or ``None``.
            seal_node: Seal tree for this page, or ``None``.
            annot_node: Annotation tree for this page, or ``None``.
        """
        self.parent = parent
        self.page_id = page_id
        self.name = f"{parent.name}_{name}"
        # Start from the document-wide box; a page-level Area overrides it.
        self.physical_box = self.parent.physical_box
        if "Area" in page_node and "PhysicalBox" in page_node["Area"]:
            # split() without an argument tolerates repeated, leading and
            # trailing whitespace, which split(" ") turns into empty strings.
            box_text = page_node["Area"]["PhysicalBox"].text
            self.physical_box = [float(value) for value in box_text.split()]
        self.tpl_node = tpl_node
        self.page_node = page_node
        self.seal_node = seal_node
        self.annot_node = annot_node


class Surface(object):
Expand All @@ -179,6 +231,26 @@ def cairo_draw(self, cr, node):
print(traceback.format_exc())
pass
return # no need to go deeper
if node.tag == "Appearance":
boundary = (
[float(i) for i in node.attr["Boundary"].split(" ")]
if "Boundary" in node.attr
else [0, 0, 0, 0]
)
cr.save()
cr.translate(boundary[0], boundary[1])
for child in node.children:
# Only draw known tags
self.cairo_draw(cr, child)
cr.restore()
return
elif node.tag == "Layer":
try:
cairo_layer(node)
except Exception as e:
# Error in point parsing, do nothing
print_node_recursive(node)
print(traceback.format_exc())

for child in node.children:
# Only draw known tags
Expand All @@ -190,7 +262,7 @@ def draw(self, page, path: Optional[str] = None) -> str:
physical_height = self.page.physical_box[3]
width = int(physical_width * self.pixels_per_mm)
height = int(physical_height * self.pixels_per_mm)
# print(f"create cairo surface, width: {width}, height: {height}")
# print(f'create cairo surface, width: {width}, height: {height}')
cairo_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)

self.cr = cairo.Context(cairo_surface)
Expand All @@ -200,9 +272,13 @@ def draw(self, page, path: Optional[str] = None) -> str:
self.cr.paint()
self.cr.move_to(0, 0)

self.cairo_draw(self.cr, self.page.tpl_node)
if self.page.tpl_node:
self.cairo_draw(self.cr, self.page.tpl_node)
self.cairo_draw(self.cr, self.page.page_node)

if self.page.annot_node:
self.cairo_draw(self.cr, self.page.annot_node)

# self.cr.scale(self.pixels_per_mm, self.pixels_per_mm)
# draw StampAnnot
if self.page.seal_node:
Expand All @@ -225,6 +301,7 @@ def draw(self, page, path: Optional[str] = None) -> str:
# Dispatch table: resource tag name -> loader called with that resource node.
RESOURCE_TAGS = {
    "Font": res_add_font,
    "MultiMedia": res_add_multimedia,
    "DrawParams": res_add_drawparams,
}


Expand Down
70 changes: 61 additions & 9 deletions core/resources.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import os
import platform

import gi

gi.require_version("Gtk", "3.0")
gi.require_version("PangoCairo", "1.0")
from gi.repository import PangoCairo
import cairo
from subprocess import Popen, PIPE
from subprocess import PIPE, Popen

import cairo
from gi.repository import PangoCairo

Fonts = {}
MultiMedias = {}
Images = {}
DrawParams = {}
font_map = PangoCairo.font_map_get_default()
Cairo_Font_Family_Names = [f.get_name() for f in font_map.list_families()]
# print(Cairo_Font_Family_Names)
Expand Down Expand Up @@ -76,23 +79,28 @@ def parse_from_node(node):


class Image(MultiMedia):
def __init__(self, node, _zf):
    """Extract this image from the archive and make it available as a PNG.

    ``.png`` members are extracted as they are. ``.jb2`` members are extracted
    and then converted with the external ``jbig2dec`` tool. Any other suffix
    leaves ``png_location`` as ``None``.

    Args:
        node: Resource node; its optional ``Format`` attribute is recorded.
        _zf: Open archive holding the image. Its ``filename`` decides the
            folder that members are extracted into.

    Raises:
        IndexError: If no archive member contains ``self.location``.

    NOTE(review): ``self.location`` is presumably set by
    ``MultiMedia.__init__``, which is not visible here - confirm.
    """
    super().__init__(node)
    self.png_location = None
    self.Format = node.attr["Format"] if "Format" in node.attr else ""
    suffix = self.location.split(".")[-1]
    if suffix not in ("jb2", "png"):
        return

    member = [loc for loc in _zf.namelist() if self.location in loc][0]
    tmp_folder = os.path.basename(_zf.filename).replace(".ofd", "")
    x_path = _zf.extract(member, tmp_folder)
    if suffix == "png":
        self.png_location = x_path
        return

    # Swap only the final extension; str.replace would also rewrite a ".jb2"
    # that happens to occur in a directory name.
    png_path = os.path.splitext(x_path)[0] + ".png"
    decoder = "./bin/jbig2dec" if platform.system() == "Windows" else "jbig2dec"
    # Wait for the decoder and drain its pipe. Otherwise the PNG may not exist
    # yet when it is read, and the child can block on a full stdout pipe.
    with Popen([decoder, "-o", png_path, x_path], stdout=PIPE) as proc:
        proc.communicate()
    if proc.returncode == 0:
        self.png_location = png_path

def get_cairo_surface(self):
if self.png_location:
Expand All @@ -103,11 +111,55 @@ def __repr__(self):
return f"Image ID:{self.ID}, Format:{self.Format}"


def res_add_font(node, _zf, work_folder):
class DrawParam(object):
    """Line width and stroke/fill colours read from one draw-parameter node."""

    def __init__(self, node=None):
        """Read the drawing defaults from ``node``.

        Args:
            node: Resource node exposing ``attr`` (a mapping) and ``children``
                (nodes with ``tag`` and ``attr``). With ``None`` every field
                takes its default: no ID, width 0.25, black stroke and fill.
        """
        attrs = node.attr if node is not None else {}
        self.ID = attrs.get("ID", None)
        # Attribute values arrive as text; convert so the width is always a
        # float, not a str when present and a float only by default.
        self.line_width = float(attrs.get("LineWidth", 0.25))
        self.stroke_color = self._first_color(node, "StrokeColor")
        self.fill_color = self._first_color(node, "FillColor")

    @staticmethod
    def _first_color(node, tag):
        """Return the first ``tag`` child's ``Value`` as scaled floats, else black."""
        if node is not None:
            for child in node.children:
                if child.tag == tag and "Value" in child.attr:
                    # Components are divided by 256.0, as the file has always done.
                    # NOTE(review): 8-bit channels usually scale by 255 - confirm.
                    return [float(part) / 256.0 for part in child.attr["Value"].split()]
        return [0, 0, 0]

    def __repr__(self):
        return f"ID[{self.ID}], line_width: {self.line_width}, stroke{self.stroke_color}, fill{self.fill_color}"


def res_add_font(node, _zf):
    """Register the font described by ``node`` in the module-level ``Fonts`` table.

    Args:
        node: Font resource node; its ``attr`` mapping must contain ``"ID"``.
        _zf: Archive handle. Not used by this loader.
    """
    font = Font(node.attr)
    Fonts[node.attr["ID"]] = font


def res_add_multimedia(node, _zf):
    """Register an image multimedia resource in the module-level ``Images`` table.

    Nodes whose ``Type`` attribute is not ``"Image"`` are ignored.

    Args:
        node: MultiMedia resource node; ``attr`` must contain ``"Type"`` and,
            for images, ``"ID"``.
        _zf: Open archive the image data is extracted from.
    """
    if node.attr["Type"] == "Image":
        # Image takes (node, archive); the former work_folder argument is gone.
        image = Image(node, _zf)
        Images[node.attr["ID"]] = image


def res_add_drawparams(node, _zf):
    """Register every child of ``node`` in the module-level ``DrawParams`` table.

    Args:
        node: Container node whose children each carry an ``"ID"`` attribute.
        _zf: Archive handle. Not used by this loader.
    """
    for child in node.children:
        param = DrawParam(child)
        DrawParams[child.attr["ID"]] = param
Loading