diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/document.py b/core/document.py index 56c89e1..3fec951 100644 --- a/core/document.py +++ b/core/document.py @@ -1,18 +1,23 @@ import io import os +import shutil +import tempfile import traceback -from typing import Optional -from zipfile import PyZipFile +from pathlib import Path +from zipfile import BadZipFile, PyZipFile import cssselect2 from defusedxml import ElementTree from .constants import UNITS -from .resources import res_add_font, res_add_multimedia +from .resources import ( + Images, + MultiMedias, + res_add_drawparams, + res_add_font, + res_add_multimedia, +) from .surface import * -from pathlib import Path -import tempfile -import shutil class OFDFile(object): @@ -28,6 +33,8 @@ class OFDFile(object): def __init__(self, fobj): self.zf = fobj if isinstance(fobj, PyZipFile) else PyZipFile(fobj) + if getattr(fobj, "filename", None): + self.zf.filename = getattr(fobj, "filename") # for info in self._zf.infolist(): # print(info) self.node_tree = self.read_node("OFD.xml") @@ -51,10 +58,7 @@ def draw_document(self, doc_num=0, destination: Optional[str] = None): paths = [] for i, page in enumerate(document.pages): surface = Surface(page, os.path.split(self.zf.filename)[-1].strip(".ofd")) - paths.append( - surface.draw(page, destination / Path(f"{surface.filename}_{i}.png")) - ) - shutil.rmtree(self.document.work_folder, ignore_errors=True) + paths.append(surface.draw(page, destination / Path(f"{surface.filename}_{i}.png"))) return paths @@ -62,20 +66,19 @@ class OFDDocument(object): def __init__(self, _zf, node, n=0): self.pages = [] self._zf = _zf - self.work_folder = tempfile.mkdtemp() self.name = f"Doc_{n}" self.node = node - self.physical_box = [ - float(i) - for i in node["CommonData"]["PageArea"]["PhysicalBox"].text.split(" ") - ] + try: + self.physical_box = [ + float(i) for i in node["CommonData"]["PageArea"]["PhysicalBox"].text.split(" ") + ] + except: + self.physical_box = [0.0, 0.0, 210.0, 140.0] self._parse_res() # print('Resources:', Fonts, Images) # assert len(node['CommonData']['TemplatePage']) == len(node['Pages']['Page']) if isinstance(node["Pages"]["Page"], list): - sorted_pages = sorted( - node["Pages"]["Page"], key=lambda x: int(x.attr["ID"]) - ) + sorted_pages = sorted(node["Pages"]["Page"], key=lambda x: int(x.attr["ID"])) else: sorted_pages = [node["Pages"]["Page"]] sorted_tpls = [] @@ -89,33 +92,71 @@ def __init__(self, _zf, node, n=0): seal_node = None if f"{self.name}/Signs/Sign_0/SignedValue.dat" in _zf.namelist(): - seal_file = OFDFile( - io.BytesIO(_zf.read(f"{self.name}/Signs/Sign_0/SignedValue.dat")) - ) - seal_node = seal_file.document.pages[0].page_node + try: + seal_file = OFDFile( + io.BytesIO(_zf.read(f"{self.name}/Signs/Sign_0/SignedValue.dat")) + ) + seal_node = seal_file.document.pages[0].page_node + except BadZipFile as _: + print(f"BadZipFile: {self.name}/Signs/Sign_0/SignedValue.dat") - for i, p in enumerate(sorted_pages): - document = _zf.read(self.name + "/" + sorted_pages[i].attr["BaseLoc"]) - tree = ElementTree.fromstring(document) - root = cssselect2.ElementWrapper.from_xml_root(tree) - page_node = Node(root) + annots = None + if "Annotations" in self.node: + annots = self.get_node_tree(self.name + "/" + self.node["Annotations"].text) + for i, p in enumerate(sorted_pages): + page_id = p.attr["ID"] + page_node = self.get_node_tree(self.name + "/" + sorted_pages[i].attr["BaseLoc"]) + annot_node = None + if annots: + if isinstance(annots["Page"], list): + annot_page = next( + iter([page for page in annots["Page"] if page.attr["PageID"] == page_id]), + None, + ) + if annot_page: + annot_node = self.get_node_tree( + self.name + "/Annots/" + annot_page["FileLoc"].text + ) + elif isinstance(annots["Page"], Node) and annots["Page"].attr["PageID"] == page_id: + annot_node = self.get_node_tree( + self.name + "/Annots/" + annots["Page"]["FileLoc"].text + ) tpl_node = None - if i < len(sorted_tpls): - document = _zf.read(self.name + "/" + sorted_tpls[i].attr["BaseLoc"]) - tree = ElementTree.fromstring(document) - root = cssselect2.ElementWrapper.from_xml_root(tree) - tpl_node = Node(root) + try: + # get tpl_node from ID + tpl = [ + tpl + for tpl in sorted_tpls + if page_node["Template"].attr["TemplateID"] == tpl.attr["ID"] + ][0] + tpl_node = self.get_node_tree(self.name + "/" + tpl.attr["BaseLoc"]) + except: + pass + # fallback using sorted one. + if tpl_node is None and i < len(sorted_tpls): + tpl_node = self.get_node_tree(self.name + "/" + sorted_tpls[i].attr["BaseLoc"]) + self.pages.append( OFDPage( self, f"Page_{i}", + page_id, page_node, tpl_node, seal_node if i == 0 else None, + annot_node=annot_node, ) ) + def get_node_tree(self, location): + if location not in self._zf.namelist(): + return None + document = self._zf.read(location) + tree = ElementTree.fromstring(document) + root = cssselect2.ElementWrapper.from_xml_root(tree) + return Node(root) + def _parse_res(self): if "DocumentRes" in self.node["CommonData"]: node = Node.from_zp_location( @@ -132,7 +173,7 @@ def _parse_res(self): def _parse_res_node(self, node): if node.tag in RESOURCE_TAGS: try: - RESOURCE_TAGS[node.tag](node, self._zf, self.work_folder) + RESOURCE_TAGS[node.tag](node, self._zf) except Exception as e: # Error in point parsing, do nothing print_node_recursive(node) @@ -145,17 +186,27 @@ def _parse_res_node(self, node): class OFDPage(object): - def __init__(self, parent: OFDDocument, name, page_node, tpl_node, seal_node): + + def __init__( + self, + parent: OFDDocument, + name, + page_id, + page_node, + tpl_node, + seal_node=None, + annot_node=None, + ): self.parent = parent + self.page_id = page_id self.name = f"{parent.name}_{name}" self.physical_box = self.parent.physical_box if "Area" in page_node and "PhysicalBox" in page_node["Area"]: - self.physical_box = [ - float(i) for i in page_node["Area"]["PhysicalBox"].text.split(" ") - ] + self.physical_box = [float(i) for i in page_node["Area"]["PhysicalBox"].text.split(" ")] self.tpl_node = tpl_node self.page_node = page_node self.seal_node = seal_node + self.annot_node = annot_node class Surface(object): @@ -179,6 +230,26 @@ def cairo_draw(self, cr, node): print(traceback.format_exc()) pass return # no need to go deeper + if node.tag == "Appearance": + boundary = ( + [float(i) for i in node.attr["Boundary"].split(" ")] + if "Boundary" in node.attr + else [0, 0, 0, 0] + ) + cr.save() + cr.translate(boundary[0], boundary[1]) + for child in node.children: + # Only draw known tags + self.cairo_draw(cr, child) + cr.restore() + return + elif node.tag == "Layer": + try: + cairo_layer(node) + except Exception as e: + # Error in point parsing, do nothing + print_node_recursive(node) + print(traceback.format_exc()) for child in node.children: # Only draw known tags @@ -190,7 +261,7 @@ def draw(self, page, path: Optional[str] = None) -> str: physical_height = self.page.physical_box[3] width = int(physical_width * self.pixels_per_mm) height = int(physical_height * self.pixels_per_mm) - # print(f"create cairo surface, width: {width}, height: {height}") + # print(f'create cairo surface, width: {width}, height: {height}') cairo_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height) self.cr = cairo.Context(cairo_surface) @@ -200,9 +271,13 @@ def draw(self, page, path: Optional[str] = None) -> str: self.cr.paint() self.cr.move_to(0, 0) - self.cairo_draw(self.cr, self.page.tpl_node) + if self.page.tpl_node: + self.cairo_draw(self.cr, self.page.tpl_node) self.cairo_draw(self.cr, self.page.page_node) + if self.page.annot_node: + self.cairo_draw(self.cr, self.page.annot_node) + # self.cr.scale(self.pixels_per_mm, self.pixels_per_mm) # draw StampAnnot if self.page.seal_node: @@ -225,6 +300,7 @@ def draw(self, page, path: Optional[str] = None) -> str: RESOURCE_TAGS = { "Font": res_add_font, "MultiMedia": res_add_multimedia, + "DrawParams": res_add_drawparams, } diff --git a/core/resources.py b/core/resources.py index 7ddc219..3dad335 100644 --- a/core/resources.py +++ b/core/resources.py @@ -1,16 +1,19 @@ +import os import platform + import gi gi.require_version("Gtk", "3.0") gi.require_version("PangoCairo", "1.0") -from gi.repository import PangoCairo -import cairo -from subprocess import Popen, PIPE +from subprocess import PIPE, Popen +import cairo +from gi.repository import PangoCairo Fonts = {} MultiMedias = {} Images = {} +DrawParams = {} font_map = PangoCairo.font_map_get_default() Cairo_Font_Family_Names = [f.get_name() for f in font_map.list_families()] # print(Cairo_Font_Family_Names) @@ -76,7 +79,7 @@ def parse_from_node(node): class Image(MultiMedia): - def __init__(self, node, _zf, work_folder: str): + def __init__(self, node, _zf): super().__init__(node) self.png_location = None self.Format = node.attr["Format"] if "Format" in node.attr else "" @@ -84,15 +87,20 @@ def __init__(self, node, _zf, work_folder: str): if suffix == "jb2": jb2_path = [loc for loc in _zf.namelist() if self.location in loc][0] - x_path = _zf.extract(jb2_path, path=work_folder) + tmp_folder = os.path.basename(_zf.filename).replace(".ofd", "") + x_path = _zf.extract(jb2_path, tmp_folder) png_path = x_path.replace(".jb2", ".png") - if platform.system() == "Windows": Popen(["./bin/jbig2dec", "-o", png_path, x_path], stdout=PIPE) else: Popen(["jbig2dec", "-o", png_path, x_path], stdout=PIPE) self.png_location = png_path + elif suffix == "png": + png_path = [loc for loc in _zf.namelist() if self.location in loc][0] + tmp_folder = os.path.basename(_zf.filename).replace(".ofd", "") + x_path = _zf.extract(png_path, tmp_folder) + self.png_location = x_path def get_cairo_surface(self): if self.png_location: @@ -103,11 +111,55 @@ def __repr__(self): return f"Image ID:{self.ID}, Format:{self.Format}" -def res_add_font(node, _zf, work_folder): +class DrawParam(object): + def __init__(self, node=None): + self.ID = node.attr.get("ID", None) if node else None + self.line_width = node.attr.get("LineWidth", 0.25) if node else 0.25 + + self.stroke_color = ( + next( + iter( + [ + [float(i) / 256.0 for i in child.attr["Value"].split(" ")] + for child in node.children + if child.tag == "StrokeColor" and "Value" in child.attr + ] + ), + [0, 0, 0], + ) + if node + else [0, 0, 0] + ) + self.fill_color = ( + next( + iter( + [ + [float(i) / 256.0 for i in child.attr["Value"].split(" ")] + for child in node.children + if child.tag == "FillColor" and "Value" in child.attr + ] + ), + [0, 0, 0], + ) + if node + else [0, 0, 0] + ) + # print(self) + + def __repr__(self): + return f"ID[{self.ID}], line_width: {self.line_width}, stroke{self.stroke_color}, fill{self.fill_color}" + + +def res_add_font(node, _zf): Fonts[node.attr["ID"]] = Font(node.attr) -def res_add_multimedia(node, _zf, work_folder): +def res_add_multimedia(node, _zf): if node.attr["Type"] == "Image": - image = Image(node, _zf, work_folder) + image = Image(node, _zf) Images[node.attr["ID"]] = image + + +def res_add_drawparams(node, _zf): + for draw_param in node.children: + DrawParams[draw_param.attr["ID"]] = DrawParam(draw_param) diff --git a/core/surface.py b/core/surface.py index b9f708e..49fbcf0 100644 --- a/core/surface.py +++ b/core/surface.py @@ -1,12 +1,14 @@ import re +from math import atan2, cos, hypot, pi, radians, sin + import gi -from .resources import Fonts, Images +from .resources import DrawParam, DrawParams, Fonts, Images gi.require_version("Gtk", "3.0") gi.require_version("PangoCairo", "1.0") -from gi.repository import Pango, PangoCairo import cairo +from gi.repository import Pango, PangoCairo SCALE_192 = 7.559 SCALE_128 = 5.039 @@ -26,6 +28,17 @@ # ) +# https://github.com/Kozea/CairoSVG/blob/main/cairosvg/helpers.py#L95 +def rotate(x, y, angle): + """Rotate a point of an angle around the origin point.""" + return x * cos(angle) - y * sin(angle), y * cos(angle) + x * sin(angle) + + +def point_angle(cx, cy, px, py): + """Return angle between x axis and point knowing given center.""" + return atan2(py - cy, px - cx) + + def _tokenize_path(pathdef): for x in COMMAND_RE.split(pathdef): if x in COMMANDS: @@ -79,18 +92,19 @@ def _cairo_draw_path(cr, boundary, path): if elements[-1] in COMMANDS: command = elements.pop() else: - raise Exception("操作符违法") + raise Exception(f"操作符 {elements[-1]} 违法") if command == "M": x = float(elements.pop()) y = float(elements.pop()) cr.move_to(x, y) - + current_pos = (x, y) elif command == "L": x = float(elements.pop()) y = float(elements.pop()) # pos = (x_start + x, y_start + y) cr.line_to(x, y) + current_pos = (x, y) # draw.line(current_pos + pos, fill=fillColor, width=lineWidth) elif command == "B": @@ -101,15 +115,68 @@ def _cairo_draw_path(cr, boundary, path): x3 = float(elements.pop()) y3 = float(elements.pop()) cr.curve_to(x1, y1, x2, y2, x3, y3) + current_pos = (x3, y3) elif command == "A": - # cr.arc() - pass + # rx ry x-axis-rotation large-arc-flag sweep-flag x y + # A 1.875 1.875 90 0 1 0.125 2 + # GBT_33190-2016_电子文件存储与交换格式版式文档.pdf #9.3.5 + # https://github.com/Kozea/CairoSVG/blob/main/cairosvg/path.py#L209 + ellipse_x, ellipse_y, rotation_angle, large, sweep, x3, y3 = [ + elements.pop() for _ in range(7) + ] + rx, ry = float(ellipse_x), float(ellipse_y) + rotation = radians(float(rotation_angle)) + large, sweep = int(large), int(sweep) + x1, y1 = current_pos + radius = rx + radii_ratio = ry / rx + x3, y3 = float(x3) - x1, float(y3) - y1 + + xe, ye = rotate(x3, y3, -rotation) + ye /= radii_ratio + # Find the angle between the second point and the x axis + angle = point_angle(0, 0, xe, ye) + + # Put the second point onto the x axis + xe = hypot(xe, ye) + ye = 0 + + # Update the x radius if it is too small + rx = max(rx, xe / 2) + + # Find one circle centre + xc = xe / 2 + yc = (rx**2 - xc**2) ** 0.5 + + # Choose between the two circles according to flags + if not (large ^ sweep): + yc = -yc + + # Define the arc sweep + arc = cr.arc if sweep else cr.arc_negative + + # Put the second point and the center back to their positions + xe, ye = rotate(xe, 0, angle) + xc, yc = rotate(xc, yc, angle) + + # Find the drawing angles + angle1 = point_angle(xc, yc, 0, 0) + angle2 = point_angle(xc, yc, xe, ye) + + cr.save() + cr.translate(x1, y1) + cr.rotate(rotation) + cr.scale(1, radii_ratio) + arc(xc, yc, rx, angle1, angle2) + cr.restore() + current_pos = (current_pos[0] + x3, current_pos[1] + y3) elif command == "Q": x1 = float(elements.pop()) y1 = float(elements.pop()) x2 = float(elements.pop()) y2 = float(elements.pop()) cr.curve_to(x1, y1, x1, y1, x2, y2) + current_pos = (x2, y2) elif command == "C": pass @@ -129,22 +196,33 @@ def _trans_Delta(elements, scale=SCALE_192): return parsed -def cairo_path(cr, node): - lineWidth = float(node.attr["LineWidth"]) if "LineWidth" in node.attr else 0.5 +layer_draw: DrawParam = DrawParam() + + +def cairo_layer(node): + global layer_draw + layer_drawparam = node.attr.get("DrawParam", None) + if layer_drawparam in DrawParams: + layer_draw = DrawParams.get(layer_drawparam) + print(layer_draw) + else: + layer_draw = DrawParam() + + +def cairo_path(cr: cairo.Context, node): + lineWidth = layer_draw.line_width if layer_draw.line_width else 0.5 + lineWidth = float(node.attr["LineWidth"]) if "LineWidth" in node.attr else lineWidth boundary = [float(i) for i in node.attr["Boundary"].split(" ")] ctm = None if "CTM" in node.attr: ctm = [float(i) for i in node.attr["CTM"].split(" ")] - fillColor = [0, 0, 0] - if "FillColor" in node: - fillColor = [ - float(i) / 255.0 for i in node["FillColor"].attr["Value"].split(" ") - ] - strokeColor = [0, 0, 0] - if "StrokeColor" in node: - strokeColor = [ - float(i) / 255.0 for i in node["StrokeColor"].attr["Value"].split(" ") - ] + fillColor = layer_draw.fill_color + if "FillColor" in node and "Value" in node.attr: + fillColor = [float(i) / 256.0 for i in node["FillColor"].attr["Value"].split(" ")] + using_fill_color = sum(fillColor) > 0.0 + strokeColor = layer_draw.stroke_color + if "StrokeColor" in node and "Value" in node.attr: + strokeColor = [float(i) / 256.0 for i in node["StrokeColor"].attr["Value"].split(" ")] # print('draw path', boundary, fillColor, strokeColor) cr.save() if ctm: @@ -159,14 +237,17 @@ def cairo_path(cr, node): cr.translate(boundary[0], boundary[1]) AbbreviatedData = node["AbbreviatedData"].text - cr.set_source_rgba(*strokeColor) + if using_fill_color: + cr.set_source_rgba(*fillColor) + else: + cr.set_source_rgba(*strokeColor) cr.set_line_width(lineWidth) _cairo_draw_path(cr, boundary, AbbreviatedData) cr.stroke() cr.restore() -def cairo_text(cr, node): +def cairo_text(cr: cairo.Context, node): boundary = [float(i) for i in node.attr["Boundary"].split(" ")] ctm = None if "CTM" in node.attr: @@ -174,17 +255,13 @@ def cairo_text(cr, node): font_id = node.attr["Font"] font_family = get_font_from_id(font_id).get_font_family() font_size = float(node.attr["Size"]) / 1.3 - fillColor = [0, 0, 0] - if "FillColor" in node: - fillColor = [ - float(i) / 255.0 for i in node["FillColor"].attr["Value"].split(" ") - ] - - strokeColor = [0, 0, 0] - if "StrokeColor" in node: - strokeColor = [ - float(i) / 255.0 for i in node["StrokeColor"].attr["Value"].split(" ") - ] + fillColor = layer_draw.fill_color + if "FillColor" in node and "Value" in node["FillColor"].attr: + fillColor = [float(i) / 255.0 for i in node["FillColor"].attr["Value"].split(" ")] + + strokeColor = layer_draw.stroke_color + if "StrokeColor" in node and "Value" in node["StrokeColor"].attr: + strokeColor = [float(i) / 255.0 for i in node["StrokeColor"].attr["Value"].split(" ")] TextCode = node["TextCode"] text = TextCode.text @@ -195,14 +272,18 @@ def cairo_text(cr, node): if "DeltaX" in TextCode.attr: deltaX = _trans_Delta(TextCode.attr["DeltaX"].split(" "), scale=1) if deltaX and len(deltaX) + 1 != len(text): - # raise Exception('TextCode DeltaX 与字符个数不符') + # raise Exception(f'{text} TextCode DeltaX 与字符个数不符') deltaX = deltaX[: len(text) - 1] + if deltaX and len(deltaX) < len(text) - 1: + deltaX.extend([deltaX[-1]] * (len(text) - 1 - len(deltaX))) if "DeltaY" in TextCode.attr: deltaY = _trans_Delta(TextCode.attr["DeltaY"].split(" "), scale=1) if deltaY and len(deltaY) + 1 != len(text): - # raise Exception('TextCode DeltaY 与字符个数不符') + # raise Exception(f'{text} TextCode DeltaY 与字符个数不符') deltaY = deltaY[: len(text) - 1] + if deltaY and len(deltaY) < len(text) - 1: + deltaY.extend([deltaY[-1]] * (len(text) - 1 - len(deltaY))) X = float(TextCode.attr["X"]) Y = float(TextCode.attr["Y"]) @@ -237,7 +318,7 @@ def cairo_text(cr, node): pass -def cairo_image(cr, node): +def cairo_image(cr: cairo.Context, node): resource_id = node.attr["ResourceID"] boundary = [float(i) for i in node.attr["Boundary"].split(" ")] ctm = None @@ -247,7 +328,8 @@ def cairo_image(cr, node): cr.save() x, y = boundary[0], boundary[1] - width, height = cr.get_matrix().transform_point(boundary[2], boundary[3]) + width = cr.get_matrix().xx * boundary[2] + height = cr.get_matrix().yy * boundary[3] # print('cairo image ctm:', ctm) # ctm用不到 x, y = cr.get_matrix().transform_point(x, y)