diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/document.py b/core/document.py index e23a9a6..0e33a1a 100644 --- a/core/document.py +++ b/core/document.py @@ -1,13 +1,13 @@ import io import os import traceback -from zipfile import PyZipFile +from zipfile import PyZipFile, BadZipFile import cssselect2 from defusedxml import ElementTree from .constants import UNITS -from .resources import res_add_font, res_add_multimedia, MultiMedias, Images +from .resources import res_add_font, res_add_multimedia, res_add_drawparams, MultiMedias, Images from .surface import * @@ -23,6 +23,8 @@ class OFDFile(object): def __init__(self, fobj): self.zf = fobj if isinstance(fobj, PyZipFile) else PyZipFile(fobj) + if getattr(fobj, 'filename', None): + self.zf.filename = getattr(fobj, 'filename') # for info in self._zf.infolist(): # print(info) self.node_tree = self.read_node('OFD.xml') @@ -46,14 +48,17 @@ def draw_document(self, doc_num=0): paths.append(surface.draw(page)) return paths -class OFDDocument(object): +class OFDDocument(object): def __init__(self, _zf, node, n=0): self.pages = [] self._zf = _zf self.name = f'Doc_{n}' self.node = node - self.physical_box = [float(i) for i in node['CommonData']['PageArea']['PhysicalBox'].text.split(' ')] + try: + self.physical_box = [float(i) for i in node['CommonData']['PageArea']['PhysicalBox'].text.split(' ')] + except: + self.physical_box = [0.0, 0.0, 210.0, 140.0] self._parse_res() # print('Resources:', Fonts, Images) # assert len(node['CommonData']['TemplatePage']) == len(node['Pages']['Page']) @@ -70,22 +75,48 @@ def __init__(self, _zf, node, n=0): seal_node = None if f'{self.name}/Signs/Sign_0/SignedValue.dat' in _zf.namelist(): - seal_file = OFDFile(io.BytesIO(_zf.read(f'{self.name}/Signs/Sign_0/SignedValue.dat'))) - seal_node = seal_file.document.pages[0].page_node + try: + seal_file = OFDFile(io.BytesIO(_zf.read(f'{self.name}/Signs/Sign_0/SignedValue.dat'))) + seal_node = seal_file.document.pages[0].page_node + except BadZipFile as _: + print(f'BadZipFile: {self.name}/Signs/Sign_0/SignedValue.dat') - for i, p in enumerate(sorted_pages): - document = _zf.read(self.name + '/' + sorted_pages[i].attr['BaseLoc']) - tree = ElementTree.fromstring(document) - root = cssselect2.ElementWrapper.from_xml_root(tree) - page_node = Node(root) + annots = None + if 'Annotations' in self.node: + annots = self.get_node_tree(self.name + '/' + self.node['Annotations'].text) + for i, p in enumerate(sorted_pages): + page_id = p.attr['ID'] + page_node = self.get_node_tree(self.name + '/' + sorted_pages[i].attr['BaseLoc']) + annot_node = None + if annots: + if isinstance(annots['Page'], list): + annot_page = next(iter([page for page in annots['Page'] if page.attr['PageID'] == page_id]), None) + if annot_page: + annot_node = self.get_node_tree(self.name + '/Annots/' + annot_page['FileLoc'].text) + elif isinstance(annots['Page'], Node) and annots['Page'].attr['PageID'] == page_id: + annot_node = self.get_node_tree(self.name + '/Annots/' + annots['Page']['FileLoc'].text) tpl_node = None - if i < len(sorted_tpls): - document = _zf.read(self.name + '/' + sorted_tpls[i].attr['BaseLoc']) - tree = ElementTree.fromstring(document) - root = cssselect2.ElementWrapper.from_xml_root(tree) - tpl_node = Node(root) - self.pages.append(OFDPage(self, f'Page_{i}', page_node, tpl_node, seal_node if i == 0 else None)) + try: + # get tpl_node from ID + tpl = [tpl for tpl in sorted_tpls if page_node['Template'].attr['TemplateID'] == tpl.attr['ID']][0] + tpl_node = self.get_node_tree(self.name + '/' + tpl.attr['BaseLoc']) + except: + pass + # fallback using sorted one. + if tpl_node is None and i < len(sorted_tpls): + tpl_node = self.get_node_tree(self.name + '/' + sorted_tpls[i].attr['BaseLoc']) + + self.pages.append(OFDPage(self, f'Page_{i}', page_id, page_node, tpl_node, seal_node if i == 0 else None, + annot_node=annot_node)) + + def get_node_tree(self, location): + if location not in self._zf.namelist(): + return None + document = self._zf.read(location) + tree = ElementTree.fromstring(document) + root = cssselect2.ElementWrapper.from_xml_root(tree) + return Node(root) def _parse_res(self): if 'DocumentRes' in self.node['CommonData']: @@ -113,8 +144,9 @@ def _parse_res_node(self, node): class OFDPage(object): - def __init__(self, parent: OFDDocument, name, page_node, tpl_node, seal_node): + def __init__(self, parent: OFDDocument, name, page_id, page_node, tpl_node, seal_node=None, annot_node=None): self.parent = parent + self.page_id = page_id self.name = f'{parent.name}_{name}' self.physical_box = self.parent.physical_box if 'Area' in page_node and 'PhysicalBox' in page_node['Area']: @@ -122,6 +154,7 @@ def __init__(self, parent: OFDDocument, name, page_node, tpl_node, seal_node): self.tpl_node = tpl_node self.page_node = page_node self.seal_node = seal_node + self.annot_node = annot_node class Surface(object): @@ -146,6 +179,22 @@ def cairo_draw(self, cr, node): print(traceback.format_exc()) pass return # no need to go deeper + if node.tag == 'Appearance': + boundary = [float(i) for i in node.attr['Boundary'].split(' ')] if 'Boundary' in node.attr else [0, 0, 0, 0] + cr.save() + cr.translate(boundary[0], boundary[1]) + for child in node.children: + # Only draw known tags + self.cairo_draw(cr, child) + cr.restore() + return + elif node.tag == 'Layer': + try: + cairo_layer(node) + except Exception as e: + # Error in point parsing, do nothing + print_node_recursive(node) + print(traceback.format_exc()) for child in node.children: # Only draw known tags @@ -157,7 +206,7 @@ def draw(self, page): physical_height = self.page.physical_box[3] width = int(physical_width * self.pixels_per_mm) height = int(physical_height * self.pixels_per_mm) - print(f'create cairo surface, width: {width}, height: {height}') + # print(f'create cairo surface, width: {width}, height: {height}') cairo_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height) self.cr = cairo.Context(cairo_surface) @@ -167,9 +216,13 @@ def draw(self, page): self.cr.paint() self.cr.move_to(0, 0) - self.cairo_draw(self.cr, self.page.tpl_node) + if self.page.tpl_node: + self.cairo_draw(self.cr, self.page.tpl_node) self.cairo_draw(self.cr, self.page.page_node) + if self.page.annot_node: + self.cairo_draw(self.cr, self.page.annot_node) + # self.cr.scale(self.pixels_per_mm, self.pixels_per_mm) # draw StampAnnot if self.page.seal_node: @@ -192,6 +245,7 @@ def draw(self, page): RESOURCE_TAGS = { 'Font': res_add_font, 'MultiMedia': res_add_multimedia, + 'DrawParams': res_add_drawparams, } diff --git a/core/resources.py b/core/resources.py index e73509f..a86d58d 100644 --- a/core/resources.py +++ b/core/resources.py @@ -1,5 +1,6 @@ import platform import gi +import os gi.require_version("Gtk", "3.0") gi.require_version('PangoCairo', '1.0') @@ -11,6 +12,7 @@ Fonts = {} MultiMedias = {} Images = {} +DrawParams = {} font_map = PangoCairo.font_map_get_default() Cairo_Font_Family_Names = [f.get_name() for f in font_map.list_families()] # print(Cairo_Font_Family_Names) @@ -77,8 +79,9 @@ def __init__(self, node, _zf): # print('tempdir', tempfile.gettempdir()) jb2_path = [loc for loc in _zf.namelist() if self.location in loc][0] - png_path = jb2_path.replace('.jb2', '.png') - x_path = _zf.extract(jb2_path) + tmp_folder = os.path.basename(_zf.filename).replace('.ofd', '') + x_path = _zf.extract(jb2_path, tmp_folder) + png_path = x_path.replace('.jb2', '.png') if platform.system() == 'Windows': Popen(['./bin/jbig2dec', '-o', png_path, x_path], stdout=PIPE) else: @@ -86,6 +89,11 @@ def __init__(self, node, _zf): # print(f'jbig2dec {png_path}', output.stdout.read()) self.png_location = png_path + elif suffix == 'png': + png_path = [loc for loc in _zf.namelist() if self.location in loc][0] + tmp_folder = os.path.basename(_zf.filename).replace('.ofd', '') + x_path = _zf.extract(png_path, tmp_folder) + self.png_location = x_path def get_cairo_surface(self): if self.png_location: @@ -96,6 +104,24 @@ def __repr__(self): return f'Image ID:{self.ID}, Format:{self.Format}' +class DrawParam(object): + def __init__(self, node=None): + self.ID = node.attr.get('ID', None) if node else None + self.line_width = node.attr.get('LineWidth', 0.25) if node else 0.25 + + self.stroke_color = next(iter( + [[float(i) / 256. for i in child.attr['Value'].split(' ')] + for child in node.children if child.tag == 'StrokeColor' and 'Value' in child.attr] + ), [0, 0, 0]) if node else [0, 0, 0] + self.fill_color = next(iter( + [[float(i) / 256. for i in child.attr['Value'].split(' ')] + for child in node.children if child.tag == 'FillColor' and 'Value' in child.attr] + ), [0, 0, 0]) if node else [0, 0, 0] + # print(self) + + def __repr__(self): + return f'ID[{self.ID}], line_width: {self.line_width}, stroke{self.stroke_color}, fill{self.fill_color}' + def res_add_font(node, _zf): Fonts[node.attr['ID']] = Font(node.attr) @@ -104,3 +130,8 @@ def res_add_multimedia(node, _zf): if node.attr['Type'] == 'Image': image = Image(node, _zf) Images[node.attr['ID']] = image + + +def res_add_drawparams(node, _zf): + for draw_param in node.children: + DrawParams[draw_param.attr['ID']] = DrawParam(draw_param) \ No newline at end of file diff --git a/core/surface.py b/core/surface.py index 3c793bc..e227575 100644 --- a/core/surface.py +++ b/core/surface.py @@ -1,7 +1,8 @@ +from math import pi, sin, cos, hypot, atan2, radians import re import gi -from .resources import Fonts, Images +from .resources import Fonts, Images, DrawParams, DrawParam gi.require_version("Gtk", "3.0") gi.require_version('PangoCairo', '1.0') @@ -19,6 +20,17 @@ 'song' in f.get_name().lower() or 'cour' in f.get_name().lower() or 'kai' in f.get_name().lower()]) +# https://github.com/Kozea/CairoSVG/blob/main/cairosvg/helpers.py#L95 +def rotate(x, y, angle): + """Rotate a point of an angle around the origin point.""" + return x * cos(angle) - y * sin(angle), y * cos(angle) + x * sin(angle) + + +def point_angle(cx, cy, px, py): + """Return angle between x axis and point knowing given center.""" + return atan2(py - cy, px - cx) + + def _tokenize_path(pathdef): for x in COMMAND_RE.split(pathdef): if x in COMMANDS: @@ -70,18 +82,19 @@ def _cairo_draw_path(cr, boundary, path): if elements[-1] in COMMANDS: command = elements.pop() else: - raise Exception('操作符违法') + raise Exception(f'操作符 {elements[-1]} 违法') if command == 'M': x = float(elements.pop()) y = float(elements.pop()) cr.move_to(x, y) - + current_pos = (x, y) elif command == 'L': x = float(elements.pop()) y = float(elements.pop()) # pos = (x_start + x, y_start + y) cr.line_to(x, y) + current_pos = (x, y) # draw.line(current_pos + pos, fill=fillColor, width=lineWidth) elif command == 'B': @@ -92,15 +105,66 @@ def _cairo_draw_path(cr, boundary, path): x3 = float(elements.pop()) y3 = float(elements.pop()) cr.curve_to(x1, y1, x2, y2, x3, y3) + current_pos = (x3, y3) elif command == 'A': - # cr.arc() - pass + # rx ry x-axis-rotation large-arc-flag sweep-flag x y + # A 1.875 1.875 90 0 1 0.125 2 + # GBT_33190-2016_电子文件存储与交换格式版式文档.pdf #9.3.5 + # https://github.com/Kozea/CairoSVG/blob/main/cairosvg/path.py#L209 + ellipse_x, ellipse_y, rotation_angle, large, sweep, x3, y3 = [elements.pop() for _ in range(7)] + rx, ry = float(ellipse_x), float(ellipse_y) + rotation = radians(float(rotation_angle)) + large, sweep = int(large), int(sweep) + x1, y1 = current_pos + radius = rx + radii_ratio = ry / rx + x3, y3 = float(x3) - x1, float(y3) - y1 + + xe, ye = rotate(x3, y3, -rotation) + ye /= radii_ratio + # Find the angle between the second point and the x axis + angle = point_angle(0, 0, xe, ye) + + # Put the second point onto the x axis + xe = hypot(xe, ye) + ye = 0 + + # Update the x radius if it is too small + rx = max(rx, xe / 2) + + # Find one circle centre + xc = xe / 2 + yc = (rx ** 2 - xc ** 2) ** .5 + + # Choose between the two circles according to flags + if not (large ^ sweep): + yc = -yc + + # Define the arc sweep + arc = cr.arc if sweep else cr.arc_negative + + # Put the second point and the center back to their positions + xe, ye = rotate(xe, 0, angle) + xc, yc = rotate(xc, yc, angle) + + # Find the drawing angles + angle1 = point_angle(xc, yc, 0, 0) + angle2 = point_angle(xc, yc, xe, ye) + + cr.save() + cr.translate(x1, y1) + cr.rotate(rotation) + cr.scale(1, radii_ratio) + arc(xc, yc, rx, angle1, angle2) + cr.restore() + current_pos = (current_pos[0] + x3, current_pos[1] + y3) elif command == 'Q': x1 = float(elements.pop()) y1 = float(elements.pop()) x2 = float(elements.pop()) y2 = float(elements.pop()) cr.curve_to(x1, y1, x1, y1, x2, y2) + current_pos = (x2, y2) elif command == 'C': pass @@ -120,18 +184,33 @@ def _trans_Delta(elements, scale=SCALE_192): return parsed -def cairo_path(cr, node): - lineWidth = float(node.attr['LineWidth']) if 'LineWidth' in node.attr else 0.5 +layer_draw: DrawParam = DrawParam() + + +def cairo_layer(node): + global layer_draw + layer_drawparam = node.attr.get('DrawParam', None) + if layer_drawparam in DrawParams: + layer_draw = DrawParams.get(layer_drawparam) + print(layer_draw) + else: + layer_draw = DrawParam() + + +def cairo_path(cr: cairo.Context, node): + lineWidth = layer_draw.line_width if layer_draw.line_width else 0.5 + lineWidth = float(node.attr['LineWidth']) if 'LineWidth' in node.attr else lineWidth boundary = [float(i) for i in node.attr['Boundary'].split(' ')] ctm = None if 'CTM' in node.attr: ctm = [float(i) for i in node.attr['CTM'].split(' ')] - fillColor = [0, 0, 0] - if 'FillColor' in node: - fillColor = [float(i) / 255. for i in node['FillColor'].attr['Value'].split(' ')] - strokeColor = [0, 0, 0] - if 'StrokeColor' in node: - strokeColor = [float(i) / 255. for i in node['StrokeColor'].attr['Value'].split(' ')] + fillColor = layer_draw.fill_color + if 'FillColor' in node and 'Value' in node.attr: + fillColor = [float(i) / 256. for i in node['FillColor'].attr['Value'].split(' ')] + using_fill_color = sum(fillColor) > 0.0 + strokeColor = layer_draw.stroke_color + if 'StrokeColor' in node and 'Value' in node.attr: + strokeColor = [float(i) / 256. for i in node['StrokeColor'].attr['Value'].split(' ')] # print('draw path', boundary, fillColor, strokeColor) cr.save() if ctm: @@ -146,14 +225,17 @@ def cairo_path(cr, node): cr.translate(boundary[0], boundary[1]) AbbreviatedData = node['AbbreviatedData'].text - cr.set_source_rgba(*strokeColor) + if using_fill_color: + cr.set_source_rgba(*fillColor) + else: + cr.set_source_rgba(*strokeColor) cr.set_line_width(lineWidth) _cairo_draw_path(cr, boundary, AbbreviatedData) cr.stroke() cr.restore() -def cairo_text(cr, node): +def cairo_text(cr: cairo.Context, node): boundary = [float(i) for i in node.attr['Boundary'].split(' ')] ctm = None if 'CTM' in node.attr: @@ -161,12 +243,12 @@ def cairo_text(cr, node): font_id = node.attr['Font'] font_family = get_font_from_id(font_id).get_font_family() font_size = float(node.attr['Size']) / 1.3 - fillColor = [0, 0, 0] - if 'FillColor' in node: + fillColor = layer_draw.fill_color + if 'FillColor' in node and 'Value' in node['FillColor'].attr: fillColor = [float(i) / 255. for i in node['FillColor'].attr['Value'].split(' ')] - strokeColor = [0, 0, 0] - if 'StrokeColor' in node: + strokeColor = layer_draw.stroke_color + if 'StrokeColor' in node and 'Value' in node['StrokeColor'].attr: strokeColor = [float(i) / 255. for i in node['StrokeColor'].attr['Value'].split(' ')] TextCode = node['TextCode'] @@ -178,14 +260,18 @@ def cairo_text(cr, node): if 'DeltaX' in TextCode.attr: deltaX = _trans_Delta(TextCode.attr['DeltaX'].split(' '), scale=1) if deltaX and len(deltaX) + 1 != len(text): - # raise Exception('TextCode DeltaX 与字符个数不符') + # raise Exception(f'{text} TextCode DeltaX 与字符个数不符') deltaX = deltaX[:len(text)-1] + if deltaX and len(deltaX) < len(text) - 1: + deltaX.extend([deltaX[-1]] * (len(text) - 1 - len(deltaX))) if 'DeltaY' in TextCode.attr: deltaY = _trans_Delta(TextCode.attr['DeltaY'].split(' '), scale=1) if deltaY and len(deltaY) + 1 != len(text): - # raise Exception('TextCode DeltaY 与字符个数不符') + # raise Exception(f'{text} TextCode DeltaY 与字符个数不符') deltaY = deltaY[:len(text)-1] + if deltaY and len(deltaY) < len(text) - 1: + deltaY.extend([deltaY[-1]] * (len(text) - 1 - len(deltaY))) X = float(TextCode.attr['X']) Y = float(TextCode.attr['Y']) @@ -220,7 +306,7 @@ def cairo_text(cr, node): pass -def cairo_image(cr, node): +def cairo_image(cr: cairo.Context, node): resource_id = node.attr['ResourceID'] boundary = [float(i) for i in node.attr['Boundary'].split(' ')] ctm = None @@ -230,7 +316,8 @@ def cairo_image(cr, node): cr.save() x, y = boundary[0], boundary[1] - width, height = cr.get_matrix().transform_point(boundary[2], boundary[3]) + width = cr.get_matrix().xx * boundary[2] + height = cr.get_matrix().yy * boundary[3] # print('cairo image ctm:', ctm) # ctm用不到 x, y = cr.get_matrix().transform_point(x, y)