Skip to content

Made it faster and memory efficient, more type safe and so on #12

@dantetemplar

Description

@dantetemplar

I just rewrote your package for my own purposes; maybe you will find something useful in it.

# Based on https://github.com/AndyEveritt/GcodeParser/blob/master/gcodeparser/gcode_parser.py
# Rewritten to be faster, more memory-efficient, and typed
import io
import re
from collections.abc import Iterator
from dataclasses import dataclass
from enum import Enum
from typing import Any

# Regex patterns compiled for performance
#
# GCODE_LINE_PATTERN has two alternatives:
#   1. a command: a G/M/T letter (either case) followed by digits, then zero
#      or more parameter words and an optional trailing ";" comment;
#   2. a ";" followed by comment text (a comment without a command).
# Capture groups read by parse_gcode_lines: 1 = command letter, 2 = command
# number, 3 = raw parameter text, 8 = trailing comment text, 9 = text of a
# comment-only match.
GCODE_LINE_PATTERN = re.compile(
    r'(?!; *.+)(G|M|T|g|m|t)(\d+)(([ \t]*(?!G|M|g|m)\w(".*"|([-+\d\.]*)))*)[ \t]*(;[ \t]*(.*))?|;[ \t]*(.+)'
)
# One parameter: a name (word characters, not starting with a digit), optional
# whitespace, then a value that is a double-quoted string, a dotted digit
# sequence, or an optionally signed decimal. The last alternative can match
# the empty string, which parse_parameters treats as a value-less flag.
PARAMS_PATTERN = re.compile(r'((?!\d)\w+?)\s*(".*"|(\d+\.?)+|[-+]?\d*\.?\d*)')
# Two dots anywhere in a value (e.g. "1.2.3"); infer_element_type keeps such
# values as strings.
DOUBLE_DOT_PATTERN = re.compile(r"\..*\.")
# An optionally signed number with at least one digit after the decimal point.
FLOAT_PATTERN = re.compile(r"[+-]?\d*\.\d+")


class Commands(Enum):
    COMMENT = 0
    MOVE = 1
    OTHER = 2
    TOOLCHANGE = 3


@dataclass
class GcodeLine:
    command: tuple[str, int] | tuple[str, None]
    params: dict[str, float | str]
    comment: str
    line_index: int

    def __post_init__(self):
        if self.command[0] == "G" and self.command[1] in (0, 1, 2, 3):
            self.type = Commands.MOVE
        elif self.command[0] == ";":
            self.type = Commands.COMMENT
        elif self.command[0] == "T":
            self.type = Commands.TOOLCHANGE
        else:
            self.type = Commands.OTHER

    @property
    def command_str(self) -> str:
        return f"{self.command[0]}{self.command[1] if self.command[1] is not None else ''}"

    def get_param(
        self,
        param: str,
        return_type: type[Any] | None = None,
        default: float | str | bool | None = None,
    ) -> float | str | bool | None:
        """
        Returns the value of the param if it exists, otherwise it will the default value.
        If `return_type` is set, the return value will be type cast.
        """
        try:
            if return_type is None:
                return self.params[param]
            else:
                return return_type(self.params[param])
        except KeyError:
            return default

    def update_param(self, param: str, value: int | float) -> float | str | bool | None:
        if self.get_param(param) is None:
            return None
        if type(value) not in (int, float):
            raise TypeError(f"Type {type(value)} is not a valid parameter type")
        self.params[param] = value
        return self.get_param(param)

    def delete_param(self, param: str) -> None:
        if self.get_param(param) is None:
            return
        self.params.pop(param)

    @property
    def gcode_str(self) -> str:
        command = self.command_str

        def param_value(param: str) -> str:
            value = self.get_param(param)
            is_flag_parameter = value is True
            if is_flag_parameter:
                return ""
            return str(value)

        params = " ".join(f"{param}{param_value(param)}" for param in self.params.keys())
        comment = f"; {self.comment}" if self.comment != "" else ""
        if command == ";":
            return comment
        return f"{command} {params} {comment}".strip()


def parse_gcode_lines(gcode: io.TextIOBase | io.StringIO | str, include_comments: bool = True) -> Iterator[GcodeLine]:
    """
    Lazily parse gcode, yielding one GcodeLine per recognised input line.

    Lines on which the gcode pattern finds nothing are skipped. The yielded
    ``line_index`` is the zero-based position of the line in the input, so it
    still counts skipped lines.

    Args:
        gcode: The gcode content as a file-like object, StringIO object, or string
        include_comments: Whether to yield comment-only lines as well

    Yields:
        GcodeLine objects representing parsed gcode commands
    """
    # A plain string is wrapped so it can be iterated line by line like a file.
    source = io.StringIO(gcode) if isinstance(gcode, str) else gcode

    for line_index, raw_line in enumerate(source):
        found = GCODE_LINE_PATTERN.search(raw_line)
        if found is None:
            continue

        groups = found.groups()
        letter = groups[0]

        if letter:
            # Command line: the second-to-last group holds the trailing comment.
            yield GcodeLine(
                command=(letter.upper(), int(groups[1])),
                params=parse_parameters(groups[2] or ""),
                comment=(groups[-2] or "").strip(),
                line_index=line_index,
            )
        elif include_comments:
            # Comment-only line: the last group holds the comment text.
            yield GcodeLine(
                command=(";", None),
                params={},
                comment=(groups[-1] or "").strip(),
                line_index=line_index,
            )


def infer_element_type(element: str) -> type[int] | type[float] | type[str]:
    """
    Infer the Python type of a gcode parameter element.

    Args:
        element: The parameter value string

    Returns:
        The inferred type (int, float, or str)
    """
    if '"' in element or DOUBLE_DOT_PATTERN.search(element):
        return str
    if FLOAT_PATTERN.search(element):
        return float
    return int


def parse_parameters(line: str) -> dict[str, float | str | bool]:
    """
    Parse parameter string from a gcode line into a dictionary.

    Parameter names are upper-cased. A parameter without a value is stored as
    ``True`` (flag parameter). Other values are converted with the type chosen
    by ``infer_element_type``. If that conversion fails (for example a lone
    ``-`` or a trailing-dot number), ``float`` is tried next, and as a last
    resort the raw text is kept, so one malformed value cannot abort parsing.

    Args:
        line: The parameter portion of a gcode line

    Returns:
        Dictionary mapping parameter names to their values
    """
    params: dict[str, float | str | bool] = {}
    for name, raw_value, *_ in PARAMS_PATTERN.findall(line):
        key = name.upper()
        if raw_value == "":
            params[key] = True
            continue
        try:
            params[key] = infer_element_type(raw_value)(raw_value)
        except ValueError:
            try:
                params[key] = float(raw_value)
            except ValueError:
                # Not numeric at all: keep the text rather than raising.
                params[key] = raw_value

    return params


if __name__ == "__main__":
    # Ad-hoc benchmark: parse a local file, report the elapsed time and show
    # the first few parsed commands.
    import time
    from pathlib import Path

    path = Path("plate_1.gcode")
    if not path.exists():
        print(f"File {path} does not exist")
        # `exit` is a helper injected by the `site` module for interactive use
        # and may be missing (e.g. `python -S`); SystemExit is always available.
        raise SystemExit(1)

    t1 = time.perf_counter()
    with open(path) as f:
        lines = list(parse_gcode_lines(f, include_comments=False))
    t2 = time.perf_counter()
    print(f"Time taken: {t2 - t1:.2f} seconds")
    print(f"Parsed {len(lines)} G-code commands")

    # Print first few G-code commands as example
    for line in lines[:5]:
        print(f"Line {line.line_index}: {line.command_str} - {line.params} - {line.comment}")

Main changes

  • Switched from class-based GcodeParser to a functional, streaming API:

    • Removed GcodeParser, get_lines, and split_params.
    • Introduced parse_gcode_lines(gcode, include_comments=True) -> Iterator[GcodeLine] that works with str, io.TextIOBase, and io.StringIO.
    • GcodeLine now includes line_index for original file position.
  • Localized and simplified dependencies:

    • Inlined Commands as a local Enum instead of importing from .commands.
    • Added modern, precise typing (Iterator, dict[str, …], tuple[str, int] | tuple[str, None], etc.).
  • Performance and memory improvements:

    • Compiled regex patterns once at module import (GCODE_LINE_PATTERN, PARAMS_PATTERN, etc.).
    • Parse line-by-line from a stream instead of re.findall on the whole file string, reducing peak memory and improving scalability for large G-code files.
  • Robustness and parsing tweaks:

    • split_params renamed to parse_parameters and refactored; it now uses a precompiled PARAMS_PATTERN that allows whitespace between parameter name and value.
    • GcodeLine.comment is safely normalized (comment.strip() if comment else ""), avoiding potential None.strip() issues.
    • Type detection for parameter values factored into infer_element_type with precompiled patterns; still supports ints, floats, strings, and flag parameters (X with no value → True).
  • Usability:

    • GcodeLine keeps the same interface (get_param, update_param, delete_param, gcode_str, command_str) but now has explicit typing, making it easier to use in typed codebases.
    • Added a minimal __main__ benchmark example that demonstrates streaming use and prints first parsed lines.

Pros

  • Much lower memory footprint and better performance on large .gcode files thanks to streaming and compiled regexes.
  • Cleaner public surface: one main entry point (parse_gcode_lines) and a well-typed GcodeLine.
  • Easier to integrate into pipelines that already work with file-like objects; no need to read the entire file into a string.
  • More robust parameter parsing (handles whitespace between parameter and value) and safer comment handling.
  • Fewer external dependencies (no .commands import), making the module more self-contained.

Cons / breaking changes

  • GcodeParser class and helper functions (get_lines, split_params, element_type) are removed; any code using them will need to migrate to parse_gcode_lines.
  • The default for include_comments changed from False (via GcodeParser / get_lines) to True in parse_gcode_lines, which may change behavior for callers relying on the old default.
  • Commands is now defined locally; code that imported Commands from .commands may need adjustment if it expected to share the same enum.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions