Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Run Python Basics Integration Tests
name: Run Python Basics Integration and Unit Tests
on:
pull_request:
types: [opened, reopened, synchronize]
Expand All @@ -16,3 +16,7 @@ jobs:
run: |
cd starpls/integration_tests
bazel test //...
- name: Run cr_checker unit tests
run: |
cd cr_checker/tests
bazel test //...
Empty file removed cr_checker/tests/.keep
Empty file.
24 changes: 24 additions & 0 deletions cr_checker/tests/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# *******************************************************************************
# Copyright (c) 2025 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Apache License Version 2.0 which is available at
# https://www.apache.org/licenses/LICENSE-2.0
#
# SPDX-License-Identifier: Apache-2.0
# *******************************************************************************

# score_py_pytest is the pytest wrapper macro provided by score_tooling's python_basics package.
load("@score_tooling//python_basics:defs.bzl", "score_py_pytest")

# Unit tests for the shebang handling of cr_checker.
score_py_pytest(
name = "shebang_unit_tests",
srcs = [
"test_shebang_handling.py",
],
# The tests exercise the cr_checker tool, so its library target is a dependency.
deps = [
"@score_tooling//cr_checker/tool:cr_checker_lib",
],
)
49 changes: 49 additions & 0 deletions cr_checker/tests/MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# *******************************************************************************
# Copyright (c) 2025 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Apache License Version 2.0 which is available at
# https://www.apache.org/licenses/LICENSE-2.0
#
# SPDX-License-Identifier: Apache-2.0
# *******************************************************************************
# Standalone Bazel module for the cr_checker unit tests; it lives in
# cr_checker/tests and is built from that directory.
module(
name = "score_cr_checker_tests",
version = "0.1.0",
compatibility_level = 0,
)

bazel_dep(name = "rules_shell", version = "0.5.0")

# begin Tests

# PYTHON
bazel_dep(name = "rules_python", version = "1.4.1")

# Single source of truth for the interpreter version; reused by the toolchain
# and by pip.parse below so both always agree.
PYTHON_VERSION = "3.12"

python = use_extension("@rules_python//python/extensions:python.bzl", "python")
python.toolchain(
python_version = PYTHON_VERSION,
)
use_repo(python)

# PIP
# Third-party Python packages are pinned in requirements_lock.txt and exposed
# through the "pip_deps_test" hub repository.
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
pip.parse(
hub_name = "pip_deps_test",
python_version = PYTHON_VERSION,
requirements_lock = "//:requirements_lock.txt",
)
use_repo(pip, "pip_deps_test")

# score_tooling is taken from the local checkout ("../../" relative to this
# file, i.e. two directories up from cr_checker/tests) rather than from a registry,
# so the tests run against the tooling sources in the same working tree.
bazel_dep(name = "score_tooling", version = "0.0.0")
local_path_override(
module_name = "score_tooling",
path = "../../",
)

# end Tests
1 change: 1 addition & 0 deletions cr_checker/tests/requirements_lock.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bazel-runfiles==1.3.0
169 changes: 169 additions & 0 deletions cr_checker/tests/test_shebang_handling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# *******************************************************************************
# Copyright (c) 2024 Contributors to the Eclipse Foundation
#
# See the NOTICE file(s) distributed with this work for additional
# information regarding copyright ownership.
#
# This program and the accompanying materials are made available under the
# terms of the Apache License Version 2.0 which is available at
# https://www.apache.org/licenses/LICENSE-2.0
#
# SPDX-License-Identifier: Apache-2.0
# *******************************************************************************
# unit tests for the shebang handling in the cr_checker module
from __future__ import annotations

import importlib.util
import json
from datetime import datetime
from pathlib import Path


# load the cr_checker module
def load_cr_checker_module():
    """Import ``tool/cr_checker.py`` by file path and return the module object.

    The script is located relative to this test file: one directory above the
    directory containing this file, then ``tool/cr_checker.py``. Every call
    executes the script again and returns a fresh module object.

    Raises:
        RuntimeError: If no import spec (or no loader) can be created for the
            script path.
    """
    tool_dir = Path(__file__).resolve().parents[1] / "tool"
    module_path = tool_dir / "cr_checker.py"

    spec = importlib.util.spec_from_file_location("cr_checker_module", module_path)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise RuntimeError(f"Failed to load cr_checker module from {module_path}")

    cr_checker = importlib.util.module_from_spec(spec)
    loader.exec_module(cr_checker)
    return cr_checker


# load the license template
def load_py_template() -> str:
    """Return the ``py`` license-header template from ``resources/templates.ini``.

    The template file is looked up next to the ``tool`` directory (one level
    above the directory containing this test file) and parsed with the
    ``load_templates`` function of the cr_checker module.
    """
    checker = load_cr_checker_module()
    resources_dir = Path(__file__).resolve().parents[1] / "resources"
    all_templates = checker.load_templates(resources_dir / "templates.ini")
    return all_templates["py"]


# Write the config file at test time so that its list of years always matches
# the year used in the header of the mock "script.py" file.
def write_config(path: Path, years: list[int]) -> Path:
    """Create ``config.json`` inside *path* and return the path of the new file.

    Args:
        path: Existing directory that receives the config file.
        years: Years to store under the ``"years"`` key.

    Returns:
        The location of the written ``config.json``.
    """
    target = path / "config.json"
    payload = json.dumps({"years": years})
    target.write_text(payload, encoding="utf-8")
    return target


# test that offset matches the length of the shebang line including trailing newlines
def test_detect_shebang_offset_counts_trailing_newlines(tmp_path):
    """The offset covers the shebang line and the blank line directly after it."""
    checker = load_cr_checker_module()
    target = tmp_path / "script.py"
    target.write_text("#!/usr/bin/env python3\n\nprint('hi')\n", encoding="utf-8")

    expected = len("#!/usr/bin/env python3\n\n".encode("utf-8"))

    assert checker.detect_shebang_offset(target, "utf-8") == expected


# test that process_files function validates a license header after the shebang line
def test_process_files_accepts_header_after_shebang(tmp_path):
    """A valid header placed right after the shebang is not reported as missing."""
    checker = load_cr_checker_module()
    target = tmp_path / "script.py"
    template = load_py_template()
    year = datetime.now().year
    rendered_header = template.format(year=year)
    content = "#!/usr/bin/env python3\n" + rendered_header + "print('hi')\n"
    target.write_text(content, encoding="utf-8")
    config_file = write_config(tmp_path, [year])

    # Check-only run (fix=False) with no manual offsets.
    outcome = checker.process_files(
        [target],
        {"py": template},
        False,
        config_file,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert outcome["no_copyright"] == 0


# test that process_files function fixes a missing license header after the shebang line
def test_process_files_fix_inserts_header_after_shebang(tmp_path):
    """Fix mode inserts the header between the shebang line and the code."""
    checker = load_cr_checker_module()
    target = tmp_path / "script.py"
    target.write_text("#!/usr/bin/env python3\nprint('hi')\n", encoding="utf-8")
    template = load_py_template()
    year = datetime.now().year
    config_file = write_config(tmp_path, [year])

    # Fix run (fix=True) with no manual offsets.
    outcome = checker.process_files(
        [target],
        {"py": template},
        True,
        config_file,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert outcome["fixed"] == 1
    assert outcome["no_copyright"] == 1

    rendered_header = template.format(year=year)
    expected_content = "#!/usr/bin/env python3\n" + rendered_header + "print('hi')\n"
    assert target.read_text(encoding="utf-8") == expected_content


# test that process_files function validates a license header without the shebang line
def test_process_files_accepts_header_without_shebang(tmp_path):
    """A valid header at the very top of a shebang-less file is accepted."""
    checker = load_cr_checker_module()
    target = tmp_path / "script.py"
    template = load_py_template()
    year = datetime.now().year
    rendered_header = template.format(year=year)
    target.write_text(rendered_header + "print('hi')\n", encoding="utf-8")
    config_file = write_config(tmp_path, [year])

    # Check-only run (fix=False) with no manual offsets.
    outcome = checker.process_files(
        [target],
        {"py": template},
        False,
        config_file,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert outcome["no_copyright"] == 0


# test that process_files function fixes a missing license header without the shebang
def test_process_files_fix_inserts_header_without_shebang(tmp_path):
    """Fix mode prepends the header to a file that has no shebang line."""
    checker = load_cr_checker_module()
    target = tmp_path / "script.py"
    target.write_text("print('hi')\n", encoding="utf-8")
    template = load_py_template()
    year = datetime.now().year
    config_file = write_config(tmp_path, [year])

    # Fix run (fix=True) with no manual offsets.
    outcome = checker.process_files(
        [target],
        {"py": template},
        True,
        config_file,
        use_mmap=False,
        encoding="utf-8",
        offset=0,
        remove_offset=0,
    )

    assert outcome["fixed"] == 1
    assert outcome["no_copyright"] == 1

    rendered_header = template.format(year=year)
    assert target.read_text(encoding="utf-8") == rendered_header + "print('hi')\n"
53 changes: 47 additions & 6 deletions cr_checker/tool/cr_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,39 @@ def configure_logging(log_file_path=None, verbose=False):
LOGGER.addHandler(handler)


def detect_shebang_offset(path, encoding):
    """
    Detects if a file starts with a shebang (#!) and returns the offset needed
    to skip it.

    The offset covers the shebang line, its terminating newline, and every
    newline character that directly follows it (blank lines between the
    shebang and the rest of the file).

    The file is opened in text mode with Python's default newline handling,
    so line endings are counted as they appear after newline translation;
    each counted character is measured by its encoded length in `encoding`.

    Detection is best-effort: if the file cannot be opened, read, or decoded
    with `encoding`, the problem is logged at debug level and 0 is returned
    instead of raising.

    Args:
        path (Path): A `pathlib.Path` object pointing to the file.
        encoding (str): Encoding type to use when reading the file.

    Returns:
        int: The length of the shebang line plus directly following newline
             characters if a shebang is present, otherwise 0.
    """
    try:
        with open(path, "r", encoding=encoding) as handle:
            first_line = handle.readline()
            if not first_line.startswith("#!"):
                return 0

            # Encoded length of the shebang line itself (newline included).
            byte_length = len(first_line.encode(encoding))

            # Also skip blank lines that directly follow the shebang so the
            # header check starts at the first real content character.
            while True:
                next_char = handle.read(1)
                if not next_char or next_char not in ("\n", "\r"):
                    break
                byte_length += len(next_char.encode(encoding))
    except (OSError, UnicodeDecodeError) as err:
        # UnicodeDecodeError is a ValueError, not an OSError: without catching
        # it, a file that is not valid in `encoding` would abort the caller.
        LOGGER.debug("Could not detect shebang in %s: %s", path, err)
        return 0

    LOGGER.debug("Detected shebang in %s with offset %d bytes", path, byte_length)
    return byte_length


def load_text_from_file(path, header_length, encoding, offset):
"""
Reads the first portion of a file, up to `header_length` characters
Expand All @@ -210,7 +243,8 @@ def load_text_from_file(path, header_length, encoding, offset):
"Reading first %d characters from file: %s [%s]", total_length, path, encoding
)
with open(path, "r", encoding=encoding) as handle:
return handle.read(total_length)
content = handle.read(total_length)
return content[offset:] if offset else content


def load_text_from_file_with_mmap(path, header_length, encoding, offset):
Expand Down Expand Up @@ -240,10 +274,10 @@ def load_text_from_file_with_mmap(path, header_length, encoding, offset):
)
return ""

LOGGER.debug("Memory mapping first %d bytes from file: %s", header_length, path)
LOGGER.debug("Memory mapping first %d bytes from file: %s", total_length, path)
with open(path, "r", encoding=encoding) as handle:
with mmap.mmap(handle.fileno(), length=length, access=mmap.ACCESS_READ) as fmap:
return fmap[:header_length].decode(encoding)
return fmap[:length].decode(encoding)[offset:]


def has_copyright(path, copyright_text, use_mmap, encoding, offset, config):
Expand Down Expand Up @@ -414,7 +448,7 @@ def fix_copyright(path, copyright_text, encoding, offset):
with open(path, "w", encoding=encoding) as handle:
temp.seek(0)
if offset > 0:
handle.write(first_line + "\n")
handle.write(first_line)
temp.seek(offset)
handle.write(copyright_text.format(year=datetime.now().year))
for chunk in iter(lambda: temp.read(4096), ""):
Expand Down Expand Up @@ -463,11 +497,18 @@ def process_files(
"Skipped (no configuration for selected file extension): %s", item
)
continue
if not has_copyright(item, templates[key], use_mmap, encoding, offset, config):

# Automatically detect shebang and use its offset if no manual offset provided
shebang_offset = detect_shebang_offset(item, encoding)
effective_offset = offset + shebang_offset if offset == 0 else offset

if not has_copyright(
item, templates[key], use_mmap, encoding, effective_offset, config
):
if fix:
if remove_offset:
remove_old_header(item, encoding, remove_offset)
fix_copyright(item, templates[key], encoding, offset)
fix_copyright(item, templates[key], encoding, effective_offset)
results["no_copyright"] += 1
results["fixed"] += 1
else:
Expand Down