diff --git a/.github/workflows/check_markup_links.yml b/.github/workflows/check_markup_links.yml index 66b04876f6..6ed63a2183 100644 --- a/.github/workflows/check_markup_links.yml +++ b/.github/workflows/check_markup_links.yml @@ -9,11 +9,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: tcort/github-action-markdown-link-check@v1 + - uses: actions/setup-python@v4 with: - folder-path: 'docs' - file-path: README.md - check-modified-files-only: yes - base-branch: main - use-quiet-mode: yes - use-verbose-mode: yes + python-version: '3.10' + - name: Check Markup Links + run: | + python python/tools/markdown_link_check.py docs diff --git a/docs/lang/articles/contribution/contributor_guide.md b/docs/lang/articles/contribution/contributor_guide.md index 750a35e693..9b9487402a 100644 --- a/docs/lang/articles/contribution/contributor_guide.md +++ b/docs/lang/articles/contribution/contributor_guide.md @@ -146,7 +146,7 @@ No problem, the CI bot will run the code checkers and format your codes automati -> For more style information for your C++ code, see [our C++ style](#c-style). +> For more style information for your C++ code, see [our C++ style](#c-style-guide). ### C++ style guide @@ -210,7 +210,7 @@ Now you get to the point where you need to get your hands dirty with your PRs. T - **When implementing a complex feature:** - Consider breaking it down to multiple separate, self-contained PRs to provide the community with a clearer context and keep a more traceable development history. - - If you're already a collaborator or maintainer with write access to the Taichi repository, please consider adopting [the ghstack workflow](#ghstack-workflow). + - If you're already a collaborator or maintainer with write access to the Taichi repository, please consider adopting [the ghstack workflow](#implementing-the-ghstack-workflow-for-complex-changesets). 
- **When creating a PR:** @@ -318,8 +318,6 @@ Here, we do not want to repeat some best practices summarized in the following G - [How to have your PR merged quickly](https://testing.googleblog.com/2017/06/code-health-too-many-comments-on-your.html) - - ### Implementing the ghstack workflow for complex changesets The standard GitHub PR workflow can become unwieldy when dealing with large changesets. diff --git a/docs/lang/articles/contribution/dev_install.md b/docs/lang/articles/contribution/dev_install.md index d8f26b411d..ceac938468 100644 --- a/docs/lang/articles/contribution/dev_install.md +++ b/docs/lang/articles/contribution/dev_install.md @@ -25,7 +25,7 @@ See the [Get Started](https://docs.taichi-lang.org/) for more information on qui - [Installing optional dependencies](#install-optional-dependencies) - [Building Taichi from source](#build-taichi-from-source) - [List of TAICHI_CMAKE_ARGS](#list-of-taichi_cmake_args) - - [Usage and behavior of `build.py`](#usage-and-behavior-of-buildpy) + - [Usage and behavior of `build.py`](#design-goals-behaviors-and-usage-of-buildpy) - [Troubleshooting and debugging](#troubleshooting-and-debugging) - [Frequently asked questions](#frequently-asked-questions) diff --git a/docs/lang/articles/contribution/doc_writing.md b/docs/lang/articles/contribution/doc_writing.md index 8602138950..d4226eb0f1 100644 --- a/docs/lang/articles/contribution/doc_writing.md +++ b/docs/lang/articles/contribution/doc_writing.md @@ -62,7 +62,7 @@ def paint(t: float): ## Cross-references -To link to another section within the same article, you would use `[Return to ## 1. Code blocks](#1-code-blocks)`: [Return to ## 1. Code blocks](#1-code-blocks). +To link to another section within the same article, you would use `[Return to ## 1. Code blocks](#code-blocks)`: [Return to ## 1. Code blocks](#code-blocks). 
We follow the best practices suggested by [Docusaurus](https://docusaurus.io/docs/markdown-features/links) to cross-reference other documents, so to link to sections in other articles, please use the following relative-path based syntax, which is docs-versioning and IDE/GitHub friendly: @@ -93,7 +93,7 @@ You **HAVE TO** insert blank lines to make them work: ```md
-![](./some_pic.png) +Dummy text
``` diff --git a/docs/lang/articles/reference/language_reference.md b/docs/lang/articles/reference/language_reference.md index f6d37ab801..b41d9cba7a 100644 --- a/docs/lang/articles/reference/language_reference.md +++ b/docs/lang/articles/reference/language_reference.md @@ -113,7 +113,7 @@ Following the [Values and types](#values-and-types) section, if both operands of a binary operation are Python values, compile-time evaluation is triggered and a result Python value is produced. If only one operand is a Python value, it is first turned into a Taichi value with -[default type](../type_system/type.md#default-primitive-types-for-integers-and-floating-point-numbers). +[default type](../type_system/type.md#primitive-types). Now the only remaining case is that both operands are Taichi values. Binary operations can happen between Taichi values of either primitive type or @@ -277,7 +277,7 @@ positional_item ::= assignment_expression | "*" expression ``` The `primary` must be evaluated to one of: -- A [Taichi function](../kernels/kernel_function.md#taichi-function). +- A [Taichi function](../kernels/kernel_function.md#kernels-and-functions). - A [Taichi builtin function](./operator.md#other-arithmetic-functions). - A Taichi primitive type. In this case, the `positional_arguments` must only contain one item. If the item is evaluated to a Python value, then the @@ -499,7 +499,7 @@ with the following points to notice: target is an identifier appearing for the first time, a variable is defined with that name and inferred type from the corresponding right-hand side expression. If the expression is evaluated to a Python value, it will be turned -into a Taichi value with [default type](../type_system/type.md#default-primitive-types-for-integers-and-floating-point-numbers). +into a Taichi value with [default type](../type_system/type.md#primitive-types). 
- If a target is an existing identifier, the corresponding right-hand side expression must be evaluated to a Taichi value with the type of the corresponding variable of that identifier. Otherwise, an implicit cast will diff --git a/docs/lang/articles/reference/operator.md b/docs/lang/articles/reference/operator.md index 18bd637c32..4c067f7c01 100644 --- a/docs/lang/articles/reference/operator.md +++ b/docs/lang/articles/reference/operator.md @@ -57,7 +57,7 @@ Python3 distinguishes `/` (true division) and `//` (floor division), e.g., `1.0 To avoid such implicit casting, you can manually cast your operands to desired types, using `ti.cast`. You can control yourself the precision instead of using the default, see -[Default precisions](#default-precisions). +[Default precisions](global_settings.md#going-high-precision). Taichi also provides `ti.raw_div` function which performs true division if one of the operands is floating point type and performs floor division if both operands are integral types. 
# File: python/tools/markdown_link_check.py
#
# The patch hunk below (for a different file) shares a source line with the
# start of this module; it is kept verbatim, one diff line per comment line:
#   diff --git a/docs/rfcs/20220413-aot-for-all-snode.md b/docs/rfcs/20220413-aot-for-all-snode.md
#   index 0be0e8e7c3..fb67ee5735 100644
#   --- a/docs/rfcs/20220413-aot-for-all-snode.md
#   +++ b/docs/rfcs/20220413-aot-for-all-snode.md
#   @@ -6,7 +6,7 @@
#    ---
#    - [RFC: AOT for All SNodes](#rfc-aot-for-all-snodes)
#   - - [* Relevant Issue: https://github.com/taichi-dev/taichi/issues/4777](#-relevant-issue-httpsgithubcomtaichi-devtaichiissues4777)
#   + - [* Relevant Issue: https://github.com/taichi-dev/taichi/issues/4777](https://github.com/taichi-dev/taichi/issues/4777)
#    - [TL;DR](#tldr)
#    - [Background](#background)
#    - [Goals](#goals)
"""Check relative links and heading anchors in Markdown files.

Usage::

    python markdown_link_check.py <directory-or-file.md>

Exit status: 0 when nothing is broken (or no Markdown file was found),
1 when the argument is neither a directory nor a ``.md`` file, 2 when at
least one broken link or anchor was reported.

External links (``http``, ``https``, ``mailto``) are listed but never fetched.
"""
import argparse
import os
import pathlib
import re
import sys
from typing import Iterator, List, Optional, Set, Tuple
from urllib.parse import unquote, urlparse

error_found = False  # Track if any errors are found

# Opening/closing line of a fenced code block: a run of >= 3 backticks or tildes.
_FENCE_RE = re.compile(r'^\s*(`{3,}|~{3,})(.*)$')
# Inline code span on a single line; links inside it are sample text.
_INLINE_CODE_RE = re.compile(r'`[^`\n]+`')
# Inline link or image: "[text](target)". Images match too because the pattern
# simply starts at the "[" that follows the "!".
_LINK_RE = re.compile(r'\[.*?\]\((.*?)\)')
# ATX heading ("# Title", "## Title", ...), anchored at column 0.
_HEADER_RE = re.compile(r'^#+\s+(.*)$')
# Explicit heading id as supported by Docusaurus: "## Title {#custom-id}".
_EXPLICIT_ID_RE = re.compile(r'\s*\{#([^}\s]+)\}\s*$')
_EXTERNAL_SCHEMES = ('http', 'https', 'mailto')


def _unfenced_lines(content: str) -> Iterator[str]:
    """Yield the lines of *content* that are outside fenced code blocks.

    A fence is closed only by a fence of the same character that is at least
    as long as the opening one and carries no info string, so a four-backtick
    block may safely contain three-backtick examples.
    """
    open_fence = None  # (fence character, fence length) while inside a block
    for line in content.split('\n'):
        match = _FENCE_RE.match(line)
        if open_fence is None:
            # "```code```" on one line is inline code, not a fence opener.
            if match and not (match.group(1)[0] == '`' and '`' in match.group(2)):
                open_fence = (match.group(1)[0], len(match.group(1)))
                continue
            yield line
        elif (match and match.group(1)[0] == open_fence[0]
              and len(match.group(1)) >= open_fence[1]
              and not match.group(2).strip()):
            open_fence = None


def _slugify(text: str) -> str:
    """Normalise heading text or a link fragment to a comparable slug.

    Lower-cases, turns spaces into hyphens, drops every character other than
    ``a-z``, ``0-9`` and ``-``, collapses hyphen runs and trims hyphens at
    both ends. Both sides of a comparison go through this function, so the
    check is deliberately lenient about punctuation and repeated hyphens.
    """
    slug = text.lower().replace(' ', '-')
    slug = re.sub(r'[^a-z0-9\-]', '', slug)
    slug = re.sub(r'-+', '-', slug)
    return slug.strip('-')


def _collect_anchors(content: str) -> Tuple[Set[str], List[str]]:
    """Return ``(accepted, listed)`` anchors for the headings in *content*.

    ``accepted`` holds every spelling that counts as a match; ``listed`` holds
    one canonical anchor per heading, in document order, for error output.
    """
    accepted: Set[str] = set()
    listed: List[str] = []
    occurrences = {}
    for line in _unfenced_lines(content):
        match = _HEADER_RE.match(line)
        if not match:
            continue
        text = match.group(1)
        spellings = [text]
        primary = _slugify(text)
        explicit = _EXPLICIT_ID_RE.search(text)
        if explicit:
            # The explicit id is the real anchor; the text-derived forms stay
            # accepted so links that passed before keep passing.
            primary = _slugify(explicit.group(1))
            accepted.add(primary)
            spellings.append(text[:explicit.start()])
        for spelling in spellings:
            accepted.add(_slugify(spelling))
            # Legacy lenient form: whitespace removed instead of hyphenated.
            accepted.add(re.sub(r'[^a-z0-9\-]', '', spelling.lower()))
        # Repeated headings get "-1", "-2", ... appended by the renderer.
        count = occurrences.get(primary, 0)
        if count:
            accepted.add(f"{primary}-{count}")
        occurrences[primary] = count + 1
        listed.append(primary)
    return accepted, listed


def _extract_link_targets(content: str) -> List[str]:
    """Return the targets of all inline links/images outside code in *content*."""
    targets = []
    for line in _unfenced_lines(content):
        for raw in _LINK_RE.findall(_INLINE_CODE_RE.sub('', line)):
            parts = raw.split()
            if not parts:
                continue
            # Keep only the destination; anything after it is the optional
            # link title, e.g. (guide.md "Title").
            target = parts[0].strip('<>')
            if target:
                targets.append(target)
    return targets


def check_markdown_links(file_path, base_dir=None) -> bool:
    """
    Check all links in a Markdown file, including anchor references.

    Links inside fenced code blocks and inline code spans are ignored.
    Every broken link or anchor is printed and sets the module-level
    ``error_found`` flag.

    Args:
        file_path: Path to the Markdown file
        base_dir: Base directory for relative links (defaults to file's directory)

    Returns:
        True when no broken link or anchor was reported for this file,
        False otherwise.
    """
    global error_found
    if base_dir is None:
        base_dir = os.path.dirname(os.path.abspath(file_path))

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    all_ok = True
    for link in _extract_link_targets(content):
        try:
            parsed = urlparse(link)
        except ValueError:
            # urlparse rejects e.g. malformed IPv6 literals; do not abort the run.
            print(f"⚠️ Unparseable link (not checked): {link}")
            continue

        # Skip mailto and external links
        if parsed.scheme in _EXTERNAL_SCHEMES:
            print(f"[-] External link (not checked): {link}")
            continue
        if parsed.scheme or parsed.netloc:
            # Any other URL form (ftp:, tel:, //host/...) is not a local file.
            continue

        fragment = unquote(parsed.fragment)

        # Handle anchor-only links
        if not parsed.path and fragment:
            if check_anchor(file_path, fragment) is False:
                all_ok = False
            continue

        # Handle relative paths ("%20" and friends are decoded first)
        full_path = os.path.normpath(os.path.join(base_dir, unquote(parsed.path)))
        if not os.path.exists(full_path):
            print(f"❌ Broken link: {link} (File not found: {full_path})")
            error_found = True
            all_ok = False
            continue

        # Check anchor in local file
        if fragment:
            if full_path.endswith('.md'):
                if check_anchor(full_path, fragment) is False:
                    all_ok = False
            else:
                # For non-markdown files, we can't check anchors
                print(f"⚠️ Anchor in non-Markdown file (not checked): {link}")
    return all_ok


def check_anchor(md_file_path, anchor) -> Optional[bool]:
    """
    Check if an anchor exists in a Markdown file.

    Headings inside fenced code blocks are ignored. An anchor matches a
    heading's slug, its explicit ``{#id}``, or the ``-N`` suffixed form of a
    repeated heading. A missing anchor is printed together with the anchors
    that do exist and sets the module-level ``error_found`` flag.

    Args:
        md_file_path: Path to the Markdown file
        anchor: Anchor to check (without #)

    Returns:
        True if the anchor exists, False if it was reported as broken, None if
        the file could not be read (a warning is printed; not counted as an
        error).
    """
    global error_found
    try:
        with open(md_file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"⚠️ Error checking anchor #{anchor} in {md_file_path}: {str(e)}")
        return None

    accepted, listed = _collect_anchors(content)
    if _slugify(anchor) in accepted:
        return True

    print(f"❌ Broken anchor: #{anchor} in {md_file_path}")
    print("   Available anchors in this file:")
    for name in listed:
        print(f"     - {name}")
    error_found = True
    return False


def find_markdown_files(root_dir) -> List[str]:
    """
    Recursively find all .md files under root_dir.

    The extension match is case-insensitive; the result is sorted so that
    output order is stable from run to run.
    """
    return sorted(
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.lower().endswith('.md')
    )


def main(argv=None) -> int:
    """Run the checker on the path given in *argv* and return the exit status."""
    parser = argparse.ArgumentParser(description="Check Markdown links in a directory or a single Markdown file.")
    parser.add_argument("path", help="Path to the root directory or a Markdown file")
    args = parser.parse_args(argv)

    input_path = os.path.abspath(args.path)

    if os.path.isdir(input_path):
        md_files = find_markdown_files(input_path)
        if not md_files:
            print(f"No Markdown files found in {input_path}")
            return 0
    elif os.path.isfile(input_path) and input_path.lower().endswith('.md'):
        md_files = [input_path]
    else:
        print(f"Error: {input_path} is not a directory or a Markdown (.md) file.")
        return 1

    for md_file in md_files:
        print(f"\nChecking: {md_file}")
        check_markdown_links(md_file, base_dir=os.path.dirname(md_file))
    return 2 if error_found else 0


if __name__ == '__main__':
    # sys.exit instead of the site-provided exit(), which is not guaranteed to exist.
    sys.exit(main())
# File: tests/python/test_tools_markdown_check.py
"""Tests for tools.markdown_link_check.

Besides the printed report, the tests assert on the module-level
``error_found`` flag, because that flag (not the text) decides the exit
status of the command-line tool.
"""
import tempfile
import os
import pytest
import tools.markdown_link_check as markdown_link_check
from tools.markdown_link_check import check_markdown_links, check_anchor, find_markdown_files


@pytest.fixture
def temp_dir():
    """Provide a scratch directory that is removed after the test."""
    with tempfile.TemporaryDirectory() as d:
        yield d


@pytest.fixture(autouse=True)
def reset_error_flag():
    """Clear the checker's global flag so one test cannot affect another."""
    markdown_link_check.error_found = False
    yield
    markdown_link_check.error_found = False


def write_md(base_dir, filename, content):
    """Write *content* to base_dir/filename (creating parents) and return the path."""
    path = os.path.join(base_dir, filename)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return path


def test_find_markdown_files(temp_dir):
    write_md(temp_dir, "a.md", "# Title")
    write_md(temp_dir, "b.txt", "not markdown")
    os.mkdir(os.path.join(temp_dir, "sub"))
    write_md(temp_dir, "sub/c.md", "# Sub")
    files = find_markdown_files(temp_dir)
    assert len(files) == 2
    assert any(f.endswith("a.md") for f in files)
    assert any(f.endswith("c.md") for f in files)


def test_check_markdown_links_valid(temp_dir, capsys):
    md = "# Title\n[Link](other.md)\n"
    other = "# Other"
    write_md(temp_dir, "main.md", md)
    write_md(temp_dir, "other.md", other)
    check_markdown_links(os.path.join(temp_dir, "main.md"), temp_dir)
    out = capsys.readouterr().out
    assert "❌" not in out
    assert not markdown_link_check.error_found


def test_check_markdown_links_broken_file(temp_dir, capsys):
    md = "# Title\n[Missing](missing.md)\n"
    write_md(temp_dir, "main.md", md)
    check_markdown_links(os.path.join(temp_dir, "main.md"), temp_dir)
    out = capsys.readouterr().out
    assert "❌ Broken link" in out
    assert markdown_link_check.error_found


def test_check_anchor_found(temp_dir, capsys):
    md = "# My Header\n"
    path = write_md(temp_dir, "doc.md", md)
    check_anchor(path, "my-header")
    out = capsys.readouterr().out
    assert "❌" not in out
    assert not markdown_link_check.error_found


def test_check_anchor_not_found(temp_dir, capsys):
    md = "# My Header\n"
    path = write_md(temp_dir, "doc.md", md)
    check_anchor(path, "not-present")
    out = capsys.readouterr().out
    assert "❌ Broken anchor" in out
    assert markdown_link_check.error_found


def test_check_anchor_symbol_removal(temp_dir, capsys):
    md = "# My `Header`.\n"
    path = write_md(temp_dir, "doc.md", md)
    check_anchor(path, "my-header")
    out = capsys.readouterr().out
    assert "❌" not in out
    assert not markdown_link_check.error_found


def test_external_and_mailto_links(temp_dir, capsys):
    md = "# Title\n[Google](https://google.com)\n[Email](mailto:test@example.com)\n"
    path = write_md(temp_dir, "main.md", md)
    check_markdown_links(path, temp_dir)
    out = capsys.readouterr().out
    assert "External link" in out
    # External links are only listed, never counted as failures.
    assert not markdown_link_check.error_found


def test_anchor_only_link(temp_dir, capsys):
    md = "# Section 1\n[Go](#section-1)\n"
    path = write_md(temp_dir, "main.md", md)
    check_markdown_links(path, temp_dir)
    out = capsys.readouterr().out
    assert "❌" not in out
    assert not markdown_link_check.error_found


def test_pr_review(temp_dir, capsys):
    md = """

- [PR review & merging checklist](#pr-review-merging-checklist)

### PR review & merging checklist

Follow this checklist during PR review or merging:
"""
    path = write_md(temp_dir, "main.md", md)
    check_markdown_links(path, temp_dir)
    out = capsys.readouterr().out
    assert "❌" not in out
    assert not markdown_link_check.error_found