diff --git a/.circleci/config.yml b/.circleci/config.yml index 949e1028799dc..2a86bafb80db3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -21,14 +21,12 @@ jobs: command: | git remote add upstream https://github.com/pingcap/docs.git git fetch upstream - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-file-encoding.py - python3 check-file-encoding.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-file-encoding.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') - run: name: "Check git conflicts" command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-conflicts.py - python3 check-conflicts.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' '*.yml' '*.yaml') + python3 scripts/check-conflicts.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' '*.yml' '*.yaml') - run: name: "Install markdownlint" @@ -53,20 +51,17 @@ jobs: - run: name: "Check control characters" command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-control-char.py - python3 check-control-char.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-control-char.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') - run: name: "Check unclosed tags" command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-tags.py - python3 check-tags.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-tags.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') - run: name: "Check manual line breaks" command: | - wget 
https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-manual-line-breaks.py - python3 check-manual-line-breaks.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-manual-line-breaks.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') build: docker: diff --git a/scripts/check-conflicts.py b/scripts/check-conflicts.py new file mode 100644 index 0000000000000..7d940ad4d9f0b --- /dev/null +++ b/scripts/check-conflicts.py @@ -0,0 +1,73 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-conflicts.py. 
+
+import re
+import sys
+import os
+
+# Scan every file passed on the command line for Git conflict markers and
+# exit with status 1 when at least one complete conflict block is found.
+mark = 0
+
+for filename in sys.argv[1:]:
+    if not os.path.isfile(filename):
+        continue
+
+    pos = []      # [start, end] line numbers of each complete conflict
+    single = []   # line numbers collected for the conflict being scanned
+    flag = 0      # 0: outside a conflict, 1: after '<<<<<<<', 2: after '======='
+
+    # The docs are UTF-8; do not depend on the locale's default encoding.
+    with open(filename, 'r', encoding='utf-8') as file:
+        # A conflict is '<<<<<<<', then '=======', then '>>>>>>>', in that order.
+        for lineNum, line in enumerate(file, start=1):
+            if re.match(r'<{7}.*\n', line):
+                flag = 1
+                single = [lineNum]
+            elif re.match(r'={7}\n', line) and flag == 1:
+                # Only a separator that follows an opening marker counts; a
+                # bare '=======' can be a heading underline in Markdown.
+                flag = 2
+            elif re.match(r'>{7}', line) and flag == 2:
+                single.append(lineNum)
+                pos.append(single)
+                single = []
+                flag = 0
+
+    # Report the line range of every conflict found in this file.
+    if pos:
+        mark = 1
+        print("\n" + filename + ": this file has conflicts in the following lines:\n")
+        for conflict in pos:
+            print("CONFLICTS: line " + str(conflict[0]) + " to line " + str(conflict[1]) + "\n")
+
+if mark:
+    print("The above conflicts will cause website build failure. Please fix them.")
+    sys.exit(1)
diff --git a/scripts/check-control-char.py b/scripts/check-control-char.py
new file mode 100644
index 0000000000000..e17a721d8c74e
--- /dev/null
+++ b/scripts/check-control-char.py
@@ -0,0 +1,69 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-control-char.py.
+
+import re, sys, os
+
+# Check control characters.
+def check_control_char(filename):
+    """Report every line of `filename` that contains a backspace character.
+
+    Prints the offending line numbers and returns 1 when at least one such
+    line exists; returns 0 otherwise.
+    """
+    # The docs are UTF-8; do not depend on the locale's default encoding.
+    with open(filename, 'r', encoding='utf-8') as file:
+        # '\b' (backspace, U+0008) is the only character this check looks for.
+        pos = [lineNum for lineNum, line in enumerate(file, start=1) if '\b' in line]
+
+    if not pos:
+        return 0
+
+    print("\n" + filename + ": this file has control characters in the following lines:\n")
+    for cc in pos:
+        print("CONTROL CHARACTERS: L" + str(cc))
+    print("\nPlease delete these control characters.")
+
+    return 1
+
+if __name__ == "__main__":
+
+    # Number of files in which control characters were reported.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        if os.path.isfile(filename):
+            flag = check_control_char(filename)
+            if flag:
+                count+=1
+
+    # Exit with status 1 so that callers (e.g. a CI job) see the failure.
+    if count:
+        print("\nThe above issues will cause website build failure. Please fix them.")
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/check-file-encoding.py b/scripts/check-file-encoding.py
new file mode 100644
index 0000000000000..b207659cfc8be
--- /dev/null
+++ b/scripts/check-file-encoding.py
@@ -0,0 +1,57 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-file-encoding.py. + +import sys, os, codecs + +# Convert the file encoding to the default UTF-8 without BOM. 
+def check_BOM(filename):
+    """Remove a leading UTF-8 byte order mark from `filename`, in place."""
+    bom, buf_size = codecs.BOM_UTF8, 4096
+    with open(filename, "r+b") as fp:
+        block = fp.read(buf_size)
+        if not block.startswith(bom):
+            return
+        # Move the rest of the file back over the BOM, one block at a time.
+        write_at, block = 0, block[len(bom):]
+        while block:
+            fp.seek(write_at)
+            fp.write(block)
+            write_at += len(block)
+            fp.seek(write_at + len(bom))
+            block = fp.read(buf_size)
+        # Cut off the stale bytes left at the end after the move.
+        fp.truncate(write_at)
+        print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.")
+
+if __name__ == "__main__":
+    # Normalize every existing file named on the command line; arguments
+    # that are not regular files are ignored.
+    for filename in filter(os.path.isfile, sys.argv[1:]):
+        check_BOM(filename)
\ No newline at end of file
diff --git a/scripts/check-manual-line-breaks.py b/scripts/check-manual-line-breaks.py
new file mode 100644
index 0000000000000..7102581ff37e2
--- /dev/null
+++ b/scripts/check-manual-line-breaks.py
@@ -0,0 +1,115 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-manual-line-breaks.py.
+
+import re, sys, os
+
+# Check manual line break within a paragraph.
+def check_manual_break(filename):
+    """Report manual line breaks inside paragraphs of a Markdown file.
+
+    Lines up to the second '---' line (front matter), lines starting with
+    '|' or '>' (tables and notes), HTML and markdownlint comment lines,
+    image links, fenced code blocks and <pre><code>/<table> regions are
+    skipped. Two consecutive remaining lines that are both non-blank, and
+    not both list items, are reported with the number of the second line.
+    Returns 1 if at least one manual line break was printed, otherwise 0.
+    """
+    # A list item starts with '-', '+', '*', or a number/letter followed by '.'.
+    list_item = r'\s*(-|\+|(\d+|\w{1})\.|\*)\s*\w*'
+    two_lines = []
+    metadata = 0
+    toggle = 0    # 1 while inside a ``` fenced code block
+    ctoggle = 0   # 1 while inside a <pre><code> or <table> region
+    mark = 0
+    # The docs are UTF-8; do not depend on the locale's default encoding.
+    with open(filename, 'r', encoding='utf-8') as file:
+        for lineNum, line in enumerate(file, start=1):
+            # Count the number of '---' to skip metadata.
+            if metadata < 2:
+                if re.match(r'\s*(-){3}', line):
+                    metadata += 1
+                continue
+
+            # Skip tables and notes.
+            if re.match(r'\s*(\||>)\s*\w*', line):
+                continue
+
+            # Skip html tags and markdownlint tags ('<!--' / '-->' comments).
+            if re.match(r'\s*((<\/*(.*)>)|<!--|-->)\s*\w*', line):
+                if re.match(r'\s*(<pre><code>|<table>)', line):
+                    ctoggle = 1
+                elif re.match(r'\s*(<\/code><\/pre>|<\/table>)', line):
+                    ctoggle = 0
+                else:
+                    continue
+
+            # Skip image links.
+            if re.match(r'\s*!\[.+\](\(.+\)|: [a-zA-Z]+://[^\s]*)', line):
+                continue
+
+            # Set a toggle to skip code blocks.
+            if re.match(r'\s*`{3}', line):
+                toggle = abs(1-toggle)
+
+            if toggle == 1 or ctoggle == 1:
+                continue
+
+            # Keep a record of the current line and the former line.
+            two_lines = (two_lines + [line])[-2:]
+            if len(two_lines) < 2:
+                continue
+
+            # A blank line on either side is a real paragraph break.
+            if re.match(r'\s*\n', two_lines[0]) or re.match(r'\s*\n', two_lines[1]):
+                continue
+
+            # Consecutive list items are separate items, not a broken paragraph.
+            if re.match(list_item, two_lines[0]) and re.match(list_item, two_lines[1]):
+                continue
+            if mark == 0:
+                print("\n" + filename + ": this file has manual line breaks in the following lines:\n")
+                mark = 1
+            print("MANUAL LINE BREAKS: L" + str(lineNum))
+    return mark
+
+
+if __name__ == "__main__":
+    # Number of files in which at least one manual line break was reported.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        if os.path.isfile(filename):
+            mark = check_manual_break(filename)
+            if mark :
+                count+=1
+    # Exit with status 1 so that callers (e.g. a CI job) see the failure.
+    if count:
+        print("\nThe above issues will cause website build failure. Please fix them.")
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/check-tags.py b/scripts/check-tags.py
new file mode 100644
index 0000000000000..51eb14ff6920d
--- /dev/null
+++ b/scripts/check-tags.py
@@ -0,0 +1,184 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-tags.py.
+
+import re
+import sys
+import os
+
+# reference: https://stackoverflow.com/questions/35761133/python-how-to-check-for-open-and-close-tags
+def stack_tag(tag, stack):
+    """Update the stack of open tag names with one tag and return it.
+
+    A tag whose text ends with '/' before the closing bracket is treated as
+    self-closing and leaves the stack untouched. An opening tag pushes its
+    name. A closing tag pops the top of the stack when the names match;
+    otherwise the first entry with that name, if any, is removed.
+
+    Args:
+        tag: the full tag text, including the surrounding '<' and '>'.
+        stack: list of the names of the currently open tags; it is
+            modified in place.
+
+    Returns:
+        The same `stack` list.
+    """
+    body = tag[1:-1]
+    name_end = body.find(' ')
+
+    # Self-closing tag such as <br/>: nothing to track.
+    if body[-1:] == '/':
+        return stack
+
+    # Opening tag: the name is everything before the first space.
+    if body[:1] != '/':
+        stack.append(body if name_end == -1 else body[:name_end])
+        return stack
+
+    # Closing tag: drop the leading '/' and anything after the first space.
+    name = body[1:] if name_end == -1 else body[1:name_end]
+    if stack and stack[-1] == name:
+        # The most recently opened tag is the one being closed.
+        stack.pop()
+    elif name in stack:
+        # Closed out of order: discard the first entry with this name.
+        stack.remove(name)
+    return stack
+
+def tag_is_wrapped(pos, content):
+    """Tell whether the tag at `pos` is wrapped by backticks.
+
+    Every backtick in `content` before the tag and every backtick after it
+    is counted; the tag is considered wrapped only when both counts are odd.
+    Both counts cover the whole of `content`, not only the tag's own line.
+
+    Args:
+        pos: (start, end) offsets of the tag within `content`.
+        content: the text that contains the tag.
+
+    Returns:
+        True if the tag is wrapped by backticks, otherwise False.
+    """
+    tag_start, tag_end = pos[0], pos[1]
+
+    # An odd number of backticks on one side means that one of them has no
+    # partner on that side, so it has to pair up across the tag.
+    odd_before = content[:tag_start].count('`') % 2 == 1
+    odd_after = content[tag_end:].count('`') % 2 == 1
+
+    return odd_before and odd_after
+
+def filter_frontmatter(content):
+    """Return `content` without its leading front matter, if it has one.
+
+    Front matter is only looked for when `content` starts with '---'. The
+    text is then cut right after the second '---' + newline found anywhere
+    in it; with fewer than two of them `content` is returned unchanged.
+    """
+    marker = '---\n'
+    if not content.startswith('---'):
+        return content
+
+    first = content.find(marker)
+    second = content.find(marker, first + len(marker)) if first != -1 else -1
+    # Keep only what follows the closing marker.
+    return content if second == -1 else content[second + len(marker):]
+
+def filter_backticks(content, filename):
+    """Return `content` with every ``` fenced code block removed.
+
+    Fences are paired in order of appearance (1st with 2nd, 3rd with 4th,
+    ...) and each pair is cut out together with the text between them.
+    If the number of fences is odd, a message naming `filename` is printed
+    and the script exits with status 1.
+    """
+    fences = [match.span() for match in re.finditer(r'```', content)]
+    # e.g. fences = [(23, 26), (37, 40), (123, 126), (147, 150)]
+    if len(fences) % 2 != 0:
+        print(filename, ": Some of your code blocks ``` ```  are not closed. Please close them.")
+        sys.exit(1)
+
+    # Slice by position in a single pass. Replacing the block *text* would
+    # also delete identical text outside of code blocks, and recursing once
+    # per block can exceed the recursion limit on long files.
+    kept, cursor = [], 0
+    for (block_start, _), (_, block_end) in zip(fences[::2], fences[1::2]):
+        kept.append(content[cursor:block_start])
+        cursor = block_end
+    kept.append(content[cursor:])
+    return ''.join(kept)
+
+# Becomes 1 as soon as one file has unclosed tags.
+status_code = 0
+
+# Check every existing file given on the command line for unclosed tags.
+for filename in sys.argv[1:]:
+    if not os.path.isfile(filename):
+        continue
+
+    # The docs are UTF-8; do not depend on the locale's default encoding.
+    with open(filename, "r", encoding="utf-8") as file:
+        content = file.read()
+
+    # Front matter and fenced code blocks are not checked for tags.
+    content = filter_frontmatter(content)
+    content = filter_backticks(content, filename)
+
+    # Names of the tags that are currently open, innermost last.
+    stack = []
+    for i in re.finditer(r'<([^\n`>]*)>', content):
+        # `tag` is the whole match including '<' and '>'; `pos` is its span.
+        tag = i.group()
+        pos = i.span()
+
+        if tag[:4] == '<!--':
+            # filter HTML comments
+            continue
+        elif content[pos[0]-2:pos[0]] == '{{' and content[pos[1]:pos[1]+2] == '}}':
+            # filter copyable shortcodes
+            continue
+        elif tag[:5] == '<http':
+            # filter urls
+            continue
+        elif tag_is_wrapped(pos, content):
+            # filter tags wrapped by backticks
+            continue
+
+        stack = stack_tag(tag, stack)
+
+    # Whatever is still on the stack was opened but never closed.
+    if len(stack):
+        stack = ['<' + i + '>' for i in stack]
+        print("ERROR: " + filename + ' has unclosed tags: ' + ', '.join(stack) + '.\n')
+        status_code = 1
+
+# Exit with status 1 so that callers (e.g. a CI job) see the failure.
+if status_code:
+    print("HINT: Unclosed tags will cause website build failure. Please fix the reported unclosed tags. You can use backticks `` to wrap them or close them. Thanks.")
+    sys.exit(1)
diff --git a/scripts/check-zh-punctuation.py b/scripts/check-zh-punctuation.py
new file mode 100644
index 0000000000000..87fc0a58b989b
--- /dev/null
+++ b/scripts/check-zh-punctuation.py
@@ -0,0 +1,84 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-zh-punctuation.py.
+
+import sys, os, zhon.hanzi
+
+# Check Chinese punctuation in English files.
+
+def check_zh_punctuation(filename):
+    """Report Chinese punctuation found in `filename`.
+
+    Every character contained in zhon.hanzi.punctuation counts, except the
+    two dashes listed in `acceptable_punc`. For each affected line, the
+    line number and the offending characters are printed.
+
+    Args:
+        filename: path of the text file to check.
+
+    Returns:
+        1 if any Chinese punctuation was found, otherwise 0.
+    """
+    acceptable_punc = ['–','—'] # em dash and en dash
+    found = []  # (line number, punctuation found in that line)
+
+    # Decode explicitly as UTF-8: under a different locale default the file
+    # would be mis-decoded and Chinese punctuation could go undetected.
+    with open(filename, 'r', encoding='utf-8') as file:
+        for lineNum, line in enumerate(file, start=1):
+            punc_inline = "".join(
+                char for char in line
+                if char in zhon.hanzi.punctuation and char not in acceptable_punc
+            )
+            if punc_inline:
+                found.append((lineNum, punc_inline))
+
+    if not found:
+        return 0
+
+    print("\n" + filename + ": this file has Chinese punctuation in the following lines:\n")
+    # One line of output per affected line of the file.
+    for lineNum, punc_inline in found:
+        print("Chinese punctuation: L" + str(lineNum) + " has " + punc_inline)
+
+    return 1
+
+if __name__ == "__main__":
+    # Number of files in which Chinese punctuation was reported.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        if os.path.isfile(filename):
+            flag = check_zh_punctuation(filename)
+            if flag:
+                count+=1
+    # Exit with status 1 so that callers (e.g. a CI job) see the failure.
+    if count:
+        print("\nThe above issues will ruin your article. Please convert these marks into English punctuation.")
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/file-format-lint.py b/scripts/file-format-lint.py
new file mode 100644
index 0000000000000..e4ba16b3e176b
--- /dev/null
+++ b/scripts/file-format-lint.py
@@ -0,0 +1,156 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/file-format-lint.py.
+
+import re, sys, os, codecs
+
+# Convert the file encoding to the default UTF-8 without BOM.
+def check_BOM(filename):
+    """Remove a leading UTF-8 byte order mark from `filename`, in place."""
+    bom, buf_size = codecs.BOM_UTF8, 4096
+    with open(filename, "r+b") as fp:
+        block = fp.read(buf_size)
+        if not block.startswith(bom):
+            return
+        # Move the rest of the file back over the BOM, one block at a time.
+        write_at, block = 0, block[len(bom):]
+        while block:
+            fp.seek(write_at)
+            fp.write(block)
+            write_at += len(block)
+            fp.seek(write_at + len(bom))
+            block = fp.read(buf_size)
+        # Cut off the stale bytes left at the end after the move.
+        fp.truncate(write_at)
+        print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.")
+
+# Check control characters.
+def check_control_char(filename):
+    """Report every line of `filename` that contains a backspace character.
+
+    Only the backspace character (U+0008) is looked for. The numbers of the
+    offending lines are printed.
+
+    Returns 1 when at least one such line exists, otherwise 0.
+    """
+    # The docs are UTF-8; do not depend on the locale's default encoding.
+    with open(filename, 'r', encoding='utf-8') as file:
+        # '\b' is the backspace character.
+        pos = [lineNum for lineNum, line in enumerate(file, start=1) if '\b' in line]
+
+    if not pos:
+        return 0
+
+    print("\n" + filename + ": this file has control characters in the following lines:\n")
+    for cc in pos:
+        print("CONTROL CHARACTERS IN L" + str(cc))
+    print("Please delete these control characters.")
+
+    return 1
+
+
+# Check manual line break within a paragraph.
+def check_manual_break(filename):
+    """Report manual line breaks inside paragraphs of a Markdown file.
+
+    Front matter, tables, notes, HTML tags, markdownlint comment markers,
+    links, images and fenced code blocks are skipped. Two consecutive
+    remaining lines that are both non-blank, and not both list items, are
+    reported. Returns 1 if anything was reported, otherwise 0.
+    """
+    # A list item starts with '-', '+', '*', or a number/letter followed by '.'.
+    list_item = r'\s*(-|\+|(\d+|\w{1})\.|\*)\s*\w*'
+    two_lines = []
+    metadata = 0
+    toggle = 0    # 1 while inside a ``` fenced code block
+    mark = 0
+    # The docs are UTF-8; do not depend on the locale's default encoding.
+    with open(filename, 'r', encoding='utf-8') as file:
+        for lineNum, line in enumerate(file, start=1):
+            # Count the number of '---' to skip metadata.
+            if metadata < 2:
+                if re.match(r'\s*(-){3}', line):
+                    metadata += 1
+                continue
+
+            # Skip tables and notes.
+            if re.match(r'\s*(\||>)\s*\w*', line):
+                continue
+
+            # Skip html tags and markdownlint tags ('<!--' / '-->' comments).
+            # No branch of the alternation may be empty: an empty branch
+            # matches every line and would skip the whole file.
+            if re.match(r'\s*((<\/*\w+>)|<!--|-->)\s*\w*', line):
+                continue
+
+            # Skip links and images.
+            if re.match(r'\s*!*\[.+\](\(.+\)|: [a-zA-Z]+://[^\s]*)', line):
+                continue
+
+            # Set a toggle to skip code blocks.
+            if re.match(r'\s*`{3}', line):
+                toggle = abs(1-toggle)
+
+            if toggle == 1:
+                continue
+
+            # Keep a record of the current line and the former line.
+            two_lines = (two_lines + [line])[-2:]
+            if len(two_lines) < 2:
+                continue
+
+            # A blank line on either side is a real paragraph break.
+            if re.match(r'\s*\n', two_lines[0]) or re.match(r'\s*\n', two_lines[1]):
+                continue
+
+            # Consecutive list items are separate items, not a broken paragraph.
+            if re.match(list_item, two_lines[0]) and re.match(list_item, two_lines[1]):
+                continue
+            if mark == 0:
+                print("\n" + filename + ": this file has manual line breaks in the following lines:\n")
+                mark = 1
+            print("MANUAL LINE BREAKS: L" + str(lineNum))
+    return mark
+
+
+if __name__ == "__main__":
+    # Number of files in which at least one problem was reported.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        if os.path.isfile(filename):
+            check_BOM(filename)
+            flag = check_control_char(filename)
+            mark = check_manual_break(filename)
+            if mark or flag:
+                count+=1
+    # Exit with status 1 so that callers (e.g. a CI job) see the failure.
+    if count:
+        print("\nThe above issues will cause website build failure. Please fix them.")
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/get-issue-number.py b/scripts/get-issue-number.py
new file mode 100644
index 0000000000000..e8f8dde36831b
--- /dev/null
+++ b/scripts/get-issue-number.py
@@ -0,0 +1,122 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/get-issue-number.py.
+
+import re, os, sys
+import requests
+from tempfile import mkstemp
+from shutil import move
+from os import remove
+from bs4 import BeautifulSoup
+
+def get_issue_link(pr_url):
+    """Return the link of the issue that a pull request page refers to.
+
+    The page at `pr_url` is fetched and the <p> elements of its first
+    "d-block" table are scanned for one whose leading text matches "Issue
+    Number", "fix" or "bug", or that holds an "issue-keyword" span. The href
+    of the issue link in the first such paragraph is returned.
+
+    Returns 0 if the request fails, the table is missing, or no issue link
+    is found.
+    """
+    print("Connecting to " + pr_url + " ...")
+
+    try:
+        # Without a timeout a stalled connection would block the script forever.
+        response = requests.get(pr_url, timeout=30)
+    except requests.RequestException:
+        response = None
+
+    if not response:
+        print('Connection failed. No html content')
+        return 0
+
+    resp = BeautifulSoup(response.text, "html.parser")
+    table = resp.find("table", "d-block")
+    paragraphs = table.find_all("p") if table else []
+
+    link = None
+    for p in paragraphs:
+        first = p.contents[0] if p.contents else None
+        match = isinstance(first, str) and re.search(r'(Issue Number)|(fix)|(bug).*', first, re.I)
+        if match or p.find('span', attrs = {"class": "issue-keyword"}):
+            issue_link = p.find('a', attrs = {"data-hovercard-type":"issue"}) or p.find('a', attrs = {"class": "issue-link"})
+            if issue_link:
+                link = issue_link['href']
+            break
+
+    if link is None:
+        print("No related issue number.\n")
+        return 0
+    print('Related issue number: ' + link)
+    return link
+
+def change_pr_to_issue(filename):
+    """Replace pull request links with issue links in the bug section.
+
+    From the first line starting with '## Bug' onwards, each line containing
+    a '[#number](url)' link has those links replaced by the issue link that
+    get_issue_link() finds for the first one; other lines are copied as is.
+    The result is written to a temporary file that then replaces `filename`.
+    """
+    pr_link = r'\[#\d+\]\([a-zA-Z]+://[^\s]*\)'
+    in_bug_section = False
+    fh, target_file_path = mkstemp()
+    # Wrap mkstemp()'s descriptor so that it is closed, not leaked, when done.
+    with os.fdopen(fh, 'w', encoding='utf-8') as target_file, \
+            open(filename, 'r', encoding='utf-8') as source_file:
+        for line in source_file:
+            if re.match(r'## Bug', line):
+                in_bug_section = True
+                print("Match Start\n")
+            matchObj = re.search(pr_link, line) if in_bug_section else None
+            if matchObj:
+                pr_url = re.search(r'[a-zA-Z]+://[^\s]*[^\)]', matchObj.group()).group()
+                issue_url = get_issue_link(pr_url)
+                # Only replace the link when a related issue was found.
+                if issue_url:
+                    issue_num = re.search(r'\d+', issue_url)
+                    issue_md = '[#' + issue_num.group() + ']' + '(' + issue_url + ')'
+                    line = re.sub(pr_link, issue_md, line)
+                    print(issue_md + '\n')
+            target_file.write(line)
+
+    remove(filename)
+    move(target_file_path, filename)
+
+# get_issue_link("https://github.com/pingcap/tidb/pull/22924")
+
+# change_pr_to_issue('./releases/release-4.0.13.md')
+
+if __name__ == "__main__":
+    # Rewrite every existing file named on the command line; arguments that
+    # are not regular files are ignored.
+    for filename in filter(os.path.isfile, sys.argv[1:]):
+        change_pr_to_issue(filename)