diff --git a/.circleci/config.yml b/.circleci/config.yml index 949e1028799dc..2a86bafb80db3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -21,14 +21,12 @@ jobs: command: | git remote add upstream https://github.com/pingcap/docs.git git fetch upstream - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-file-encoding.py - python3 check-file-encoding.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-file-encoding.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') - run: name: "Check git conflicts" command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-conflicts.py - python3 check-conflicts.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' '*.yml' '*.yaml') + python3 scripts/check-conflicts.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' '*.yml' '*.yaml') - run: name: "Install markdownlint" @@ -53,20 +51,17 @@ jobs: - run: name: "Check control characters" command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-control-char.py - python3 check-control-char.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-control-char.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') - run: name: "Check unclosed tags" command: | - wget https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-tags.py - python3 check-tags.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-tags.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') - run: name: "Check manual line breaks" command: | - wget 
https://raw.githubusercontent.com/CharLotteiu/pingcap-docs-checks/main/check-manual-line-breaks.py - python3 check-manual-line-breaks.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') + python3 scripts/check-manual-line-breaks.py $(git diff-tree --name-only --no-commit-id -r upstream/master..HEAD -- '*.md' ':(exclude).github/*') build: docker: diff --git a/scripts/check-conflicts.py b/scripts/check-conflicts.py new file mode 100644 index 0000000000000..7d940ad4d9f0b --- /dev/null +++ b/scripts/check-conflicts.py @@ -0,0 +1,73 @@ +# Copyright 2021 PingCAP, Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-conflicts.py. 
+
+import re
+import sys
+import os
+
+# Scan every file given on the command line for Git conflict markers and
+# report the line range of each "<<<<<<<" ... "=======" ... ">>>>>>>" block.
+# The process exits with status 1 if any file contains such a block.
+
+# Parser states: outside a conflict, after "<<<<<<<", after "=======".
+STATE_IDLE, STATE_OURS, STATE_THEIRS = 0, 1, 2
+
+mark = 0  # set to 1 as soon as one file contains a complete conflict block
+
+for filename in sys.argv[1:]:
+    if not os.path.isfile(filename):
+        continue
+
+    # Start from a clean state for every file so that an unterminated marker
+    # in one file cannot influence the next one.
+    flag = STATE_IDLE
+    start = 0
+    pos = []  # (first line, last line) of every conflict block in this file
+
+    # Decode explicitly instead of relying on the locale; undecodable bytes
+    # are replaced because only the ASCII markers matter here.
+    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
+        for lineNum, line in enumerate(file, start=1):
+            if re.match(r'<{7}.*\n', line):
+                flag = STATE_OURS
+                start = lineNum
+            elif re.match(r'={7}\n', line) and flag == STATE_OURS:
+                # A separator only counts after "<<<<<<<"; a lone "======="
+                # line is an ordinary Setext heading underline.
+                flag = STATE_THEIRS
+            elif re.match(r'>{7}', line) and flag == STATE_THEIRS:
+                pos.append((start, lineNum))
+                flag = STATE_IDLE
+
+    if pos:
+        mark = 1
+        print("\n" + filename + ": this file has conflicts in the following lines:\n")
+        for first, last in pos:
+            print("CONFLICTS: line " + str(first) + " to line " + str(last) + "\n")
+
+if mark:
+    print("The above conflicts will cause website build failure. Please fix them.")
+    # sys.exit() rather than the site-provided exit(), which is meant for
+    # interactive sessions.
+    sys.exit(1)
diff --git a/scripts/check-control-char.py b/scripts/check-control-char.py
new file mode 100644
index 0000000000000..e17a721d8c74e
--- /dev/null
+++ b/scripts/check-control-char.py
@@ -0,0 +1,69 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-control-char.py.
+
+import re, sys, os
+
+# Check control characters.
+def check_control_char(filename):
+    """Report the lines of *filename* that contain a backspace character.
+
+    The pattern used below matches the backspace character (U+0008) only.
+    When at least one line matches, the file name and the matching line
+    numbers are printed.
+
+    Returns 1 if a match was found, otherwise 0.
+    """
+    # Decode explicitly instead of relying on the locale; undecodable bytes
+    # are replaced because they cannot hide or create a backspace.
+    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
+        pos = [lineNum for lineNum, line in enumerate(file, start=1)
+               if re.search(r'[\b]', line)]
+
+    if not pos:
+        return 0
+
+    print("\n" + filename + ": this file has control characters in the following lines:\n")
+    for cc in pos:
+        print("CONTROL CHARACTERS: L" + str(cc))
+    print("\nPlease delete these control characters.")
+    return 1
+
+if __name__ == "__main__":
+
+    # Number of files in which check_control_char() reported something.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        if os.path.isfile(filename):
+            flag = check_control_char(filename)
+            if flag:
+                count += 1
+
+    if count:
+        print("\nThe above issues will cause website build failure. Please fix them.")
+        # sys.exit() rather than the site-provided exit(), which is meant
+        # for interactive sessions.
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/check-file-encoding.py b/scripts/check-file-encoding.py
new file mode 100644
index 0000000000000..b207659cfc8be
--- /dev/null
+++ b/scripts/check-file-encoding.py
@@ -0,0 +1,57 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License + +# Copyright (c) 2021 Charlotte Liu + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-file-encoding.py. + +import sys, os, codecs + +# Convert the file encoding to the default UTF-8 without BOM. 
+def check_BOM(filename):
+    """Remove a leading UTF-8 byte order mark from *filename* in place.
+
+    Files that do not start with the BOM are left untouched. Otherwise the
+    remaining bytes are moved to the start of the file one buffer at a
+    time, the file is shortened by the length of the BOM and a notice is
+    printed.
+    """
+    bom = codecs.BOM_UTF8
+    bom_len = len(bom)
+    buf_size = 4096
+
+    with open(filename, "r+b") as fp:
+        data = fp.read(buf_size)
+        if not data.startswith(bom):
+            return
+
+        write_at = 0
+        data = data[bom_len:]
+        while data:
+            fp.seek(write_at)
+            fp.write(data)
+            write_at += len(data)
+            # The next unread byte lies bom_len bytes past the write cursor.
+            fp.seek(bom_len, os.SEEK_CUR)
+            data = fp.read(buf_size)
+        # Step back over the stale tail left by the shift and cut it off.
+        fp.seek(-bom_len, os.SEEK_CUR)
+        fp.truncate()
+        print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.")
+
+if __name__ == "__main__":
+
+    # Handle every command-line argument that names an existing regular file.
+    for path in filter(os.path.isfile, sys.argv[1:]):
+        check_BOM(path)
\ No newline at end of file
diff --git a/scripts/check-manual-line-breaks.py b/scripts/check-manual-line-breaks.py
new file mode 100644
index 0000000000000..7102581ff37e2
--- /dev/null
+++ b/scripts/check-manual-line-breaks.py
@@ -0,0 +1,115 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-manual-line-breaks.py.
+
+import re, sys, os
+
+# Check manual line break within a paragraph.
+def check_manual_break(filename):
+    """Report lines of *filename* that continue a paragraph after a manual line break.
+
+    Every line is ignored until two '---' front matter delimiters have been
+    seen. After that, table rows, block quotes, HTML and markdownlint lines,
+    image links, and everything inside ``` fences or <pre><code> / <table>
+    regions are skipped. Each remaining line is compared with the previously
+    kept line: when neither of the two is blank and they are not both list
+    items, the number of the current line is printed.
+
+    Returns 1 if at least one line was reported, otherwise 0.
+    """
+    two_lines = []  # sliding window: [previously kept line, current line]
+    metadata = 0    # number of '---' delimiters seen so far
+    toggle = 0      # 1 while inside a ``` fenced block
+    ctoggle = 0     # 1 while inside a <pre><code> or <table> region
+    mark = 0
+
+    blank = r'(\s|\t)*\n'
+    # The same pattern is applied to both lines of the window. The variant
+    # previously applied to the second line accepted any leading digits,
+    # even without the trailing dot of an ordered list item.
+    list_item = r'(\s|\t)*(-|\+|(\d+|\w{1})\.|\*)\s*\w*'
+
+    # Decode explicitly instead of relying on the locale; undecodable bytes
+    # are replaced because only the line structure matters here.
+    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
+        for lineNum, line in enumerate(file, start=1):
+
+            # Count the number of '---' to skip metadata.
+            if metadata < 2:
+                if re.match(r'(\s|\t)*(-){3}', line):
+                    metadata += 1
+                continue
+
+            # Skip tables and notes.
+            if re.match(r'(\s|\t)*(\||>)\s*\w*', line):
+                continue
+
+            # Skip html tags and markdownlint tags.
+            # NOTE(review): the HTML-like tokens of the next two patterns were
+            # missing from the text this was restored from. '<pre><code>|<table>'
+            # mirrors the closing pattern below; '<!--|-->' is a best guess for
+            # the comment alternative. Confirm against the upstream script.
+            if re.match(r'(\s|\t)*((<\/*(.*)>)|<!--|-->)\s*\w*', line):
+                if re.match(r'(\s|\t)*(<pre><code>|<table>)', line):
+                    ctoggle = 1
+                elif re.match(r'(\s|\t)*(<\/code><\/pre>|<\/table>)', line):
+                    ctoggle = 0
+                else:
+                    continue
+
+            # Skip image links.
+            if re.match(r'(\s|\t)*!\[.+\](\(.+\)|: [a-zA-Z]+://[^\s]*)', line):
+                continue
+
+            # Set a toggle to skip code blocks.
+            if re.match(r'(\s|\t)*`{3}', line):
+                toggle = abs(1 - toggle)
+
+            if toggle == 1 or ctoggle == 1:
+                continue
+
+            # Keep a record of the current line and the former line.
+            two_lines.append(line)
+            if len(two_lines) < 2:
+                continue
+            if len(two_lines) > 2:
+                two_lines.pop(0)
+
+            # Compare if there is a manual line break between the two lines.
+            if re.match(blank, two_lines[0]) or re.match(blank, two_lines[1]):
+                continue
+            if re.match(list_item, two_lines[0]) and re.match(list_item, two_lines[1]):
+                continue
+
+            if mark == 0:
+                print("\n" + filename + ": this file has manual line breaks in the following lines:\n")
+                mark = 1
+
+            print("MANUAL LINE BREAKS: L" + str(lineNum))
+    return mark
+
+
+if __name__ == "__main__":
+
+    # Number of files in which check_manual_break() reported something.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        # Arguments that are not existing regular files are skipped.
+        if os.path.isfile(filename):
+            mark = check_manual_break(filename)
+            if mark:
+                count += 1
+
+    if count:
+        print("\nThe above issues will cause website build failure. Please fix them.")
+        # sys.exit() rather than the site-provided exit(), which is meant
+        # for interactive sessions.
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/check-tags.py b/scripts/check-tags.py
new file mode 100644
index 0000000000000..51eb14ff6920d
--- /dev/null
+++ b/scripts/check-tags.py
@@ -0,0 +1,184 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-tags.py.
+
+import re
+import sys
+import os
+
+# reference: https://stackoverflow.com/questions/35761133/python-how-to-check-for-open-and-close-tags
+def stack_tag(tag, stack):
+    """Update the *stack* of open tag names with one matched tag and return it.
+
+    *tag* is the complete matched text including the angle brackets, for
+    example '<div class="x">', '</div>' or '<br/>'.
+
+    * A tag whose content ends with '/' is self-closing and changes nothing.
+    * An opening tag pushes its name (the text before the first space).
+    * A closing tag removes the most recently opened tag of the same name;
+      a closing tag without a matching open tag is ignored.
+
+    *stack* is modified in place and is also the return value.
+    """
+    t = tag[1:-1]
+    first_space = t.find(' ')
+
+    if t[-1:] == '/':
+        # Self-closing tag: nothing to open or close.
+        return stack
+
+    if t[:1] != '/':
+        # Opening tag: record the name without its attributes.
+        stack.append(t if first_space == -1 else t[:first_space])
+        return stack
+
+    # Closing tag: drop the leading '/' and anything after the first space.
+    name = t[1:] if first_space == -1 else t[1:first_space]
+
+    # Search from the top so that the innermost open tag of that name is
+    # the one that gets closed.
+    for index in range(len(stack) - 1, -1, -1):
+        if stack[index] == name:
+            del stack[index]
+            break
+    return stack
+
+def tag_is_wrapped(pos, content):
+    """Tell whether the tag located at *pos* is enclosed by backticks.
+
+    *pos* is the (start, end) span of the tag inside *content*. The tag
+    counts as wrapped when an odd number of backticks precedes it and an
+    odd number of backticks follows it.
+    """
+    start, end = pos[0], pos[1]
+    ticks_before = content[:start].count('`')
+    ticks_after = content[end:].count('`')
+    return ticks_before % 2 != 0 and ticks_after % 2 != 0
+
+def filter_frontmatter(content):
+    """Return *content* without its leading front matter, if it has one.
+
+    Only text that starts with '---' is examined. When it contains at
+    least two occurrences of '---' followed by a newline, everything up to
+    and including the second occurrence is dropped; in every other case
+    *content* is returned unchanged.
+    """
+    if not content.startswith('---'):
+        return content
+
+    delimiter = '---\n'
+    opening = content.find(delimiter)
+    if opening == -1:
+        return content
+
+    closing = content.find(delimiter, opening + len(delimiter))
+    if closing == -1:
+        return content
+
+    # Whatever follows the second delimiter is the document body.
+    return content[closing + len(delimiter):]
+
+def filter_backticks(content, filename):
+    """Return *content* with every ``` fenced block removed, fences included.
+
+    The ``` markers are paired in order of appearance. If their number is
+    odd, a message naming *filename* is printed and the process exits with
+    status 1.
+    """
+    fences = [found.span() for found in re.finditer(r'```', content)]
+    # e.g. fences = [(23, 26), (37, 40), (123, 126), (147, 150)]
+    if len(fences) % 2 != 0:
+        print(filename, ": Some of your code blocks ``` ``` are not closed. Please close them.")
+        sys.exit(1)
+
+    # Collect the text between the fenced blocks in a single pass. Removing
+    # one block per recursive call re-scanned the whole text every time and
+    # hit the recursion limit on documents with very many code blocks.
+    kept = []
+    cursor = 0
+    for opening, closing in zip(fences[0::2], fences[1::2]):
+        kept.append(content[cursor:opening[0]])
+        cursor = closing[1]
+    kept.append(content[cursor:])
+    return ''.join(kept)
+
+# Check every file given on the command line for tags that are opened but
+# never closed. Front matter and ``` fenced blocks are removed before the
+# scan. The process exits with status 1 if any file has unclosed tags.
+status_code = 0
+
+for filename in sys.argv[1:]:
+    if not os.path.isfile(filename):
+        continue
+
+    # Decode explicitly instead of relying on the locale, and let the
+    # context manager close the file even when reading fails.
+    with open(filename, "r", encoding="utf-8", errors="replace") as file:
+        content = file.read()
+
+    content = filter_frontmatter(content)
+    content = filter_backticks(content, filename)
+
+    stack = []
+    for i in re.finditer(r'<([^\n`>]*)>', content):
+        tag = i.group()
+        pos = i.span()
+
+        # NOTE(review): the text this was restored from had lost the two
+        # prefix literals compared below and every statement between the
+        # second one and the list comprehension after the loop. The
+        # literals '<!--' / '-->' and '<http', the tag_is_wrapped() check
+        # and the stack_tag() call are reconstructed; confirm them against
+        # the upstream script.
+        if tag[:4] == '<!--' and tag[-3:] == '-->':
+            # filter comments
+            continue
+        elif content[pos[0]-2:pos[0]] == '{{' and content[pos[1]:pos[1]+2] == '}}':
+            # filter copyable shortcodes
+            continue
+        elif tag[:5] == '<http':
+            # filter urls written as <http...>
+            continue
+        elif tag_is_wrapped(pos, content):
+            # filter tags wrapped by backticks
+            continue
+
+        stack = stack_tag(tag, stack)
+
+    if len(stack):
+        stack = ['<' + i + '>' for i in stack]
+        print("ERROR: " + filename + ' has unclosed tags: ' + ', '.join(stack) + '.\n')
+        status_code = 1
+
+if status_code:
+    print("HINT: Unclosed tags will cause website build failure. Please fix the reported unclosed tags. You can use backticks `` to wrap them or close them. Thanks.")
+    # sys.exit() rather than the site-provided exit(), which is meant for
+    # interactive sessions.
+    sys.exit(1)
diff --git a/scripts/check-zh-punctuation.py b/scripts/check-zh-punctuation.py
new file mode 100644
index 0000000000000..87fc0a58b989b
--- /dev/null
+++ b/scripts/check-zh-punctuation.py
@@ -0,0 +1,84 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/check-zh-punctuation.py.
+
+import sys, os, zhon.hanzi
+
+# Check Chinese punctuation in English files.
+
+def check_zh_punctuation(filename):
+    """Report the lines of *filename* that contain Chinese punctuation.
+
+    Every character of every line is tested against
+    zhon.hanzi.punctuation; the two dashes in acceptable_punc are
+    tolerated. When something is found, the file name is printed followed
+    by one line per affected source line listing the offending characters
+    in order of appearance.
+
+    Returns 1 if any Chinese punctuation was found, otherwise 0.
+    """
+    acceptable_punc = ['–','—'] # en dash and em dash
+    # Build the lookup once instead of scanning the punctuation string for
+    # every character of the file.
+    forbidden = set(zhon.hanzi.punctuation) - set(acceptable_punc)
+
+    # (line number, offending characters) pairs; keeping both values in
+    # one record means they cannot get out of step.
+    findings = []
+
+    # The characters searched for are non-ASCII, so the file has to be
+    # decoded as UTF-8 regardless of the locale.
+    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
+        for lineNum, line in enumerate(file, start=1):
+            punc_inline = "".join(char for char in line if char in forbidden)
+            if punc_inline:
+                findings.append((lineNum, punc_inline))
+
+    if not findings:
+        return 0
+
+    print("\n" + filename + ": this file has Chinese punctuation in the following lines:\n")
+    for lineNum, punc_inline in findings:
+        print("Chinese punctuation: L" + str(lineNum) + " has " + punc_inline)
+    return 1
+
+if __name__ == "__main__":
+
+    # Number of files in which check_zh_punctuation() reported something.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        # Arguments that are not existing regular files are skipped.
+        if os.path.isfile(filename):
+            flag = check_zh_punctuation(filename)
+            if flag:
+                count += 1
+
+    if count:
+        print("\nThe above issues will ruin your article. Please convert these marks into English punctuation.")
+        # sys.exit() rather than the site-provided exit(), which is meant
+        # for interactive sessions.
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/file-format-lint.py b/scripts/file-format-lint.py
new file mode 100644
index 0000000000000..e4ba16b3e176b
--- /dev/null
+++ b/scripts/file-format-lint.py
@@ -0,0 +1,156 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/file-format-lint.py.
+
+import re, sys, os, codecs
+
+# Convert the file encoding to the default UTF-8 without BOM.
+def check_BOM(filename):
+ # Remove a leading UTF-8 byte order mark from the file named `filename`
+ # by moving the rest of the data to the front of the file in place.
+ # NOTE(review): the nesting of the statements below is inferred from the
+ # control flow; this rendering of the patch has lost its indentation.
+ BUFSIZE = 4096
+ BOMLEN = len(codecs.BOM_UTF8)
+
+ # "r+b" opens the file for binary reading and writing without truncating it.
+ with open(filename, "r+b") as fp:
+ chunk = fp.read(BUFSIZE)
+ if chunk.startswith(codecs.BOM_UTF8):
+ # i is the offset at which the next chunk is written back.
+ i = 0
+ chunk = chunk[BOMLEN:]
+ while chunk:
+ fp.seek(i)
+ fp.write(chunk)
+ i += len(chunk)
+ # The data not read yet starts BOMLEN bytes after the write position.
+ fp.seek(BOMLEN, os.SEEK_CUR)
+ chunk = fp.read(BUFSIZE)
+ # Step back by BOMLEN from the current position and drop everything
+ # after it, i.e. the stale bytes left behind by the shift.
+ fp.seek(-BOMLEN, os.SEEK_CUR)
+ fp.truncate()
+ # Presumably printed only when a BOM was removed -- confirm the nesting.
+ print("\n" + filename + ": this file's encoding has been converted to UTF-8 without BOM to avoid broken metadata display.")
+
+# Check control characters.
+def check_control_char(filename):
+    """Report the lines of *filename* that contain a backspace character.
+
+    The pattern used below matches the backspace character (U+0008) only.
+    When at least one line matches, the file name and the matching line
+    numbers are printed.
+
+    Returns 1 if a match was found, otherwise 0.
+    """
+    # Decode explicitly instead of relying on the locale; undecodable bytes
+    # are replaced because they cannot hide or create a backspace.
+    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
+        pos = [lineNum for lineNum, line in enumerate(file, start=1)
+               if re.search(r'[\b]', line)]
+
+    if not pos:
+        return 0
+
+    print("\n" + filename + ": this file has control characters in the following lines:\n")
+    for cc in pos:
+        print("CONTROL CHARACTERS IN L" + str(cc))
+    print("Please delete these control characters.")
+    return 1
+
+
+# Check manual line break within a paragraph.
+def check_manual_break(filename):
+    """Report lines of *filename* that continue a paragraph after a manual line break.
+
+    Every line is ignored until two '---' front matter delimiters have been
+    seen. After that, table rows, block quotes, HTML and markdownlint lines,
+    links, images and everything inside ``` fences are skipped. Each
+    remaining line is compared with the previously kept line: when neither
+    of the two is blank and they are not both list items, the number of the
+    current line is printed.
+
+    Returns 1 if at least one line was reported, otherwise 0.
+    """
+    two_lines = []  # sliding window: [previously kept line, current line]
+    metadata = 0    # number of '---' delimiters seen so far
+    toggle = 0      # 1 while inside a ``` fenced block
+    mark = 0
+
+    blank = r'(\s|\t)*\n'
+    # The same pattern is applied to both lines of the window. The variant
+    # previously applied to the second line accepted any leading digits,
+    # even without the trailing dot of an ordered list item.
+    list_item = r'(\s|\t)*(-|\+|(\d+|\w{1})\.|\*)\s*\w*'
+
+    # Decode explicitly instead of relying on the locale; undecodable bytes
+    # are replaced because only the line structure matters here.
+    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
+        for lineNum, line in enumerate(file, start=1):
+
+            # Count the number of '---' to skip metadata.
+            if metadata < 2:
+                if re.match(r'(\s|\t)*(-){3}', line):
+                    metadata += 1
+                continue
+
+            # Skip tables and notes.
+            if re.match(r'(\s|\t)*(\||>)\s*\w*', line):
+                continue
+
+            # Skip html tags and markdownlint tags.
+            # NOTE(review): the second alternative of this pattern was empty
+            # in the text this was restored from, which made it match every
+            # line. '<!--.*-->' is a best guess for the lost markdownlint
+            # comment pattern; confirm against the upstream script.
+            if re.match(r'(\s|\t)*((<\/*\w+>)|<!--.*-->)\s*\w*', line):
+                continue
+
+            # Skip links and images.
+            if re.match(r'(\s|\t)*!*\[.+\](\(.+\)|: [a-zA-Z]+://[^\s]*)', line):
+                continue
+
+            # Set a toggle to skip code blocks.
+            if re.match(r'(\s|\t)*`{3}', line):
+                toggle = abs(1 - toggle)
+
+            if toggle == 1:
+                continue
+
+            # Keep a record of the current line and the former line.
+            two_lines.append(line)
+            if len(two_lines) < 2:
+                continue
+            if len(two_lines) > 2:
+                two_lines.pop(0)
+
+            # Compare if there is a manual line break between the two lines.
+            if re.match(blank, two_lines[0]) or re.match(blank, two_lines[1]):
+                continue
+            if re.match(list_item, two_lines[0]) and re.match(list_item, two_lines[1]):
+                continue
+
+            if mark == 0:
+                print("\n" + filename + ": this file has manual line breaks in the following lines:\n")
+                mark = 1
+
+            print("MANUAL LINE BREAKS: L" + str(lineNum))
+    return mark
+
+
+if __name__ == "__main__":
+
+    # Number of files for which at least one check reported something.
+    count = 0
+
+    for filename in sys.argv[1:]:
+        # Arguments that are not existing regular files are skipped.
+        if os.path.isfile(filename):
+            # check_BOM() rewrites the file, so it runs before the two
+            # checks that read it.
+            check_BOM(filename)
+            flag = check_control_char(filename)
+            mark = check_manual_break(filename)
+            if mark or flag:
+                count += 1
+
+    if count:
+        print("\nThe above issues will cause website build failure. Please fix them.")
+        # sys.exit() rather than the site-provided exit(), which is meant
+        # for interactive sessions.
+        sys.exit(1)
\ No newline at end of file
diff --git a/scripts/get-issue-number.py b/scripts/get-issue-number.py
new file mode 100644
index 0000000000000..e8f8dde36831b
--- /dev/null
+++ b/scripts/get-issue-number.py
@@ -0,0 +1,122 @@
+# Copyright 2021 PingCAP, Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+
+# Copyright (c) 2021 Charlotte Liu
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# This file is originally hosted at https://github.com/CharLotteiu/pingcap-docs-checks/blob/main/get-issue-number.py.
+
+import re, os, sys
+import requests
+from tempfile import mkstemp
+from shutil import move
+from os import remove
+from bs4 import BeautifulSoup
+
+def get_issue_link(pr_url):
+    """Return the href of the first issue link found on the page at *pr_url*.
+
+    The page is downloaded and the <p> elements inside the first
+    <table class="d-block"> are searched. A paragraph qualifies when the
+    keyword pattern matches its leading text or when it contains a
+    <span class="issue-keyword">; the first qualifying paragraph that
+    holds an issue link provides the result.
+
+    Returns the link's href, or 0 when the request fails or no issue link
+    is found.
+    """
+    print("Connecting to " + pr_url + " ...")
+
+    try:
+        # Without a timeout a stalled connection would block forever.
+        response = requests.get(pr_url, timeout=30)
+    except requests.RequestException:
+        response = None
+
+    if not response:
+        print('Connection failed. No html content')
+        return 0
+
+    resp = BeautifulSoup(response.text, "html.parser")
+
+    table = resp.find("table", "d-block")
+    if table is None:
+        # The page does not contain the table that is searched below.
+        print("No related issue number.\n")
+        return 0
+
+    # NOTE(review): match keeps the value from an earlier paragraph whenever
+    # the first child of the current paragraph is not a string. This is how
+    # the code behaved before; confirm that the carry-over is intended.
+    match = 0
+
+    for p in table.findAll("p"):
+        # An empty <p> has no first child to inspect.
+        first = p.contents[0] if p.contents else None
+
+        if isinstance(first, str):
+            match = re.search(r'(Issue Number)|(fix)|(bug).*', first, re.I)
+
+        if match or p.find('span', attrs = {"class": "issue-keyword"}):
+            issue_link = p.find('a', attrs = {"data-hovercard-type":"issue"}) or p.find('a', attrs = {"class": "issue-link"})
+            if issue_link:
+                link = issue_link['href']
+                print('Related issue number: ' + link)
+                return link
+
+    print("No related issue number.\n")
+    return 0
+
+def change_pr_to_issue(filename):
+    """Replace pull request links in *filename* with the related issue links.
+
+    Lines are copied to a temporary file. From the first line that starts
+    with '## Bug' onwards, the first '[#number](url)' link of a line is
+    looked up with get_issue_link(); when an issue link is returned, the
+    '[#number](url)' links of that line are rewritten to point to it. The
+    temporary file then replaces the original file.
+    """
+    link_pattern = r'\[#\d+\]\([a-zA-Z]+://[^\s]*\)'
+
+    fh, target_file_path = mkstemp()
+    source_file_path = filename
+    match_start = 1  # switches to 0 once the '## Bug' heading has been seen
+
+    # mkstemp() returns an open descriptor; wrapping it makes sure that it
+    # is closed together with the file object instead of leaking.
+    with os.fdopen(fh, 'w', encoding='utf-8') as target_file:
+        with open(source_file_path, 'r', encoding='utf-8') as source_file:
+
+            for line in source_file:
+
+                if re.match(r'## Bug', line):
+                    match_start = 0
+                    print("Match Start\n")
+
+                if match_start == 0:
+                    matchObj = re.search(link_pattern, line)
+                    if matchObj:
+                        # The URL is the matched text from the scheme up to,
+                        # but not including, the trailing ')'.
+                        link = re.search(r'[a-zA-Z]+://[^\s]*[^\)]', matchObj.group())
+                        pr_url = link.group()
+                        issue_url = get_issue_link(pr_url)
+
+                        # Rewrite the line only when an issue link was found.
+                        if issue_url:
+                            issue_num = re.search(r'\d+', issue_url)
+                            issue_md = '[#' + issue_num.group() + ']' + '(' + issue_url + ')'
+                            line = re.sub(link_pattern, issue_md, line)
+                            print(issue_md + '\n')
+
+                target_file.write(line)
+
+    remove(source_file_path)
+    move(target_file_path, source_file_path)
+
+# get_issue_link("https://github.com/pingcap/tidb/pull/22924")
+
+# change_pr_to_issue('./releases/release-4.0.13.md')
+
+if __name__ == "__main__":
+
+    # Rewrite every command-line argument that names an existing regular file.
+    for path in filter(os.path.isfile, sys.argv[1:]):
+        change_pr_to_issue(path)