From 1a0f070e2c2713dbc0126e54b18f3482a82f21b8 Mon Sep 17 00:00:00 2001 From: CrazyMax Date: Mon, 5 Sep 2022 16:33:09 +0200 Subject: [PATCH 1/3] jekyll: fix sitemap lastmod Signed-off-by: CrazyMax --- .dockerignore | 1 - Gemfile | 1 + Gemfile.lock | 5 +++++ _config.yml | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 756281bdfa56..ac58861bc27a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,5 +1,4 @@ .DS_Store -.git .github .gitignore .idea diff --git a/Gemfile b/Gemfile index 2bfc1663360f..570ce5108ce4 100644 --- a/Gemfile +++ b/Gemfile @@ -5,6 +5,7 @@ gem 'wdm', '>= 0.1.0' if Gem.win_platform? gem 'jekyll', '4.2.2' group :jekyll_plugins do + gem 'jekyll-last-modified-at' gem 'jekyll-redirect-from' gem 'jekyll-relative-links' gem 'jekyll-sitemap' diff --git a/Gemfile.lock b/Gemfile.lock index ceb17254ed89..64efa9a05cd7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -45,6 +45,9 @@ GEM rouge (~> 3.0) safe_yaml (~> 1.0) terminal-table (~> 2.0) + jekyll-last-modified-at (1.3.0) + jekyll (>= 3.7, < 5.0) + posix-spawn (~> 0.3.9) jekyll-redirect-from (0.16.0) jekyll (>= 3.3, < 5.0) jekyll-relative-links (0.6.1) @@ -88,6 +91,7 @@ GEM parallel (1.22.1) pathutil (0.16.2) forwardable-extended (~> 2.6) + posix-spawn (0.3.15) public_suffix (4.0.7) racc (1.6.0) rainbow (3.1.1) @@ -118,6 +122,7 @@ DEPENDENCIES front_matter_parser (= 1.0.1) html-proofer (= 3.19.4) jekyll (= 4.2.2) + jekyll-last-modified-at jekyll-redirect-from jekyll-relative-links jekyll-sitemap diff --git a/_config.yml b/_config.yml index 361145f38aef..9e976ef85640 100644 --- a/_config.yml +++ b/_config.yml @@ -102,6 +102,24 @@ defaults: toc_min: 2 toc_max: 4 + # Exclude from sitemap + - scope: + path: "assets/**" + values: + sitemap: false + - scope: + path: "**/nav.html" + values: + sitemap: false + - scope: + path: "google*.html" + values: + sitemap: false + - scope: + path: "**/*.pdf" + values: + sitemap: false + # Set the correct edit-URL for some local and remote resources. We usually don't create a direct # edit link for these, and instead point to the directory that contains the file. - scope: From 06908b36f5191d5b39a91f1f64c7a34b749783c8 Mon Sep 17 00:00:00 2001 From: CrazyMax Date: Wed, 26 Oct 2022 02:05:56 +0200 Subject: [PATCH 2/3] jekyll: use git to fetch remote resources Signed-off-by: CrazyMax --- Dockerfile | 1 + Gemfile | 2 +- Gemfile.lock | 19 ++--- _plugins/fetch_remote.rb | 148 ++++++++++++++++++++------------------- 4 files changed, 87 insertions(+), 83 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0784f498365c..2d4f4a9c901c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,6 +47,7 @@ ARG JEKYLL_ENV ARG DOCS_URL ENV TARGET=/out RUN --mount=type=bind,target=.,rw \ + --mount=type=cache,target=/tmp/docker-docs-clone \ --mount=type=cache,target=/src/.jekyll-cache <= 2.0.2, < 5.0) - archive-zip (0.12.0) - io-like (~> 0.3.0) - chef-utils (17.10.0) + addressable (2.8.1) + public_suffix (>= 2.0.2, < 6.0) + chef-utils (18.0.161) concurrent-ruby colorator (1.1.0) concurrent-ruby (1.1.10) @@ -18,6 +16,9 @@ GEM ffi (1.15.5) forwardable-extended (2.6.0) front_matter_parser (1.0.1) + git (1.12.0) + addressable (~> 2.8) + rchardet (~> 1.8) html-proofer (3.19.4) addressable (~> 2.3) mercenary (~> 0.3) @@ -29,7 +30,6 @@ GEM http_parser.rb (0.8.0) i18n (1.12.0) concurrent-ruby (~> 1.0) - io-like (0.3.1) jekyll (4.2.2) addressable (~> 2.4) colorator (~> 1.0) @@ -92,13 +92,14 @@ GEM pathutil (0.16.2) forwardable-extended (~> 2.6) posix-spawn (0.3.15) - public_suffix (4.0.7) + public_suffix (5.0.0) racc (1.6.0) rainbow (3.1.1) rake (13.0.6) - rb-fsevent (0.11.1) + rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) + rchardet (1.8.0) rexml (3.2.5) rouge (3.27.0) safe_yaml (1.0.5) @@ -118,8 +119,8 @@ PLATFORMS x86_64-linux DEPENDENCIES - archive-zip (= 0.12.0) front_matter_parser (= 1.0.1) + git (= 1.12.0) html-proofer (= 3.19.4) jekyll (= 4.2.2) jekyll-last-modified-at diff --git a/_plugins/fetch_remote.rb b/_plugins/fetch_remote.rb index 57ed8f77bbb9..3a8b69c71851 100644 --- a/_plugins/fetch_remote.rb +++ b/_plugins/fetch_remote.rb @@ -1,9 +1,8 @@ -require 'archive/zip' require 'front_matter_parser' +require 'git' require 'jekyll' require 'json' require 'octopress-hooks' -require 'open-uri' require 'rake' require_relative 'util.rb' @@ -12,14 +11,6 @@ module Jekyll class FetchRemote < Octopress::Hooks::Site priority :highest - def self.download(url, dest) - uri = URI.parse(url) - result = File.join(dest, File.basename(uri.path)) - puts " Downloading #{url}" - IO.copy_stream(URI.open(url), result) - return result - end - def self.copy(src, dest) if (tmp = Array.try_convert(src)) tmp.each do |s| @@ -50,75 +41,86 @@ def pre_read(site) beginning_time = Time.now puts "Starting plugin fetch_remote.rb..." site.config['fetch-remote'].each do |entry| - puts " Repo #{entry['repo']} (#{entry['ref']})" - Dir.mktmpdir do |tmpdir| - tmpfile = FetchRemote.download("#{entry['repo']}/archive/#{entry['ref']}.zip", tmpdir) - Dir.mktmpdir do |ztmpdir| - puts " Extracting #{tmpfile}" - Archive::Zip.extract( - tmpfile, - ztmpdir, - :create => true - ) - entry['paths'].each do |path| - if File.extname(path['dest']) != "" - if path['src'].size > 1 - raise "Cannot use file destination #{path['dest']} with multiple sources" - end - FileUtils.mkdir_p File.dirname(path['dest']) - else - FileUtils.mkdir_p path['dest'] - end + puts " Repo #{entry['repo']}" - puts " Copying files" + gituri = Git::URL.parse(entry['repo']) + clonedir = "#{Dir.tmpdir}/docker-docs-clone#{gituri.path}" + if Dir.exist?(clonedir) + puts " Opening #{clonedir}" + begin + git = Git.open(clonedir) + git.chdir do + puts " Fetching #{entry['ref']}" + git.checkout(entry['ref']) + git.fetch + end + rescue => e + FileUtils.rm_rf(clonedir) + puts " Cloning repository into #{clonedir}" + Git.clone("#{entry['repo']}.git", Pathname.new(clonedir), branch: entry['ref']) + end + else + puts " Cloning repository into #{clonedir}" + Git.clone("#{entry['repo']}.git", Pathname.new(clonedir), branch: entry['ref']) + end - # prepare file list to be copied - files = FileList[] - path['src'].each do |src| - if "#{src}".start_with?("!") - files.exclude(File.join(ztmpdir, "*/"+"#{src}".delete_prefix("!"))) - else - files.include(File.join(ztmpdir, "*/#{src}")) - end - end + entry['paths'].each do |path| + if File.extname(path['dest']) != "" + if path['src'].size > 1 + raise "Cannot use file destination #{path['dest']} with multiple sources" + end + FileUtils.mkdir_p File.dirname(path['dest']) + else + FileUtils.mkdir_p path['dest'] + end + + puts " Copying files" - files.each do |file| - FetchRemote.copy(file, path['dest']) do |s, d| - s = File.realpath(s) - # traverse source directory - FileUtils::Entry_.new(s, nil, false).wrap_traverse(proc do |ent| - file_clean = ent.path.delete_prefix(ztmpdir).split("/").drop(2).join("/") - destent = FileUtils::Entry_.new(d, ent.rel, false) - puts " #{file_clean} => #{destent.path}" + # prepare file list to be copied + files = FileList[] + path['src'].each do |src| + if "#{src}".start_with?("!") + files.exclude(File.join(clonedir, "/"+"#{src}".delete_prefix("!"))) + else + files.include(File.join(clonedir, "/#{src}")) + end + end - if File.file?(destent.path) - fmp = FrontMatterParser::Parser.parse_file(destent.path) - if fmp['fetch_remote'].nil? - raise "Local file #{destent.path} already exists" - end - line_start, line_end = FetchRemote.resolve_line_numbers(fmp['fetch_remote'].kind_of?(Hash) ? fmp['fetch_remote']['line_start'] : nil, fmp['fetch_remote'].kind_of?(Hash) ? fmp['fetch_remote']['line_end'] : nil) - lines = File.readlines(ent.path)[line_start..line_end] - File.open(destent.path, "a") { |fow| fow.puts lines.join } - else - ent.copy destent.path - end + files.each do |file| + FetchRemote.copy(file, path['dest']) do |s, d| + s = File.realpath(s) + # traverse source directory + FileUtils::Entry_.new(s, nil, false).wrap_traverse(proc do |ent| + file_clean = ent.path.delete_prefix(clonedir).split("/").drop(1).join("/") + destent = FileUtils::Entry_.new(d, ent.rel, false) + puts " #{file_clean} => #{destent.path}" - next unless File.file?(ent.path) && File.extname(ent.path) == ".md" - # set edit and issue url and remote info for markdown files in site config defaults - edit_url = "#{entry['repo']}/edit/#{entry['default_branch']}/#{file_clean}" - issue_url = "#{entry['repo']}/issues/new?body=File: [#{file_clean}](#{get_docs_url}/#{destent.path.sub(/#{File.extname(destent.path)}$/, '')}/)" - puts " edit_url: #{edit_url}" - puts " issue_url: #{issue_url}" - site.config['defaults'] << { - "scope" => { "path" => destent.path }, - "values" => { - "edit_url" => edit_url, - "issue_url" => issue_url - }, - } - end, proc do |_| end) + if File.file?(destent.path) + fmp = FrontMatterParser::Parser.parse_file(destent.path) + if fmp['fetch_remote'].nil? + raise "Local file #{destent.path} already exists" + end + line_start, line_end = FetchRemote.resolve_line_numbers(fmp['fetch_remote'].kind_of?(Hash) ? fmp['fetch_remote']['line_start'] : nil, fmp['fetch_remote'].kind_of?(Hash) ? fmp['fetch_remote']['line_end'] : nil) + lines = File.readlines(ent.path)[line_start..line_end] + File.open(destent.path, "a") { |fow| fow.puts lines.join } + else + ent.copy destent.path end - end + + next unless File.file?(ent.path) && File.extname(ent.path) == ".md" + # set edit and issue url and remote info for markdown files in site config defaults + edit_url = "#{entry['repo']}/edit/#{entry['default_branch']}/#{file_clean}" + issue_url = "#{entry['repo']}/issues/new?body=File: [#{file_clean}](#{get_docs_url}/#{destent.path.sub(/#{File.extname(destent.path)}$/, '')}/)" + puts " edit_url: #{edit_url}" + puts " issue_url: #{issue_url}" + site.config['defaults'] << { + "scope" => { "path" => destent.path }, + "values" => { + "edit_url" => edit_url, + "issue_url" => issue_url + }, + } + end, proc do |_| end) end end end From a16eff5814bc380c6379c31a87f4f6bc840f4c87 Mon Sep 17 00:00:00 2001 From: CrazyMax Date: Wed, 26 Oct 2022 05:59:17 +0200 Subject: [PATCH 3/3] jekyll: last modification date for remote resources Signed-off-by: CrazyMax --- Gemfile | 1 - Gemfile.lock | 5 ----- _plugins/fetch_remote.rb | 21 +++++++++++---------- _plugins/last_modified_at.rb | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 16 deletions(-) create mode 100644 _plugins/last_modified_at.rb diff --git a/Gemfile b/Gemfile index 32ecc842ec5b..6ac458fb5c26 100644 --- a/Gemfile +++ b/Gemfile @@ -5,7 +5,6 @@ gem 'wdm', '>= 0.1.0' if Gem.win_platform? gem 'jekyll', '4.2.2' group :jekyll_plugins do - gem 'jekyll-last-modified-at' gem 'jekyll-redirect-from' gem 'jekyll-relative-links' gem 'jekyll-sitemap' diff --git a/Gemfile.lock b/Gemfile.lock index 3a5e9de03a92..51374f9eb814 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -45,9 +45,6 @@ GEM rouge (~> 3.0) safe_yaml (~> 1.0) terminal-table (~> 2.0) - jekyll-last-modified-at (1.3.0) - jekyll (>= 3.7, < 5.0) - posix-spawn (~> 0.3.9) jekyll-redirect-from (0.16.0) jekyll (>= 3.3, < 5.0) jekyll-relative-links (0.6.1) @@ -91,7 +88,6 @@ GEM parallel (1.22.1) pathutil (0.16.2) forwardable-extended (~> 2.6) - posix-spawn (0.3.15) public_suffix (5.0.0) racc (1.6.0) rainbow (3.1.1) @@ -123,7 +119,6 @@ DEPENDENCIES git (= 1.12.0) html-proofer (= 3.19.4) jekyll (= 4.2.2) - jekyll-last-modified-at jekyll-redirect-from jekyll-relative-links jekyll-sitemap diff --git a/_plugins/fetch_remote.rb b/_plugins/fetch_remote.rb index 3a8b69c71851..158fb3919a0c 100644 --- a/_plugins/fetch_remote.rb +++ b/_plugins/fetch_remote.rb @@ -49,19 +49,17 @@ def pre_read(site) puts " Opening #{clonedir}" begin git = Git.open(clonedir) - git.chdir do - puts " Fetching #{entry['ref']}" - git.checkout(entry['ref']) - git.fetch - end + puts " Fetching #{entry['ref']}" + git.fetch + git.checkout(entry['ref']) rescue => e FileUtils.rm_rf(clonedir) puts " Cloning repository into #{clonedir}" - Git.clone("#{entry['repo']}.git", Pathname.new(clonedir), branch: entry['ref']) + git = Git.clone("#{entry['repo']}.git", Pathname.new(clonedir), branch: entry['ref'], depth: 1) end else puts " Cloning repository into #{clonedir}" - Git.clone("#{entry['repo']}.git", Pathname.new(clonedir), branch: entry['ref']) + git = Git.clone("#{entry['repo']}.git", Pathname.new(clonedir), branch: entry['ref'], depth: 1) end entry['paths'].each do |path| @@ -111,13 +109,16 @@ def pre_read(site) # set edit and issue url and remote info for markdown files in site config defaults edit_url = "#{entry['repo']}/edit/#{entry['default_branch']}/#{file_clean}" issue_url = "#{entry['repo']}/issues/new?body=File: [#{file_clean}](#{get_docs_url}/#{destent.path.sub(/#{File.extname(destent.path)}$/, '')}/)" - puts " edit_url: #{edit_url}" - puts " issue_url: #{issue_url}" + last_modified_at = git.log.path(file_clean).first.date.strftime(LastModifiedAt::DATE_FORMAT) + puts " edit_url: #{edit_url}" + puts " issue_url: #{issue_url}" + puts " last_modified_at: #{last_modified_at}" site.config['defaults'] << { "scope" => { "path" => destent.path }, "values" => { "edit_url" => edit_url, - "issue_url" => issue_url + "issue_url" => issue_url, + "last_modified_at" => last_modified_at, }, } end, proc do |_| end) diff --git a/_plugins/last_modified_at.rb b/_plugins/last_modified_at.rb new file mode 100644 index 000000000000..09bddfd1d945 --- /dev/null +++ b/_plugins/last_modified_at.rb @@ -0,0 +1,35 @@ +require 'git' +require 'jekyll' +require 'octopress-hooks' + +module Jekyll + class LastModifiedAt < Octopress::Hooks::Site + DATE_FORMAT = '%Y-%m-%d %H:%M:%S %z' + def pre_render(site) + if get_docs_url == "http://localhost:4000" + # Do not generate last_modified_at for local development + return + end + + beginning_time = Time.now + Jekyll.logger.info "Starting plugin last_modified_at.rb..." + + git = Git.open(site.source) + site.pages.each do |page| + next if page.relative_path == "redirect.html" + next unless File.extname(page.relative_path) == ".md" || File.extname(page.relative_path) == ".html" + unless page.data.key?('last_modified_at') + begin + page.data['last_modified_at'] = git.log.path(page.relative_path).first.date.strftime(DATE_FORMAT) + rescue => e + # Ignored + end + end + puts" #{page.relative_path}\n last_modified_at: #{page.data['last_modified_at']}" + end + + end_time = Time.now + Jekyll.logger.info "done in #{(end_time - beginning_time)} seconds" + end + end +end