From 215b58f568e8d39cfae716af251e0c31c2d6fcac Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 10:22:23 +0200 Subject: [PATCH 01/10] Update EnrollUtil.cs and PathUtils.cs --- PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs | 6 +++--- PoliNetwork.Graduatorie.Common/Utils/Path/PathUtils.cs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs index 3085663e..e2a21868 100644 --- a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs +++ b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs @@ -16,7 +16,7 @@ public static EnrollType GetEnrollType(string? rowCanEnrollInto, bool rowCanEnro if (string.IsNullOrEmpty(rowCanEnrollInto)) return new EnrollType { CanEnroll = true, Course = null, Type = null }; - string[] tester = {"assegnato", "prenotato"}; + string[] tester = { "assegnato", "prenotato" }; const string sep = " - "; if (!rowCanEnrollInto.Contains(sep) || !tester.Any(t => rowCanEnrollInto.ToLower().Contains(t))) return new EnrollType { CanEnroll = true, Course = rowCanEnrollInto, Type = null }; @@ -24,7 +24,7 @@ public static EnrollType GetEnrollType(string? rowCanEnrollInto, bool rowCanEnro var s = rowCanEnrollInto.Split(sep).ToList(); var type = s.FirstOrDefault(x => tester.Any(t => t == x.ToLower())); s.Remove(type); - var course = String.Join(sep, s); + var course = string.Join(sep, s); return new EnrollType { CanEnroll = true, Course = course, Type = type }; } -} +} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Common/Utils/Path/PathUtils.cs b/PoliNetwork.Graduatorie.Common/Utils/Path/PathUtils.cs index f052fbaf..16ed2eba 100644 --- a/PoliNetwork.Graduatorie.Common/Utils/Path/PathUtils.cs +++ b/PoliNetwork.Graduatorie.Common/Utils/Path/PathUtils.cs @@ -37,4 +37,4 @@ public static string CreateAndReturnDataFolder(string folderName) Directory.CreateDirectory(dataFolderPath); return dataFolderPath; } -} +} \ No newline at end of file From 6ea119549ea7f0334366e3a7fd04401b4c161112 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 10:24:35 +0200 Subject: [PATCH 02/10] Update RankingOrder.cs --- .../Objects/RankingNS/RankingOrder.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs index 2a7e9b13..5a24d668 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs @@ -82,6 +82,10 @@ public int GetHashWithoutLastUpdate() public void Merge(RankingOrder? rankingRankingOrder) { - throw new NotImplementedException(); + Anticipata ??= rankingRankingOrder?.Anticipata; + Phase ??= rankingRankingOrder?.Phase; + Primary ??= rankingRankingOrder?.Primary; + Secondary ??= rankingRankingOrder?.Secondary; + ExtraEu ??= rankingRankingOrder?.ExtraEu; } } \ No newline at end of file From 3614b5a1cb4a003db945a55736780fd0f2bd2828 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 10:27:07 +0200 Subject: [PATCH 03/10] Update RankingUrl.cs, EnrollUtil.cs, HashMatricola.cs, and 3 more files --- .../Objects/RankingNS/RankingUrl.cs | 2 +- .../Utils/EnrollUtil.cs | 9 ++++++-- .../Utils/HashNS/HashMatricola.cs | 5 +---- .../Specific/BySchoolYearCourseJson.cs | 22 ++++++++++--------- .../Utils/Output/HashMatricoleWrite.cs | 10 ++++----- .../Utils/Transformer/ParserNS/Parser.cs | 14 +++++------- 6 files changed, 30 insertions(+), 32 deletions(-) diff --git a/PoliNetwork.Graduatorie.Common/Objects/RankingNS/RankingUrl.cs b/PoliNetwork.Graduatorie.Common/Objects/RankingNS/RankingUrl.cs index 5fa78164..ebafa918 100644 --- a/PoliNetwork.Graduatorie.Common/Objects/RankingNS/RankingUrl.cs +++ b/PoliNetwork.Graduatorie.Common/Objects/RankingNS/RankingUrl.cs @@ -129,7 +129,7 @@ public bool IsSameRanking(RankingUrl urlB) return AreSameRanking(this, urlB); } - public bool AreSameRanking(RankingUrl urlA, RankingUrl urlB) + public static bool AreSameRanking(RankingUrl urlA, RankingUrl urlB) { var a = urlA.Url; var b = urlB.Url; diff --git a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs index e2a21868..a5446ec6 100644 --- a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs +++ b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs @@ -23,8 +23,13 @@ public static EnrollType GetEnrollType(string? rowCanEnrollInto, bool rowCanEnro var s = rowCanEnrollInto.Split(sep).ToList(); var type = s.FirstOrDefault(x => tester.Any(t => t == x.ToLower())); - s.Remove(type); + if (type != null) + { + s.Remove(type); + } + var course = string.Join(sep, s); - return new EnrollType { CanEnroll = true, Course = course, Type = type }; + return new EnrollType { CanEnroll = true, Course = course, Type = type }; + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs b/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs index 555a0c64..f685c431 100644 --- a/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs +++ b/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs @@ -21,10 +21,7 @@ public static partial class HashMatricola if (input.Contains(' ')) input = input.Split(" ").First(x => !string.IsNullOrEmpty(x)); var s = input.Trim().ToUpper(); - if (string.IsNullOrEmpty(s)) - return null; - - return NotAlphaNumericRegex().Replace(s, ""); + return string.IsNullOrEmpty(s) ? null : NotAlphaNumericRegex().Replace(s, ""); } public static string? HashMatricolaMethod(string? input) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index c449a007..947da65f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -116,16 +116,17 @@ private static void AddCourseToDict( var locationDict = courseDict[fixedLocation]; var singleCourseJson = CreateCourseJson(ranking, course); - bool IsThisCourse(SingleCourseJson x) - { - return x.Link == singleCourseJson.Link && x.Location == singleCourseJson.Location; - } - if (locationDict.Any(IsThisCourse)) continue; locationDict.Add(singleCourseJson); locationDict.Sort(Comparison); + continue; + + bool IsThisCourse(SingleCourseJson x) + { + return x.Link == singleCourseJson.Link && x.Location == singleCourseJson.Location; + } } } @@ -152,14 +153,14 @@ private static bool IsSimilar(IEnumerable yearGroup, SingleCourseJson s { var enumerable = yearGroup.Where(v1 => v1.ByCourse != null); + return enumerable.Any(Predicate); + bool Predicate(Ranking v1) { return singleCourseJson.School == v1.School && singleCourseJson.Year == v1.Year && v1.RankingOrder?.Phase == singleCourseJson.Name; } - - return enumerable.Any(Predicate); } public static RankingsSet? Parse(string dataFolder) @@ -207,13 +208,14 @@ ICollection rankings var actions = new List(); foreach (var filename in year.Value) { + var collection = filename.Value.Select(Selector); + actions.AddRange(collection); + continue; + Action Selector(KeyValuePair> variable) { return () => { RankingAdd(school, year, outFolder, variable, rankings); }; } - - var collection = filename.Value.Select(Selector); - actions.AddRange(collection); } ParallelRun.Run(actions.ToArray()); diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs index e2154f49..38e00e90 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs @@ -28,9 +28,8 @@ private static SortedDictionary GetDictToWrite(Ranki { var byMeritRows = ranking.ByMerit?.Rows; if (byMeritRows != null) - foreach (var student in byMeritRows) - if (!string.IsNullOrEmpty(student.Id)) - AddToDict(dictionary, ranking, student, null); + foreach (var student in byMeritRows.Where(student => !string.IsNullOrEmpty(student.Id))) + AddToDict(dictionary, ranking, student, null); var rankingByCourse = ranking.ByCourse; if (rankingByCourse == null) continue; @@ -38,9 +37,8 @@ private static SortedDictionary GetDictToWrite(Ranki { var row = courseTable.Rows; if (row == null) continue; - foreach (var studentResult in row) - if (!string.IsNullOrEmpty(studentResult.Id)) - AddToDict(dictionary, ranking, studentResult, courseTable); + foreach (var studentResult in row.Where(studentResult => !string.IsNullOrEmpty(studentResult.Id))) + AddToDict(dictionary, ranking, studentResult, courseTable); } } diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs index ad41a5fe..3bd8d6dd 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs @@ -131,9 +131,7 @@ private RankingsSet ParseNewRankings(List htmls) .Select(url => { var found = meritTablePages.Find(h => h.Url.Url == url.Url); - if (found != null) - return found; - return HtmlPage.FromUrl(url, _htmlFolder); + return found ?? HtmlPage.FromUrl(url, _htmlFolder); }) .Where(h => h != null) .Select(h => h!) @@ -144,9 +142,7 @@ private RankingsSet ParseNewRankings(List htmls) .Select(url => { var found = courseTablePages.Find(h => h.Url.Url == url.Url); - if (found != null) - return found; - return HtmlPage.FromUrl(url, _htmlFolder); + return found ?? HtmlPage.FromUrl(url, _htmlFolder); }) .Where(h => h != null) .Select(h => h!) @@ -186,7 +182,7 @@ private RankingsSet ParseNewRankings(List htmls) return set; } - private Ranking? InitRanking(RankingUrl indexUrl, HtmlNode doc) + private static Ranking? InitRanking(RankingUrl indexUrl, HtmlNode doc) { var ranking = new Ranking(); // get ranking info @@ -268,7 +264,7 @@ private static IEnumerable GetTableLinks(HtmlPage html) return tablesLinks; } - private Table ParseMeritTable(IEnumerable pages) + private static Table ParseMeritTable(IEnumerable pages) { var table = JoinTables(pages); var meritTable = Table.Create( @@ -282,7 +278,7 @@ private Table ParseMeritTable(IEnumerable pages) return meritTable; } - private IEnumerable> ParseCoursesTables(IEnumerable pages) + private static IEnumerable> ParseCoursesTables(IEnumerable pages) { var tables = GetTables(pages); var coursesTables = tables.Select( From ae3bbca59c68b5a776aeb5946b18e5c4a0f89187 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 10:28:50 +0200 Subject: [PATCH 04/10] Update BySchoolYearCourseJson.cs, BySchoolYearJson.cs, and Parser.cs --- .../Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs | 4 ++-- .../Objects/Json/Indexes/Specific/BySchoolYearJson.cs | 2 +- .../Utils/Transformer/ParserNS/Parser.cs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index 947da65f..380a36af 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -62,7 +62,7 @@ private static SortedDictionary< } private static void GetYearsDictSingle(IGrouping yearGroup, - SortedDictionary>>> d) + IDictionary>>> d) { if (yearGroup.Key != null) d.Add(yearGroup.Key.Value, GetCoursesDict(yearGroup)); } @@ -87,7 +87,7 @@ IEnumerable yearGroup } private static void AddCourseToDict( - SortedDictionary>> d, + IDictionary>> d, Ranking ranking, IGrouping courseGroup ) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs index 265496a0..a0749948 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs @@ -50,7 +50,7 @@ public class BySchoolYearJson : IndexJsonBase private static void AddSchool( IGrouping yearGroup, - SortedDictionary> schoolDict + IDictionary> schoolDict ) { var yearGroupKey = yearGroup.Key; diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs index 3bd8d6dd..b0696910 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs @@ -94,7 +94,7 @@ private RankingsSet ParseSavedRankings(ICollection htmls) return savedSet; } - private RankingsSet ParseNewRankings(List htmls) + private RankingsSet ParseNewRankings(IReadOnlyCollection htmls) { // pseudo // new ranking set From 387bf0794bc6d5c6ef67eeb8d2bad33f3f100e90 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 11:14:21 +0200 Subject: [PATCH 05/10] Update LinksFind.cs --- .../Utils/Web/LinksFind.cs | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs index 947ce73d..cfdb1890 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs @@ -22,18 +22,48 @@ public static IEnumerable GetAll() var rankingsLinks = new HashSet(); rankingsLinks.AddRange(polimiNewsLinks, combinationLinks); - var rankingsUrls = rankingsLinks - .AsParallel() // from 500ms to 86ms - .Select(RankingUrl.From) - .Where(r => r.PageEnum == PageEnum.Index) - .Where(UrlUtils.CheckUrl) - .ToHashSet(); + var rankingsUrls = GetRankingLinks(rankingsLinks); var len = rankingsUrls.ToArray().Length; Console.WriteLine($"[INFO] LinksFind.GetAll found {len} links"); return rankingsUrls; } + private static HashSet GetRankingLinks(IEnumerable rankingsLinks) + { + var parallelQuery = rankingsLinks + .AsParallel() + .Select(RankingUrl.From) + .Where(r => r.PageEnum == PageEnum.Index).ToList(); + + var final = new HashSet(); + + var action = parallelQuery.Select((Func)Selector).ToArray(); + Parallel.Invoke(action); + + return final; + + Action Selector(RankingUrl variable) => + () => { CheckUrl(variable, final); }; + } + + private static void CheckUrl(RankingUrl variable, HashSet final) + { + try + { + var x = UrlUtils.CheckUrl(variable); + if (!x) return; + lock (final) + { + final.Add(variable); + } + } + catch (Exception exception) + { + Console.WriteLine(exception); + } + } + private static IEnumerable GetCombinationLinks() { var r = new HashSet(); From 3391414e132911578a3dd9de88e0014fd91f34a6 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 11:15:30 +0200 Subject: [PATCH 06/10] Update CheckUrlUtil.cs and LinksFind.cs --- .../Utils/Web/CheckUrlUtil.cs | 42 +++++++++++++++++++ .../Utils/Web/LinksFind.cs | 38 ++--------------- 2 files changed, 45 insertions(+), 35 deletions(-) create mode 100644 PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs new file mode 100644 index 00000000..4a0dbe56 --- /dev/null +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs @@ -0,0 +1,42 @@ +using PoliNetwork.Graduatorie.Common.Enums; +using PoliNetwork.Graduatorie.Common.Objects.RankingNS; + +namespace PoliNetwork.Graduatorie.Scraper.Utils.Web; + +public class CheckUrlUtil +{ + public static void CheckUrl(RankingUrl variable, HashSet final) + { + try + { + var x = UrlUtils.CheckUrl(variable); + if (!x) return; + lock (final) + { + final.Add(variable); + } + } + catch (Exception exception) + { + Console.WriteLine(exception); + } + } + + public static HashSet GetRankingLinks(IEnumerable rankingsLinks) + { + var parallelQuery = rankingsLinks + .AsParallel() + .Select(RankingUrl.From) + .Where(r => r.PageEnum == PageEnum.Index).ToList(); + + var final = new HashSet(); + + var action = parallelQuery.Select((Func)Selector).ToArray(); + Parallel.Invoke(action); + + return final; + + Action Selector(RankingUrl variable) => + () => { CheckUrlUtil.CheckUrl(variable, final); }; + } +} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs index cfdb1890..1841a581 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs @@ -22,57 +22,25 @@ public static IEnumerable GetAll() var rankingsLinks = new HashSet(); rankingsLinks.AddRange(polimiNewsLinks, combinationLinks); - var rankingsUrls = GetRankingLinks(rankingsLinks); + var rankingsUrls = CheckUrlUtil.GetRankingLinks(rankingsLinks); var len = rankingsUrls.ToArray().Length; Console.WriteLine($"[INFO] LinksFind.GetAll found {len} links"); return rankingsUrls; } - private static HashSet GetRankingLinks(IEnumerable rankingsLinks) - { - var parallelQuery = rankingsLinks - .AsParallel() - .Select(RankingUrl.From) - .Where(r => r.PageEnum == PageEnum.Index).ToList(); - - var final = new HashSet(); - var action = parallelQuery.Select((Func)Selector).ToArray(); - Parallel.Invoke(action); - return final; - - Action Selector(RankingUrl variable) => - () => { CheckUrl(variable, final); }; - } - - private static void CheckUrl(RankingUrl variable, HashSet final) - { - try - { - var x = UrlUtils.CheckUrl(variable); - if (!x) return; - lock (final) - { - final.Add(variable); - } - } - catch (Exception exception) - { - Console.WriteLine(exception); - } - } private static IEnumerable GetCombinationLinks() { var r = new HashSet(); var nowYear = DateTime.UtcNow.Year; - for (var i = 2021; i <= nowYear; i++) r.AddRange(GetYearCominationLinks(i)); + for (var i = 2021; i <= nowYear; i++) r.AddRange(GetYearCombinationLinks(i)); return r; } - private static IEnumerable GetYearCominationLinks(int year) + private static IEnumerable GetYearCombinationLinks(int year) { // partial implemented: polimi has recently added 4 hex chars in the first part // of the path (2022_20064_XXXX_html/) which would require 65k combinations for each From 073fe78f6a5654c1d270689fa0d77d74f3168ded Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 11:15:36 +0200 Subject: [PATCH 07/10] Update EnrollUtil.cs --- PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs index a5446ec6..cd1482cc 100644 --- a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs +++ b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs @@ -23,13 +23,9 @@ public static EnrollType GetEnrollType(string? rowCanEnrollInto, bool rowCanEnro var s = rowCanEnrollInto.Split(sep).ToList(); var type = s.FirstOrDefault(x => tester.Any(t => t == x.ToLower())); - if (type != null) - { - s.Remove(type); - } + if (type != null) s.Remove(type); var course = string.Join(sep, s); - return new EnrollType { CanEnroll = true, Course = course, Type = type }; - + return new EnrollType { CanEnroll = true, Course = course, Type = type }; } } \ No newline at end of file From c823d09dd4303bcdc368fce590c04df8f71866c1 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 11:15:51 +0200 Subject: [PATCH 08/10] Update CheckUrlUtil.cs and LinksFind.cs --- .../Utils/Web/CheckUrlUtil.cs | 10 ++++++++-- PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs | 3 --- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs index 4a0dbe56..e15dc18d 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs @@ -1,6 +1,10 @@ +#region + using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; +#endregion + namespace PoliNetwork.Graduatorie.Scraper.Utils.Web; public class CheckUrlUtil @@ -36,7 +40,9 @@ public static HashSet GetRankingLinks(IEnumerable rankingsLi return final; - Action Selector(RankingUrl variable) => - () => { CheckUrlUtil.CheckUrl(variable, final); }; + Action Selector(RankingUrl variable) + { + return () => { CheckUrl(variable, final); }; + } } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs index 1841a581..77d782ff 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/LinksFind.cs @@ -2,7 +2,6 @@ using PoliNetwork.Core.Utils; using PoliNetwork.Graduatorie.Common.Data; -using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Common.Extensions; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; @@ -30,8 +29,6 @@ public static IEnumerable GetAll() } - - private static IEnumerable GetCombinationLinks() { var r = new HashSet(); From 477853a647a19191da02ee38eaf134d57ad4958a Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 11:31:53 +0200 Subject: [PATCH 09/10] Update UrlUtils.cs --- PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs index 1e3eb03b..e54ea691 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/UrlUtils.cs @@ -23,13 +23,15 @@ public static string UrlifyLocalHref(string href, string? domain) public static bool CheckUrl(RankingUrl? url) { - if (string.IsNullOrEmpty(url?.Url)) + var urlUrl = url?.Url; + if (string.IsNullOrEmpty(urlUrl)) return false; using var client = new HttpClient(); try { - var response = client.GetAsync(url.Url).Result; + var async = client.GetAsync(urlUrl); + var response = async.Result; return response.StatusCode == HttpStatusCode.OK; } catch (HttpRequestException) From 39eb88f81e623ba5ed436ccb0c0bf7ab78e0b483 Mon Sep 17 00:00:00 2001 From: user Date: Fri, 11 Aug 2023 11:42:39 +0200 Subject: [PATCH 10/10] Update CheckUrlUtil.cs and ScraperOutput.cs --- PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs | 5 +++++ PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs index e15dc18d..5c9809fb 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs @@ -33,6 +33,11 @@ public static HashSet GetRankingLinks(IEnumerable rankingsLi .Select(RankingUrl.From) .Where(r => r.PageEnum == PageEnum.Index).ToList(); + return GetRankingLinksHashSet(parallelQuery); + } + + public static HashSet GetRankingLinksHashSet(IEnumerable parallelQuery) + { var final = new HashSet(); var action = parallelQuery.Select((Func)Selector).ToArray(); diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs index 3a3e768f..cf79fe07 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs @@ -66,7 +66,7 @@ public static void Write(List rankingsUrls, string? dataFolder) private static string GetOutputLinksString(IEnumerable rankingsUrls) { var output = ""; - var urls = rankingsUrls.Where(UrlUtils.CheckUrl).Select(x => x.Url).Order(); + var urls = CheckUrlUtil.GetRankingLinksHashSet(rankingsUrls).Order(); foreach (var link in urls) { output += link;