diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index ff6fea89..c6179798 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -6,6 +6,9 @@ using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; +using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; +using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; +using PoliNetwork.Graduatorie.Parser.Utils; #endregion @@ -62,16 +65,73 @@ private static void WriteSingleJsonRanking(string folder, Ranking ranking, ArgsC File.WriteAllText(path, rankingJsonString); } - private static bool ExitIfAlreadyExistsAndNotUpdated(Ranking ranking, string path) + private static bool ExitIfAlreadyExistsAndNotUpdated(Ranking a, string path) { if (!File.Exists(path)) return false; + var b = GetRankingFromFile(path); + return b != null && SameHash(a, b); + } + + private static bool SameHash(Ranking a, Ranking b) + { + var ai = a.GetHashWithoutLastUpdate(); + var bi = b.GetHashWithoutLastUpdate(); + return ai == bi; + } + + private static bool SameHashCourse(IReadOnlyCollection? aTableCourse, + IReadOnlyCollection? bTableCourse) + { + if (aTableCourse == null && bTableCourse == null) + return true; + if (aTableCourse == null || bTableCourse == null) + return false; + + ; + + if (aTableCourse.Count != bTableCourse.Count) + return false; + ; + + var aHash = aTableCourse.Select(variable => + { + var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate()); + return hashWithoutLastUpdate; + }).ToList(); + var bHash = bTableCourse.Select(variable => + { + var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate()); + return hashWithoutLastUpdate; + }).ToList(); + + var ai = Hashing.GetHashFromListHash(aHash); + var bi = Hashing.GetHashFromListHash(bHash); + + return (ai ?? 0) == (bi ?? 0); + } + + private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMerit) + { + if (aTableMerit == null && bTableMerit == null) + return true; + if (aTableMerit == null || bTableMerit == null) + return false; + + ; + var ai = aTableMerit.GetHashWithoutLastUpdate(); + var bi = bTableMerit.GetHashWithoutLastUpdate(); + var aii = Hashing.GetHashFromListHash(ai) ?? 0; + var bii = Hashing.GetHashFromListHash(bi) ?? 0; + return aii == bii; + } + + private static Ranking? GetRankingFromFile(string path) + { var x = File.ReadAllText(path); var j = JsonConvert.DeserializeObject(x, Culture.JsonSerializerSettings); - var hashThis = ranking.GetHashWithoutLastUpdate(); - var hashJ = j?.GetHashWithoutLastUpdate(); - return hashThis == hashJ; + return j; } public static void IndexesWrite(RankingsSet? rankingsSet, string outFolder, ArgsConfig argsConfig) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index f010b94c..e0778d8d 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -9,6 +9,7 @@ using PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; +using PoliNetwork.Graduatorie.Parser.Utils; using PoliNetwork.Graduatorie.Parser.Utils.Output; #endregion @@ -47,12 +48,19 @@ public int GetHashWithoutLastUpdate() i ^= School?.GetHashCode() ?? "School".GetHashCode(); i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode(); i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); - i ^= ByMerit?.GetHashWithoutLastUpdate() ?? "ByMerit".GetHashCode(); + var iMerit = ByMerit?.GetHashWithoutLastUpdate(); + i ^= Hashing.GetHashFromListHash(iMerit) ?? "ByMerit".GetHashCode(); + if (ByCourse == null) i ^= "ByCourse".GetHashCode(); else - i = ByCourse.Aggregate(i, (current, variable) => current ^ variable.GetHashWithoutLastUpdate()); + i = ByCourse.Aggregate(i, (current, variable) => + { + var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); + var iList = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty".GetHashCode(); + return current ^ iList; + }); return i; } @@ -128,4 +136,14 @@ public string GetPath() { return School + "/" + Year + "/" + RankingOrder?.Phase; } + + public MeritTable? GetMerit() + { + return ByMerit; + } + + public List? GetTableCourse() + { + return ByCourse; + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs b/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs index af9d224d..267e800d 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs @@ -22,29 +22,31 @@ public class StudentResult public decimal? Result; public SortedDictionary? SectionsResults; - public int GetHashWithoutLastUpdate() + public List GetHashWithoutLastUpdate() { - var i = "StudentResult".GetHashCode(); - i ^= BirthDate?.GetHashCode() ?? "BirthDate".GetHashCode(); - i ^= EnrollType?.GetHashWithoutLastUpdate() ?? "EnrollType".GetHashCode(); - i ^= EnglishCorrectAnswers?.GetHashCode() ?? "EnglishCorrectAnswers".GetHashCode(); - i ^= Id?.GetHashCode() ?? "Id".GetHashCode(); - i ^= PositionAbsolute?.GetHashCode() ?? "PositionAbsolute".GetHashCode(); - i ^= PositionCourse?.GetHashCode() ?? "PositionCourse".GetHashCode(); - i ^= Result?.GetHashCode() ?? "Result".GetHashCode(); - i ^= EnrollType?.GetHashCode() ?? "EnrollType".GetHashCode(); + var r = new List + { + "StudentResult".GetHashCode(), + BirthDate?.GetHashCode() ?? "BirthDate".GetHashCode(), + EnrollType?.GetHashWithoutLastUpdate() ?? "EnrollType".GetHashCode(), + EnglishCorrectAnswers?.GetHashCode() ?? "EnglishCorrectAnswers".GetHashCode(), + Id?.GetHashCode() ?? "Id".GetHashCode(), + PositionAbsolute?.GetHashCode() ?? "PositionAbsolute".GetHashCode(), + PositionCourse?.GetHashCode() ?? "PositionCourse".GetHashCode(), + Result?.GetHashCode() ?? "Result".GetHashCode() + }; if (Ofa == null) - i ^= "Ofa".GetHashCode(); + r.Add("OfaEmpty".GetHashCode()); else - i = Ofa.Aggregate(i, - (current, variable) => current ^ variable.Key.GetHashCode() ^ variable.Value.GetHashCode()); + r.Add(Ofa.Aggregate("OfaFull".GetHashCode(), + (current, variable) => current ^ variable.Key.GetHashCode() ^ variable.Value.GetHashCode())); if (SectionsResults == null) - i ^= "SectionsResults".GetHashCode(); + r.Add("SectionsResultsEmpty".GetHashCode()); else - i = SectionsResults.Aggregate(i, - (current, variable) => current ^ variable.Key.GetHashCode() ^ variable.Value.GetHashCode()); + r.Add(SectionsResults.Aggregate("SectionsResultsFull".GetHashCode(), + (current, variable) => current ^ variable.Key.GetHashCode() ^ variable.Value.GetHashCode())); - return i; + return r; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs index 7cc848fa..a796b4c5 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Merit/MeritTable.cs @@ -2,6 +2,7 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Parser.Utils; #endregion @@ -16,21 +17,27 @@ public class MeritTable public List? Rows; public int? Year; - public int GetHashWithoutLastUpdate() + public List GetHashWithoutLastUpdate() { - var i = "MeritTable".GetHashCode(); + var r = new List { "MeritTable".GetHashCode() }; if (Headers != null) - i = Headers.Aggregate(i, (current, variable) => current ^ variable.GetHashCode()); + r.Add(Headers.Aggregate("HeadersFull".GetHashCode(), + (current, variable) => current ^ variable.GetHashCode())); else - i ^= "Headers".GetHashCode(); + r.Add("HeadersEmpty".GetHashCode()); if (Rows != null) - i = Rows.Aggregate(i, (current, variable) => current ^ variable.GetHashWithoutLastUpdate()); + r.Add(Rows.Aggregate("RowsFull".GetHashCode(), (current, variable) => + { + var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); + var hashFromListHash = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty3".GetHashCode(); + return current ^ hashFromListHash; + })); else - i ^= "Rows".GetHashCode(); + r.Add("RowsEmpty".GetHashCode()); - i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); - i ^= Path?.GetHashCode() ?? "Path".GetHashCode(); - return i; + r.Add(Year?.GetHashCode() ?? "Year".GetHashCode()); + r.Add(Path?.GetHashCode() ?? "Path".GetHashCode()); + return r; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs b/PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs new file mode 100644 index 00000000..5940ac04 --- /dev/null +++ b/PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs @@ -0,0 +1,14 @@ +namespace PoliNetwork.Graduatorie.Parser.Utils; + +public class Hashing +{ + public static int? GetHashFromListHash(IReadOnlyCollection? iMerit) + { + if (iMerit == null) + return null; + if (iMerit.Count == 0) + return null; + + return iMerit.Aggregate(0, (current, variable) => current ^ variable ?? 0); + } +} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs index cf79fe07..4c32c1a3 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs @@ -34,22 +34,56 @@ private static List GetSaved(string? dataFolder) List list = new(); var filePath = GetFilePath(dataFolder); if (!File.Exists(filePath)) return list; - try - { - var lines = File.ReadAllLines(filePath); - var rankingUrls = from line in lines where !string.IsNullOrEmpty(line) select RankingUrl.From(line); - list.AddRange(rankingUrls); + var lines = GetLines(filePath); + if (lines == null) + { + // consider to handle them + Console.WriteLine($"[ERROR] Can't read the ScraperOutput file ({filePath})"); return list; } + + try + { + foreach (var variable in lines) RankingFromAdd(variable, list); + } catch { // consider to handle them - Console.WriteLine($"[ERROR] Can't read the ScraperOutput file ({filePath})"); - return list; + Console.WriteLine($"[ERROR] Can't validate the ScraperOutput file ({filePath})"); + } + + return list; + } + + private static void RankingFromAdd(string variable, ICollection list) + { + try + { + var rankingUrl = RankingUrl.From(variable); + list.Add(rankingUrl); + } + catch (Exception exception) + { + Console.WriteLine(exception); } } + private static List? GetLines(string filePath) + { + List? lines = null; + try + { + lines = File.ReadAllLines(filePath).Where(x => !string.IsNullOrEmpty(x)).ToList(); + } + catch (Exception exception) + { + Console.WriteLine(exception); + } + + return lines; + } + public static void Write(List rankingsUrls, string? dataFolder) { if (string.IsNullOrEmpty(dataFolder)) @@ -65,14 +99,23 @@ public static void Write(List rankingsUrls, string? dataFolder) private static string GetOutputLinksString(IEnumerable rankingsUrls) { - var output = ""; - var urls = CheckUrlUtil.GetRankingLinksHashSet(rankingsUrls).Order(); - foreach (var link in urls) - { - output += link; - output += "\n"; - } + var rankingLinksHashSet = CheckUrlUtil.GetRankingLinksHashSet(rankingsUrls); + var rankingUrls = rankingLinksHashSet.Where(PredicateStringUrlNotNullNorEmpty); + var urls = rankingUrls.Order(); + + var enumerable1 = urls.Select(link => link.Url); + var select = enumerable1.Select(SelectorUrlWithEndLine); + var enumerable = select.Distinct().Order(); + return enumerable.Aggregate("", (current, linkUrl) => current + linkUrl); + } - return output; + private static bool PredicateStringUrlNotNullNorEmpty(RankingUrl x) + { + return !string.IsNullOrEmpty(x.Url); + } + + private static string SelectorUrlWithEndLine(string url) + { + return url + "\n"; } } \ No newline at end of file