From 8da77db6f994e6c510eb1808eb3b64f7f10cc853 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 17:38:35 +0200 Subject: [PATCH 01/33] refactor: BySchoolYearCourseJson --- .../Data/Constants.cs | 1 + .../Specific/BySchoolYearCourseJson.cs | 300 ++++-------------- .../Objects/Json/SingleCourseJson.cs | 15 + .../Objects/Tables/Course/CourseTable.cs | 16 + 4 files changed, 91 insertions(+), 241 deletions(-) diff --git a/PoliNetwork.Graduatorie.Common/Data/Constants.cs b/PoliNetwork.Graduatorie.Common/Data/Constants.cs index 121af72ba..1edaff74d 100644 --- a/PoliNetwork.Graduatorie.Common/Data/Constants.cs +++ b/PoliNetwork.Graduatorie.Common/Data/Constants.cs @@ -9,4 +9,5 @@ public static class Constants public const string HtmlFolder = "html"; public const string OutputLinksFilename = "links.txt"; public const string DataFolder = "data"; + public const string LocationPlaceholder = "0"; } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index f7f51b21b..ae84a487f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -1,17 +1,20 @@ #region - +using System.Diagnostics; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; -using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; -using PoliNetwork.Graduatorie.Common.Utils.ParallelNS; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; - #endregion namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; +using SchoolsDict = + SortedDictionary>>>>; +using YearsDict = SortedDictionary>>>; +using CoursesDict = SortedDictionary>>; +using CourseDict = SortedDictionary>; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class BySchoolYearCourseJson : IndexJsonBase @@ -19,10 +22,7 @@ public class BySchoolYearCourseJson : IndexJsonBase internal const string PathCustom = "bySchoolYearCourse.json"; //keys: school, year, course, location - public SortedDictionary< - SchoolEnum, - SortedDictionary>>> - > Schools = new(); + public SchoolsDict Schools = new(); public static BySchoolYearCourseJson? From(RankingsSet? set) { @@ -30,15 +30,15 @@ public SortedDictionary< return null; var mainJson = new BySchoolYearCourseJson { LastUpdate = set.LastUpdate }; + // group rankings by school - var bySchool = set.Rankings.GroupBy(r => r.School); + var bySchool = set.Rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); + foreach (var schoolGroup in bySchool) { - if (schoolGroup.Key is null) - continue; - var school = schoolGroup.Key.Value; + var school = schoolGroup.Key; - var byYears = schoolGroup.GroupBy(r => r.Year); + var byYears = schoolGroup.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); var yearsDict = GetYearsDict(byYears); mainJson.Schools.Add(school, yearsDict); @@ -48,249 +48,67 @@ public SortedDictionary< return mainJson; } - private static SortedDictionary< - int, - SortedDictionary>> - > GetYearsDict(IEnumerable> byYears) - { - var d = - new SortedDictionary>>>(); - - foreach (var yearGroup in byYears) GetYearsDictSingle(yearGroup, d); - - return d; - } - - private static void GetYearsDictSingle(IGrouping yearGroup, - IDictionary>>> d) + private static YearsDict GetYearsDict(IEnumerable> byYears) { - if (yearGroup.Key != null) d.Add(yearGroup.Key.Value, GetCoursesDict(yearGroup)); - } - - private static SortedDictionary>> GetCoursesDict( - IEnumerable yearGroup - ) - { - var d = new SortedDictionary>>(); + var yearsDict = new YearsDict(); - foreach (var ranking in yearGroup) + foreach (var yearGroup in byYears) { - if (ranking.ByCourse == null) - continue; - - var byTitle = ranking.ByCourse.GroupBy(c => c.Title); - foreach (var courseGroup in byTitle) - AddCourseToDict(d, ranking, courseGroup); + var coursesDict = GetCoursesDict(yearGroup); + yearsDict.Add(yearGroup.Key, coursesDict); } - return d; + return yearsDict; } - private static void AddCourseToDict( - IDictionary>> d, - Ranking ranking, - IGrouping courseGroup - ) + private static CoursesDict GetCoursesDict(IGrouping yearGroup) { - var title = courseGroup.Key; - if (string.IsNullOrEmpty(title)) - return; - - if (!d.ContainsKey(title)) - d[title] = new SortedDictionary>(); + var coursesDict = new CoursesDict(); - var courseDict = d[title]; - foreach (var course in courseGroup) + foreach (var ranking in yearGroup) { - var location = course.Location; - - // fixedLocation - // esempio: Urbanistica 2022 ha un solo corso senza location, ma anche quello - // deve comparire nella lista - // fix: se un corso non ha location, si inserisce un valore 0 - var fixedLocation = string.IsNullOrEmpty(location) ? "0" : location; - - if (!courseDict.ContainsKey(fixedLocation)) - courseDict[fixedLocation] = new List(); - - var locationDict = courseDict[fixedLocation]; - var singleCourseJson = CreateCourseJson(ranking, course); - - if (locationDict.Any(IsThisCourse)) + if (ranking.ByCourse == null) continue; - locationDict.Add(singleCourseJson); - locationDict.Sort(Comparison); - continue; + var byTitle = + ranking.ByCourse.Where(c => c.Title != null).GroupBy(c => c.Title!); // e.g. INGEGNERIA AEROSPAZIALE - bool IsThisCourse(SingleCourseJson x) - { - return x.Link == singleCourseJson.Link && x.Location == singleCourseJson.Location; - } - } - } - - private static int Comparison(SingleCourseJson x, SingleCourseJson y) - { - return x.CompareTo(y); - } - - private static SingleCourseJson CreateCourseJson(Ranking ranking, CourseTable course) - { - var basePath = ranking.School + "/" + ranking.Year + "/"; - return new SingleCourseJson - { - Link = ranking.GetFilename(), - Id = ranking.GetId(), - BasePath = basePath, - Year = ranking.Year, - School = ranking.School, - Location = course.Location, - RankingOrder = ranking.RankingOrder - }; - } - - private static bool IsSimilar(IEnumerable yearGroup, SingleCourseJson singleCourseJson) - { - var enumerable = yearGroup.Where(v1 => v1.ByCourse != null); - - return enumerable.Any(Predicate); - - bool Predicate(Ranking v1) - { - return singleCourseJson.School == v1.School - && singleCourseJson.Year == v1.Year - && singleCourseJson.RankingOrder?.GetId() == v1.RankingOrder?.GetId(); - } - } - - public static RankingsSet? Parse(string dataFolder) - { - var outFolder = Path.Join(dataFolder, Constants.OutputFolder); - var mainJsonPath = Path.Join(outFolder, PathCustom); - try - { - var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); - if (mainJson is null) - return null; - - var rankings = RankingsAdd(mainJson, outFolder); - - return new RankingsSet { LastUpdate = mainJson.LastUpdate, Rankings = rankings }; - } - catch - { - // ignored - } - - return null; - } - - private static List RankingsAdd(BySchoolYearCourseJson mainJson, string outFolder) - { - List rankings = new(); - foreach (var school in mainJson.Schools) - foreach (var year in school.Value) - RankingsAddSingleYearSchool(year, outFolder, school, rankings); - - return rankings; - } - - private static void RankingsAddSingleYearSchool( - KeyValuePair>>> year, - string outFolder, - KeyValuePair< - SchoolEnum, - SortedDictionary>>> - > school, - ICollection rankings - ) - { - var actions = new List(); - foreach (var filename in year.Value) - { - var collection = filename.Value.Select(Selector); - actions.AddRange(collection); - continue; - - Action Selector(KeyValuePair> variable) + foreach (var courseGroup in byTitle) { - return () => { RankingAdd(school, year, outFolder, variable, rankings); }; + var alreadyExisted = coursesDict.ContainsKey(courseGroup.Key); + var courseDict = alreadyExisted + ? coursesDict[courseGroup.Key] + : new CourseDict(); + + foreach (var courseTable in courseGroup) + { + var location = courseTable.GetFixedLocation(); + if (!courseDict.ContainsKey(location)) + { + // first time this location is encountered, + // so we instantiate the list for this location + var newLocationList = new List(); + courseDict.Add(location, newLocationList); + } + + var locationList = courseDict.GetValueOrDefault(location); + if (locationList == null) + throw new UnreachableException(); // this should never happen at this point + + var singleCourseJson = SingleCourseJson.From(ranking, courseTable); + + if (locationList.Any( + x => x.Id == singleCourseJson.Id && x.Location == singleCourseJson.Location)) + continue; + + locationList.Add(singleCourseJson); + locationList.Sort(); + } + + if (!alreadyExisted) coursesDict.Add(courseGroup.Key, courseDict); } } - ParallelRun.Run(actions.ToArray()); - } - - private static void RankingAdd( - KeyValuePair< - SchoolEnum, - SortedDictionary>>> - > school, - KeyValuePair>>> year, - string outFolder, - KeyValuePair> filename, - ICollection rankings - ) - { - foreach (var variable in filename.Value) - RankingAddSingle(school, year, outFolder, rankings, variable); - } - - private static void RankingAddSingle( - KeyValuePair< - SchoolEnum, - SortedDictionary>>> - > school, - KeyValuePair>>> year, - string outFolder, - ICollection rankings, - SingleCourseJson variable - ) - { - var schoolKey = school.Key.ToString(); - var yearKey = year.Key.ToString(); - var path = Path.Join(outFolder, schoolKey, yearKey, variable.Link); - var ranking = Utils.Transformer.ParserNS.Parser.ParseJsonRanking(path); - if (ranking == null) - return; - - lock (rankings) - { - AddToRankings(rankings, ranking); - } - } - - private static void AddToRankings(ICollection rankings, Ranking ranking) - { - var any = rankings.Any( - x => - x.School == ranking.School - && x.Year == ranking.Year - && Similar(x.ByCourse, ranking.ByCourse) - ); - - if (!any) - rankings.Add(ranking); - } - - private static bool Similar( - IReadOnlyCollection? a, - IReadOnlyCollection? b - ) - { - if (a == null || b == null) - return false; - return a.Count == b.Count && a.Select(Selector).All(Predicate); - - bool Selector(CourseTable variable) - { - return b.Any(x => x.Title == variable.Title); - } - - bool Predicate(bool boolB) - { - return boolB; - } + return coursesDict; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs index bdb25dc15..0a6d1b72d 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs @@ -22,6 +22,21 @@ public class SingleCourseJson: IComparable public SchoolEnum? School; public int? Year; + public static SingleCourseJson From(Ranking ranking, CourseTable? course) + { + var basePath = $"{ranking.School}/{ranking.Year}/"; // "Ingegneria/2023" + return new SingleCourseJson + { + Link = ranking.GetFilename(), + Id = ranking.GetId(), + BasePath = basePath, + Year = ranking.Year, + School = ranking.School, + Location = course?.Location, + RankingOrder = ranking.RankingOrder + }; + } + public int GetHashWithoutLastUpdate() { var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode(); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs index d9ef3c269..17aac09bc 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs @@ -2,6 +2,7 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; using PoliNetwork.Graduatorie.Parser.Utils.Output; @@ -28,4 +29,19 @@ public RankingSummaryStudent GetRankingSummaryStudent(Ranking ranking) return new RankingSummaryStudent(Title, ranking.RankingOrder?.Phase, ranking.School, ranking.Url, ranking.Year); } + + /// + /// Get the course location if present, otherwise get the placeholder (constant). + /// Useful for index purposes. + /// + /// A string with the location or the placeholder + public string GetFixedLocation() + { + // fixedLocation + // esempio: Urbanistica 2022 ha un solo corso senza location, ma anche quello + // deve comparire nella lista + // fix: se un corso non ha location, si inserisce un valore 0 + if (string.IsNullOrEmpty(Location)) return Constants.LocationPlaceholder; + return Location; + } } \ No newline at end of file From ab1033f31daf89f97c3ec3141b6994c5ead8a531 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 18:10:30 +0200 Subject: [PATCH 02/33] refactor: merge EnrollUtil into EnrollType --- .../Objects/EnrollType.cs | 26 +++++++++++++-- .../Utils/EnrollUtil.cs | 32 ------------------- .../Utils/Transformer/ParserNS/Converter.cs | 6 ++-- .../Utils/Transformer/ParserNS/Parser.cs | 7 ++-- 4 files changed, 30 insertions(+), 41 deletions(-) delete mode 100644 PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs diff --git a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs index 97671355c..dd6b6ac28 100644 --- a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs +++ b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs @@ -1,5 +1,6 @@ #region +using System.Security.Cryptography; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; @@ -11,7 +12,7 @@ namespace PoliNetwork.Graduatorie.Common.Objects; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class EnrollType { - public bool? CanEnroll; + public bool CanEnroll; public string? Course; public string? Type; @@ -20,8 +21,29 @@ public int GetHashWithoutLastUpdate() var i = "EnrollTypeNotNull".GetHashCode(); i ^= Course?.GetHashCode() ?? "Course".GetHashCode(); i ^= Type?.GetHashCode() ?? "Type".GetHashCode(); - i ^= CanEnroll?.GetHashCode() ?? "CanEnroll".GetHashCode(); return i; } + + public static EnrollType From(string? rowCanEnrollInto, bool rowCanEnroll) + { + if (rowCanEnroll == false) + return new EnrollType { CanEnroll = false, Course = null, Type = null }; + + if (string.IsNullOrEmpty(rowCanEnrollInto)) + return new EnrollType { CanEnroll = true, Course = null, Type = null }; + + string[] tester = { "assegnato", "prenotato" }; + const string sep = " - "; + if (!rowCanEnrollInto.Contains(sep) || !tester.Any(t => rowCanEnrollInto.ToLower().Contains(t))) + return new EnrollType { CanEnroll = true, Course = rowCanEnrollInto, Type = null }; + + var s = rowCanEnrollInto.Split(sep).ToList(); + var type = s.FirstOrDefault(x => tester.Any(t => t == x.ToLower())); + + if (type != null) s.Remove(type); + + var course = string.Join(sep, s); + return new EnrollType { CanEnroll = true, Course = course, Type = type }; + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs b/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs deleted file mode 100644 index a9bf26a76..000000000 --- a/PoliNetwork.Graduatorie.Common/Utils/EnrollUtil.cs +++ /dev/null @@ -1,32 +0,0 @@ -#region - -using PoliNetwork.Graduatorie.Common.Objects; - -#endregion - -namespace PoliNetwork.Graduatorie.Common.Utils; - -public static class EnrollUtil -{ - public static EnrollType GetEnrollType(string? rowCanEnrollInto, bool rowCanEnroll) - { - if (rowCanEnroll == false) - return new EnrollType { CanEnroll = false, Course = null, Type = null }; - - if (string.IsNullOrEmpty(rowCanEnrollInto)) - return new EnrollType { CanEnroll = true, Course = null, Type = null }; - - string[] tester = { "assegnato", "prenotato" }; - const string sep = " - "; - if (!rowCanEnrollInto.Contains(sep) || !tester.Any(t => rowCanEnrollInto.ToLower().Contains(t))) - return new EnrollType { CanEnroll = true, Course = rowCanEnrollInto, Type = null }; - - var s = rowCanEnrollInto.Split(sep).ToList(); - var type = s.FirstOrDefault(x => tester.Any(t => t == x.ToLower())); - - if (type != null) s.Remove(type); - - var course = string.Join(sep, s); - return new EnrollType { CanEnroll = true, Course = course, Type = type }; - } -} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs index 05c3404ee..fc32e7a43 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs @@ -1,6 +1,6 @@ #region -using PoliNetwork.Graduatorie.Common.Utils; +using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; @@ -25,7 +25,7 @@ public static StudentResult FromMeritTableToStudentResult(MeritTableRow row) PositionCourse = null, SectionsResults = null, EnglishCorrectAnswers = null, - EnrollType = EnrollUtil.GetEnrollType(rowCanEnrollInto, rowCanEnroll) + EnrollType = EnrollType.From(rowCanEnrollInto, rowCanEnroll) }; } @@ -39,7 +39,7 @@ public static StudentResult FromCourseTableRowToStudentResult(CourseTableRow row Ofa = row.Ofa, Result = row.Result, BirthDate = row.BirthDate, - EnrollType = EnrollUtil.GetEnrollType(course.CourseTitle, rowCanEnroll), + EnrollType = EnrollType.From(course.CourseTitle, rowCanEnroll), PositionAbsolute = null, PositionCourse = row.Position, SectionsResults = row.SectionsResults, diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs index 781c9e697..0ff79297b 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs @@ -8,7 +8,6 @@ using PoliNetwork.Graduatorie.Common.Extensions; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; -using PoliNetwork.Graduatorie.Common.Utils; using PoliNetwork.Graduatorie.Common.Utils.HashNS; using PoliNetwork.Graduatorie.Parser.Objects; using PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; @@ -606,7 +605,7 @@ Table course Ofa = row.Ofa, Result = row.Result, BirthDate = row.BirthDate, - EnrollType = EnrollUtil.GetEnrollType(course.CourseTitle, canEnroll), + EnrollType = EnrollType.From(course.CourseTitle, canEnroll), PositionCourse = row.Position, SectionsResults = row.SectionsResults, EnglishCorrectAnswers = row.EnglishCorrectAnswers @@ -620,7 +619,7 @@ Table course return student; student.PositionAbsolute = meritRow.Position; - student.EnrollType = EnrollUtil.GetEnrollType(meritRow.CanEnrollInto, canEnroll); + student.EnrollType = EnrollType.From(meritRow.CanEnrollInto, canEnroll); return student; } @@ -643,7 +642,7 @@ IEnumerable courses var canEnroll = row.CanEnroll ?? false; var student = new StudentResult { - EnrollType = EnrollUtil.GetEnrollType(row.CanEnrollInto, canEnroll), + EnrollType = EnrollType.From(row.CanEnrollInto, canEnroll), Id = row.Id, PositionAbsolute = row.Position, Result = row.Result, From ae59f3536cb2511a9e6f6d2e95cb3c27a43c950e Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 18:14:08 +0200 Subject: [PATCH 03/33] refactor: rename method in HashMatricola --- .../Utils/HashNS/HashMatricola.cs | 8 ++++---- .../Utils/Transformer/ParserNS/Parser.cs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs b/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs index f685c4319..6f189ba31 100644 --- a/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs +++ b/PoliNetwork.Graduatorie.Common/Utils/HashNS/HashMatricola.cs @@ -24,7 +24,7 @@ public static partial class HashMatricola return string.IsNullOrEmpty(s) ? null : NotAlphaNumericRegex().Replace(s, ""); } - public static string? HashMatricolaMethod(string? input) + public static string? Get(string? input) { input = CleanInput(input); @@ -33,9 +33,9 @@ public static partial class HashMatricola var stringInputWithSalt = input + SaltGlobal; var hexHash = GetSha256(stringInputWithSalt); - var hashMatricolaMethod = hexHash[..MaxCharHash]; - var matricolaMethod = hashMatricolaMethod.ToLower(); - return matricolaMethod; + var shortHash = hexHash[..MaxCharHash]; + var lowerShortHash = shortHash.ToLower(); + return lowerShortHash; } private static string GetSha256(string stringInputWithSalt) diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs index 0ff79297b..da2585acd 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Parser.cs @@ -444,7 +444,7 @@ private static List ParseMeritTable(Table> table) foreach (var row in table.Data) { - var id = HashMatricola.HashMatricolaMethod(Table.GetFieldByIndex(row, idIndex)); + var id = HashMatricola.Get(Table.GetFieldByIndex(row, idIndex)); var votoTest = Table.GetFieldByIndex(row, votoTestIndex) ?? "0"; var enrollCourse = Table.GetFieldByIndex(row, corsoIndex) ?? ""; var position = Table.GetFieldByIndex(row, posIndex) ?? "-1"; @@ -528,7 +528,7 @@ private static List ParseCourseTable(Table> table) SortedDictionary? sectionsIndex ) { - var id = HashMatricola.HashMatricolaMethod(Table.GetFieldByIndex(row, idIndex)); + var id = HashMatricola.Get(Table.GetFieldByIndex(row, idIndex)); var votoTestString = Table.GetFieldByIndex(row, votoTestIndex)?.Replace(",", ".") ?? "0"; var votoTest = Convert.ToDecimal(votoTestString, Culture.NumberFormatInfo); From 6c929ef5ec0c9e2751945038107a6e4937b95313 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 18:43:10 +0200 Subject: [PATCH 04/33] refactor: rename method in Parser/Program.cs --- PoliNetwork.Graduatorie.Parser/Main/Program.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Main/Program.cs b/PoliNetwork.Graduatorie.Parser/Main/Program.cs index 988ee648d..2f6fb39da 100644 --- a/PoliNetwork.Graduatorie.Parser/Main/Program.cs +++ b/PoliNetwork.Graduatorie.Parser/Main/Program.cs @@ -22,10 +22,10 @@ public static void Main(string[] args) var rankingsUrls = Scraper.Main.Program.RankingsUrls(mt, argsConfig); // esegui ciò che fa il parser (parse + write) - ParserDo(argsConfig, rankingsUrls); + RunParser(argsConfig, rankingsUrls); } - private static void ParserDo(ArgsConfig argsConfig, IEnumerable rankingsUrls) + private static void RunParser(ArgsConfig argsConfig, IEnumerable rankingsUrls) { // ricava un unico set partendo dai file html salvati, dagli url // trovati e dal precedente set salvato nel .json From f0c76c07a1c1468695aaf58e3f3a8d9424b27028 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 19:41:35 +0200 Subject: [PATCH 05/33] refactor: rename param in UpdateDateFound --- PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs index 962fa1c15..faed551f4 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/DateFound.cs @@ -39,10 +39,10 @@ public void WriteToFile(string dataFolder) return date1 < date2 ? date1 : date2; } - public void UpdateDateFound(Ranking variable) + public void UpdateDateFound(Ranking ranking) { - var path = variable.GetPath().Trim(); - var minDateTime = GetMinTime(variable, path); + var path = ranking.GetFullPath().Trim(); + var minDateTime = GetMinTime(ranking, path); SetDate(path, minDateTime); } From e7a33db84cb854e75eba3e56a8d4267baf68d3a2 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 19:41:54 +0200 Subject: [PATCH 06/33] chore: remove unused import --- PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs index dd6b6ac28..2fb946ecb 100644 --- a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs +++ b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs @@ -1,6 +1,5 @@ #region -using System.Security.Cryptography; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; From c3daf44b3563c2bafb81b75dfa95bc89597c23cf Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 19:45:30 +0200 Subject: [PATCH 07/33] refactor: move ranking saving to RankingsSet and Ranking --- .../Objects/Json/Indexes/IndexJsonBase.cs | 113 +----------------- .../Objects/RankingNS/Ranking.cs | 97 ++++++++++----- .../Objects/RankingNS/RankingsSet.cs | 5 + .../Utils/Output/OutputWriteUtil.cs | 6 +- 4 files changed, 78 insertions(+), 143 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index 599b87cad..1dbd70402 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -6,9 +6,6 @@ using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; -using PoliNetwork.Graduatorie.Parser.Utils; #endregion @@ -26,115 +23,9 @@ public void WriteToFile(string outFolder, string pathFile) var mainJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); File.WriteAllText(mainJsonPath, mainJsonString); } - - - public static void WriteSingleJsons(RankingsSet? set, string outFolder, ArgsConfig argsConfig) - { - if (set == null) - return; - - // group rankings by year - var bySchool = set.Rankings.GroupBy(r => r.School); - foreach (var schoolGroup in bySchool) - { - if (schoolGroup.Key is null) - continue; - var school = schoolGroup.Key.Value; - - var byYears = schoolGroup.GroupBy(r => r.Year); - foreach (var yearGroup in byYears) - { - if (yearGroup.Key is null) - continue; - var year = yearGroup.Key.Value; - var folder = Path.Join(outFolder, school.ToString(), year.ToString()); - Directory.CreateDirectory(folder); - - foreach (var ranking in yearGroup) WriteSingleJsonRanking(folder, ranking, argsConfig); - } - } - } - - private static void WriteSingleJsonRanking(string folder, Ranking ranking, ArgsConfig argsConfig) - { - var path = Path.Join(folder, ranking.GetFilename()); - - if (ExitIfAlreadyExistsAndNotUpdated(ranking, path) && !argsConfig.ForceReparsing) return; - - var rankingJsonString = JsonConvert.SerializeObject(ranking, Culture.JsonSerializerSettings); - File.WriteAllText(path, rankingJsonString); - } - - private static bool ExitIfAlreadyExistsAndNotUpdated(Ranking a, string path) - { - if (!File.Exists(path)) return false; - var b = GetRankingFromFile(path); - return b != null && SameHash(a, b); - } - - private static bool SameHash(Ranking a, Ranking b) - { - var ai = a.GetHashWithoutLastUpdate(); - var bi = b.GetHashWithoutLastUpdate(); - return ai == bi; - } - - private static bool SameHashCourse(IReadOnlyCollection? aTableCourse, - IReadOnlyCollection? bTableCourse) - { - if (aTableCourse == null && bTableCourse == null) - return true; - if (aTableCourse == null || bTableCourse == null) - return false; - - if (aTableCourse.Count != bTableCourse.Count) - return false; - - var aHash = aTableCourse.Select(variable => - { - var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate()); - return hashWithoutLastUpdate; - }).ToList(); - - var bHash = bTableCourse.Select(variable => - { - var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate()); - return hashWithoutLastUpdate; - }).ToList(); - - var ai = Hashing.GetHashFromListHash(aHash); - var bi = Hashing.GetHashFromListHash(bHash); - - return (ai ?? 0) == (bi ?? 0); - } - - private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMerit) + + public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder, ArgsConfig argsConfig) { - if (aTableMerit == null && bTableMerit == null) - return true; - if (aTableMerit == null || bTableMerit == null) - return false; - - var ai = aTableMerit.GetHashWithoutLastUpdate(); - var bi = bTableMerit.GetHashWithoutLastUpdate(); - var aii = Hashing.GetHashFromListHash(ai) ?? 0; - var bii = Hashing.GetHashFromListHash(bi) ?? 0; - return aii == bii; - } - - private static Ranking? GetRankingFromFile(string path) - { - var x = File.ReadAllText(path); - - var j = JsonConvert.DeserializeObject(x, Culture.JsonSerializerSettings); - return j; - } - - public static void IndexesWrite(RankingsSet? rankingsSet, string outFolder, ArgsConfig argsConfig) - { - //let's write all single json files - WriteSingleJsons(rankingsSet, outFolder, argsConfig); - //now let's write each single different index BySchoolYearJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearJson.PathCustom); ByYearSchoolJson.From(rankingsSet)?.WriteToFile(outFolder, ByYearSchoolJson.PathCustom); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index 4a4fad20f..aed019d17 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -3,6 +3,7 @@ using System.Globalization; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; using PoliNetwork.Graduatorie.Parser.Objects.Json; @@ -18,7 +19,7 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.RankingNS; [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] -public class Ranking : IComparable +public class Ranking : IComparable, IEquatable { public List? ByCourse; public MeritTable? ByMerit; @@ -34,6 +35,15 @@ public RankingSummaryStudent GetRankingSummaryStudent() { return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url); } + + public static Ranking? FromJson(string fullPath) + { + if (!File.Exists(fullPath)) return null; + + var str = File.ReadAllText(fullPath); + var ranking = JsonConvert.DeserializeObject(str, Culture.JsonSerializerSettings); + return ranking; + } public int CompareTo(Ranking? other) { @@ -44,33 +54,10 @@ public int CompareTo(Ranking? other) } - /*** - * Ottieni l'hash senza considerare il valore di LastUpdate - */ - public int GetHashWithoutLastUpdate() + public bool Equals(Ranking? other) { - var i = "Ranking".GetHashCode(); - i ^= Extra?.GetHashCode() ?? "Extra".GetHashCode(); - i ^= RankingOrder?.GetHashWithoutLastUpdate() ?? "RankingOrder".GetHashCode(); - i ^= RankingSummary?.GetHashWithoutLastUpdate() ?? "RankingSummary".GetHashCode(); - i ^= School?.GetHashCode() ?? "School".GetHashCode(); - i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode(); - i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); - var iMerit = ByMerit?.GetHashWithoutLastUpdate(); - i ^= Hashing.GetHashFromListHash(iMerit) ?? "ByMerit".GetHashCode(); - - - if (ByCourse == null) - i ^= "ByCourse".GetHashCode(); - else - i = ByCourse.Aggregate(i, (current, variable) => - { - var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - var iList = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty".GetHashCode(); - return current ^ iList; - }); - - return i; + if (other == null) return false; + return GetHashWithoutLastUpdate() == other.GetHashWithoutLastUpdate(); } @@ -83,7 +70,6 @@ public bool IsSimilarTo(Ranking ranking) Url?.Url == ranking.Url?.Url; } - public void Merge(Ranking ranking) { LastUpdate = LastUpdate > ranking.LastUpdate ? LastUpdate : ranking.LastUpdate; @@ -159,8 +145,59 @@ public RankingSummary CreateSummary() return RankingSummary.From(this); } - public string GetPath() + public string GetBasePath(string outFolder = "") + { + return Path.Join(outFolder, $"{School}/{Year}/"); + } + + public string GetFullPath(string outFolder = "") + { + return Path.Join(GetBasePath(outFolder), GetFilename()); + } + + public void WriteAsJson(string outFolder, bool forceReparse = false) { - return School + "/" + Year + "/" + RankingOrder?.Phase; + var folderPath = GetBasePath(outFolder); + Directory.CreateDirectory(folderPath); + + var fullPath = GetFullPath(outFolder); + + var savedRanking = FromJson(fullPath); + var equalsSaved = savedRanking != null && Equals(savedRanking); + + if (forceReparse || equalsSaved || savedRanking == null) + { + var rankingJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); + File.WriteAllText(fullPath, rankingJsonString); + } + } + + /*** + * Ottieni l'hash senza considerare il valore di LastUpdate + */ + public int GetHashWithoutLastUpdate() + { + var i = "Ranking".GetHashCode(); + i ^= Extra?.GetHashCode() ?? "Extra".GetHashCode(); + i ^= RankingOrder?.GetHashWithoutLastUpdate() ?? "RankingOrder".GetHashCode(); + i ^= RankingSummary?.GetHashWithoutLastUpdate() ?? "RankingSummary".GetHashCode(); + i ^= School?.GetHashCode() ?? "School".GetHashCode(); + i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode(); + i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); + var iMerit = ByMerit?.GetHashWithoutLastUpdate(); + i ^= Hashing.GetHashFromListHash(iMerit) ?? "ByMerit".GetHashCode(); + + + if (ByCourse == null) + i ^= "ByCourse".GetHashCode(); + else + i = ByCourse.Aggregate(i, (current, variable) => + { + var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); + var iList = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty".GetHashCode(); + return current ^ iList; + }); + + return i; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs index fbcae97fb..092ee92c9 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs @@ -39,4 +39,9 @@ public void Merge(RankingsSet set) { foreach (var ranking in set.Rankings) AddRanking(ranking); } + + public void WriteAllRankings(string outFolder, bool forceReparse = false) + { + foreach (var ranking in Rankings) ranking.WriteAsJson(outFolder, forceReparse); + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs index 54b4dd470..f8dfad6f0 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs @@ -20,10 +20,12 @@ public OutputWriteUtil(ArgsConfig argsConfig) _config = argsConfig; } - public void SaveOutputs(RankingsSet? rankingsSet, DateFound dateFound) + public void SaveOutputs(RankingsSet rankingsSet, DateFound dateFound) { var outFolder = Path.Join(_config.DataFolder, Constants.OutputFolder); - IndexJsonBase.IndexesWrite(rankingsSet, outFolder, _config); + + rankingsSet.WriteAllRankings(outFolder, _config.ForceReparsing); + IndexJsonBase.WriteAllIndexes(rankingsSet, outFolder, _config); StatsJson.Write(outFolder, rankingsSet, _config); HashMatricoleWrite.Write(rankingsSet, outFolder); From 2f4ac69780527204de984b0f2869f5fa42684625 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Fri, 5 Jul 2024 20:09:03 +0200 Subject: [PATCH 08/33] chore: remove fake-null param --- .../Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index ae84a487f..02e5ed7cf 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -24,11 +24,8 @@ public class BySchoolYearCourseJson : IndexJsonBase //keys: school, year, course, location public SchoolsDict Schools = new(); - public static BySchoolYearCourseJson? From(RankingsSet? set) + public static BySchoolYearCourseJson From(RankingsSet set) { - if (set == null) - return null; - var mainJson = new BySchoolYearCourseJson { LastUpdate = set.LastUpdate }; // group rankings by school From e226379ab13f6e77c9652d0d005daf8cd885b968 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Sat, 6 Jul 2024 17:06:15 +0200 Subject: [PATCH 09/33] refactor: BySchoolYearJson.cs --- .../Objects/Json/Indexes/IndexJsonBase.cs | 6 +- .../Specific/BySchoolYearCourseJson.cs | 2 +- .../Json/Indexes/Specific/BySchoolYearJson.cs | 120 +++++++----------- .../Json/Indexes/Specific/ByYearSchoolJson.cs | 4 +- .../Objects/Json/SingleCourseJson.cs | 5 + .../Objects/RankingNS/Ranking.cs | 11 +- 6 files changed, 63 insertions(+), 85 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index 1dbd70402..ab52eaad6 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -27,8 +27,8 @@ public void WriteToFile(string outFolder, string pathFile) public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder, ArgsConfig argsConfig) { //now let's write each single different index - BySchoolYearJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearJson.PathCustom); - ByYearSchoolJson.From(rankingsSet)?.WriteToFile(outFolder, ByYearSchoolJson.PathCustom); - BySchoolYearCourseJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearCourseJson.PathCustom); + BySchoolYearJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearJson.CustomPath); + ByYearSchoolJson.From(rankingsSet)?.WriteToFile(outFolder, ByYearSchoolJson.CustomPath); + BySchoolYearCourseJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearCourseJson.CustomPath); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index 02e5ed7cf..2be3191bb 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -19,7 +19,7 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class BySchoolYearCourseJson : IndexJsonBase { - internal const string PathCustom = "bySchoolYearCourse.json"; + internal const string CustomPath = "bySchoolYearCourse.json"; //keys: school, year, course, location public SchoolsDict Schools = new(); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs index d93cd3fa6..a7436c0e1 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs @@ -10,80 +10,70 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; +using SchoolsDict = SortedDictionary>>; +using YearsDict = SortedDictionary>; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class BySchoolYearJson : IndexJsonBase { - internal const string PathCustom = "bySchoolYear.json"; + internal const string CustomPath = "bySchoolYear.json"; - public SortedDictionary>> Schools = new(); + public SchoolsDict Schools = new(); + public List All = new(); // decide whether include it in the json serialization - public static BySchoolYearJson? From(RankingsSet? set) + public static BySchoolYearJson From(RankingsSet set) { - if (set == null) - return null; - var mainJson = new BySchoolYearJson { LastUpdate = set.LastUpdate }; + + var list = set.Rankings.SelectMany(r => r.ToSingleCourseJson()).ToList(); + list.Sort(); + mainJson.All = list; + // group rankings by school - var bySchool = set.Rankings.GroupBy(r => r.School); + var bySchool = set.Rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); foreach (var schoolGroup in bySchool) { - if (schoolGroup.Key is null) - continue; - var school = schoolGroup.Key.Value; - - var schoolDict = new SortedDictionary>(); - - var byYears = schoolGroup.GroupBy(r => r.Year); - foreach (var yearGroup in byYears) - { - if (yearGroup.Key is null) - continue; - AddSchool(yearGroup, schoolDict); - } + var school = schoolGroup.Key; + var byYears = schoolGroup.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); - mainJson.Schools.Add(school, schoolDict); + var yearsDict = GetYearsDict(byYears); + mainJson.Schools.Add(school, yearsDict); } return mainJson; } - private static void AddSchool( - IGrouping yearGroup, - IDictionary> schoolDict - ) + private static YearsDict GetYearsDict(IEnumerable> byYears) { - var yearGroupKey = yearGroup.Key; - if (yearGroupKey == null) - return; - - var singleCourseJsons = yearGroup - .SelectMany(ranking => ranking.ToSingleCourseJson()) - .DistinctBy(x => x.Link) - .ToList(); - var filenames = singleCourseJsons - .OrderBy(a => a.Id) - .ThenBy(a => a.Year) - .ThenBy(a => a.School) - .ThenBy(a => a.BasePath) - .ToList(); - - schoolDict.Add(yearGroupKey.Value, filenames); - } + var yearsDict = new YearsDict(); + + foreach (var yearGroup in byYears) + { + var singleCourseJsons = yearGroup + .SelectMany(r => r.ToSingleCourseJson()) + .DistinctBy(r => r.Id) + .OrderBy(e => e.Id) // Id contains everything (school, year, pri/sec phase, extraeu, lang) + .ToList(); + + yearsDict.Add(yearGroup.Key, singleCourseJsons); + } + return yearsDict; + } public static RankingsSet GetAndParse(string dataFolder) { var set = new RankingsSet(); var outFolder = Path.Join(dataFolder, Constants.OutputFolder); - var mainJsonPath = Path.Join(outFolder, PathCustom); + var mainJsonPath = Path.Join(outFolder, CustomPath); try { - var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); - if (mainJson is null) return set; + var index = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); + if (index is null) return set; - set.LastUpdate = mainJson.LastUpdate; - set.Rankings = GetRankingsFromIndex(mainJson, outFolder); + set.LastUpdate = index.LastUpdate; + set.Rankings = index.GetRankings(outFolder); set.Rankings.Sort(); return set; } @@ -94,37 +84,17 @@ public static RankingsSet GetAndParse(string dataFolder) } } - private static List GetRankingsFromIndex(BySchoolYearJson mainJson, string outFolder) + public List GetRankings(string outFolder) { List rankings = new(); - var singleCourseJsons = GetSingleCourseJsons(mainJson).ToList(); - singleCourseJsons.Sort(); - foreach (var filename in singleCourseJsons) - AddRanking(outFolder, filename, rankings); + foreach (var singleCourseJson in All) + { + var fullPath = singleCourseJson.GetFullPath(outFolder); + var ranking = Ranking.FromJson(fullPath); + if (ranking != null) rankings.Add(ranking); + } + return rankings; } - - private static IEnumerable GetSingleCourseJsons(BySchoolYearJson mainJson) - { - var singleCourseJsons = mainJson.Schools.SelectMany( - school => - { - var courseJsons = school.Value.SelectMany(year => - { - var yearValue = year.Value; - return yearValue; - }); - return courseJsons; - }); - return singleCourseJsons; - } - - private static void AddRanking(string outFolder, SingleCourseJson filename, ICollection rankings) - { - var path = Path.Join(outFolder, filename.BasePath, filename.Link); - var ranking = Utils.Transformer.ParserNS.Parser.ParseJsonRanking(path); - if (ranking == null) return; - rankings.Add(ranking); - } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs index 7206836c1..02a39c52a 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs @@ -14,7 +14,7 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class ByYearSchoolJson : IndexJsonBase { - internal const string PathCustom = "byYearSchool.json"; + internal const string CustomPath = "byYearSchool.json"; public SortedDictionary>> Years = new(); @@ -56,7 +56,7 @@ public class ByYearSchoolJson : IndexJsonBase public static RankingsSet? Parse(string dataFolder) { var outFolder = Path.Join(dataFolder, Constants.OutputFolder); - var mainJsonPath = Path.Join(outFolder, PathCustom); + var mainJsonPath = Path.Join(outFolder, CustomPath); try { var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs index 0a6d1b72d..ff9e104e1 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs @@ -76,4 +76,9 @@ public bool Is(CourseTable courseTable) { return (RankingOrder?.Phase ?? "") == courseTable.Title; } + + public string GetFullPath(string outFolder = "") + { + return Path.Join(outFolder, BasePath, Link); + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index aed019d17..cc51c74d7 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -38,11 +38,14 @@ public RankingSummaryStudent GetRankingSummaryStudent() public static Ranking? FromJson(string fullPath) { - if (!File.Exists(fullPath)) return null; + // if (!File.Exists(fullPath)) return null; + // + // var str = File.ReadAllText(fullPath); + // var ranking = JsonConvert.DeserializeObject(str, Culture.JsonSerializerSettings); + // return ranking; - var str = File.ReadAllText(fullPath); - var ranking = JsonConvert.DeserializeObject(str, Culture.JsonSerializerSettings); - return ranking; + // consider merging the two functions at some point + return Utils.Transformer.ParserNS.Parser.ParseJsonRanking(fullPath); } public int CompareTo(Ranking? other) From ec6ae7dd069601a2ff0599c7b9486d895156bcc5 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Sat, 6 Jul 2024 20:58:39 +0200 Subject: [PATCH 10/33] fix code related to merge --- .../Objects/Json/Indexes/IndexJsonBase.cs | 4 ++-- .../Objects/Json/SingleCourseJson.cs | 2 +- PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index ab52eaad6..f3c278240 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -27,8 +27,8 @@ public void WriteToFile(string outFolder, string pathFile) public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder, ArgsConfig argsConfig) { //now let's write each single different index - BySchoolYearJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearJson.CustomPath); + BySchoolYearJson.From(rankingsSet).WriteToFile(outFolder, BySchoolYearJson.CustomPath); ByYearSchoolJson.From(rankingsSet)?.WriteToFile(outFolder, ByYearSchoolJson.CustomPath); - BySchoolYearCourseJson.From(rankingsSet)?.WriteToFile(outFolder, BySchoolYearCourseJson.CustomPath); + BySchoolYearCourseJson.From(rankingsSet).WriteToFile(outFolder, BySchoolYearCourseJson.CustomPath); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs index d5f1687b5..999386523 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs @@ -60,7 +60,7 @@ public int CompareTo(SingleCourseJson? singleCourseJson) return 0; } - + public int GetHashWithoutLastUpdate() { var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode(); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index db9c7f129..c2e1dec31 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -10,7 +10,6 @@ using PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; -using PoliNetwork.Graduatorie.Parser.Utils; using PoliNetwork.Graduatorie.Parser.Utils.Output; #endregion @@ -189,7 +188,7 @@ public int GetHashWithoutLastUpdate() i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode(); i ^= Year?.GetHashCode() ?? "Year".GetHashCode(); var iMerit = ByMerit?.GetHashWithoutLastUpdate(); - i ^= Hashing.GetHashFromListHash(iMerit) ?? "ByMerit".GetHashCode(); + i ^= iMerit ?? "ByMerit".GetHashCode(); if (ByCourse == null) @@ -198,7 +197,7 @@ public int GetHashWithoutLastUpdate() i = ByCourse.Aggregate(i, (current, variable) => { var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - var iList = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty".GetHashCode(); + var iList = hashWithoutLastUpdate; return current ^ iList; }); From 5bfe9b22809f02cfe15bf6c18c59b246ee571ee8 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Sat, 6 Jul 2024 21:08:18 +0200 Subject: [PATCH 11/33] refactor: ByYearSchoolJson.cs --- .../Specific/BySchoolYearCourseJson.cs | 5 + .../Json/Indexes/Specific/BySchoolYearJson.cs | 2 +- .../Json/Indexes/Specific/ByYearSchoolJson.cs | 94 ++++++------------- 3 files changed, 33 insertions(+), 68 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index 2be3191bb..9f48e104b 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -23,10 +23,15 @@ public class BySchoolYearCourseJson : IndexJsonBase //keys: school, year, course, location public SchoolsDict Schools = new(); + public List All = new(); // decide whether to include it in the json serialization public static BySchoolYearCourseJson From(RankingsSet set) { var mainJson = new BySchoolYearCourseJson { LastUpdate = set.LastUpdate }; + + var list = set.Rankings.SelectMany(r => r.ToSingleCourseJson()).ToList(); + list.Sort(); + mainJson.All = list; // group rankings by school var bySchool = set.Rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs index a7436c0e1..8553bc94f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs @@ -20,7 +20,7 @@ public class BySchoolYearJson : IndexJsonBase internal const string CustomPath = "bySchoolYear.json"; public SchoolsDict Schools = new(); - public List All = new(); // decide whether include it in the json serialization + public List All = new(); // decide whether to include it in the json serialization public static BySchoolYearJson From(RankingsSet set) { diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs index 02a39c52a..bfebf688f 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs @@ -10,94 +10,54 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; +using YearsDict = SortedDictionary>>; +using SchoolsDict = SortedDictionary>; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class ByYearSchoolJson : IndexJsonBase { internal const string CustomPath = "byYearSchool.json"; - public SortedDictionary>> Years = new(); + public YearsDict Years = new(); + public List All = new(); // decide whether to include it in the json serialization - public static ByYearSchoolJson? From(RankingsSet? set) + public static ByYearSchoolJson From(RankingsSet set) { - if (set == null) - return null; - var mainJson = new ByYearSchoolJson { LastUpdate = set.LastUpdate }; + + var list = set.Rankings.SelectMany(r => r.ToSingleCourseJson()).ToList(); + list.Sort(); + mainJson.All = list; + // group rankings by year - var byYear = set.Rankings.GroupBy(r => r.Year); + var byYear = set.Rankings.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); foreach (var yearGroup in byYear) { - if (yearGroup.Key is null) - continue; - var year = yearGroup.Key.Value; - - var yearDict = new SortedDictionary>(); - - var bySchools = yearGroup.GroupBy(r => r.School); - foreach (var schoolGroup in bySchools) - { - if (schoolGroup.Key is null) - continue; - var filenames = schoolGroup - .SelectMany(ranking => ranking.ToSingleCourseJson()) - .DistinctBy(x => x.Link) - .ToList().OrderBy(a => a.Id); - yearDict.Add(schoolGroup.Key.Value, filenames); - } + var year = yearGroup.Key; + var bySchools = yearGroup.Where(r => r.School != null).GroupBy(r => r.School!.Value); - mainJson.Years.Add(year, yearDict); + var schoolsDict = GetSchoolsDict(bySchools); + mainJson.Years.Add(year, schoolsDict); } return mainJson; } - - public static RankingsSet? Parse(string dataFolder) + private static SchoolsDict GetSchoolsDict(IEnumerable> bySchools) { - var outFolder = Path.Join(dataFolder, Constants.OutputFolder); - var mainJsonPath = Path.Join(outFolder, CustomPath); - try - { - var mainJson = Utils.Transformer.ParserNS.Parser.ParseJson(mainJsonPath); - if (mainJson is null) - return null; - - var rankings = RankingsAdd(mainJson, outFolder); - - return new RankingsSet { LastUpdate = mainJson.LastUpdate, Rankings = rankings }; - } - catch + var schoolsDict = new SchoolsDict(); + foreach (var schoolGroup in bySchools) { - // ignored + var filenames = schoolGroup + .SelectMany(ranking => ranking.ToSingleCourseJson()) + .DistinctBy(x => x.Link) + .OrderBy(r => r.Id) + .ToList(); + + schoolsDict.Add(schoolGroup.Key, filenames); } - return null; - } - - private static List RankingsAdd(ByYearSchoolJson mainJson, string outFolder) - { - List rankings = new(); - foreach (var year in mainJson.Years) - foreach (var school in year.Value) - foreach (var filename in school.Value) - RankingAdd(year, school, outFolder, filename, rankings); - - return rankings; - } - - private static void RankingAdd( - KeyValuePair>> year, - KeyValuePair> school, - string outFolder, - SingleCourseJson filename, - ICollection rankings) - { - var schoolKey = school.Key.ToString(); - var yearKey = year.Key.ToString(); - var path = Path.Join(outFolder, schoolKey, yearKey, filename.Link); - var ranking = Utils.Transformer.ParserNS.Parser.ParseJsonRanking(path); - if (ranking != null) - rankings.Add(ranking); + return schoolsDict; } } \ No newline at end of file From 7a83b42194572b54b779b8101590fbb6a8cc115b Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Sat, 6 Jul 2024 21:13:29 +0200 Subject: [PATCH 12/33] chore: remove orphan nullable attribute from RankingsSet params --- .../Objects/Json/Indexes/IndexJsonBase.cs | 2 +- .../Json/Indexes/Specific/ByYearSchoolJson.cs | 1 - .../Objects/Json/Stats/StatsJson.cs | 15 +++------------ .../Utils/DateFoundUtil.cs | 7 +++---- .../Utils/Output/HashMatricoleWrite.cs | 5 +---- 5 files changed, 8 insertions(+), 22 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index f3c278240..2bc029832 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -28,7 +28,7 @@ public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder, Ar { //now let's write each single different index BySchoolYearJson.From(rankingsSet).WriteToFile(outFolder, BySchoolYearJson.CustomPath); - ByYearSchoolJson.From(rankingsSet)?.WriteToFile(outFolder, ByYearSchoolJson.CustomPath); + ByYearSchoolJson.From(rankingsSet).WriteToFile(outFolder, ByYearSchoolJson.CustomPath); BySchoolYearCourseJson.From(rankingsSet).WriteToFile(outFolder, BySchoolYearCourseJson.CustomPath); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs index bfebf688f..247bba594 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs @@ -2,7 +2,6 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; -using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs index f3c394c0e..926db9374 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs @@ -19,18 +19,14 @@ public class StatsJson public DateTime LastUpdate = DateTime.UtcNow; public SortedDictionary Stats = new(); - public static void Write(string outFolder, RankingsSet? rankingsSet, ArgsConfig argsConfig) + public static void Write(string outFolder, RankingsSet rankingsSet, ArgsConfig argsConfig) { var statsJson = Generate(rankingsSet); - if (statsJson == null) return; foreach (var yearDict in statsJson.Stats) WriteToFileYear(outFolder, yearDict, argsConfig); } - private static StatsJson? Generate(RankingsSet? rankingsSet) + private static StatsJson Generate(RankingsSet rankingsSet) { - if (rankingsSet == null) - return null; - var statsJson = new StatsJson(); foreach (var ranking in rankingsSet.Rankings) GenerateSingleRanking(rankingsSet, ranking, statsJson); @@ -45,13 +41,8 @@ public static void Write(string outFolder, RankingsSet? rankingsSet, ArgsConfig return statsJson; } - private static void GenerateSingleRanking(RankingsSet? rankingsSet, Ranking ranking, StatsJson? statsJson) + private static void GenerateSingleRanking(RankingsSet rankingsSet, Ranking ranking, StatsJson statsJson) { - if (rankingsSet == null) - return; - if (statsJson == null) - return; - if (ranking.Year == null) return; if (!statsJson.Stats.ContainsKey(ranking.Year.Value)) { diff --git a/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs b/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs index f64f8c2bb..a4566b47d 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/DateFoundUtil.cs @@ -11,19 +11,18 @@ namespace PoliNetwork.Graduatorie.Parser.Utils; public static class DateFoundUtil { - public static DateFound GetDateFound(ArgsConfig argsConfig, RankingsSet? rankingsSet) + public static DateFound GetDateFound(ArgsConfig argsConfig, RankingsSet rankingsSet) { var dateFound = GetDateFoundFromFile(argsConfig.DataFolder); dateFound = UpdateDateFound(rankingsSet, dateFound); return dateFound; } - private static DateFound UpdateDateFound(RankingsSet? rankingsSet, DateFound? dateFound) + private static DateFound UpdateDateFound(RankingsSet rankingsSet, DateFound? dateFound) { dateFound ??= new DateFound(); - var rankingsSetRankings = rankingsSet?.Rankings; - if (rankingsSetRankings == null) return dateFound; + var rankingsSetRankings = rankingsSet.Rankings; foreach (var variable in rankingsSetRankings) dateFound.UpdateDateFound(variable); return dateFound; diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs index 76a10f208..9036330d1 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs @@ -12,11 +12,8 @@ namespace PoliNetwork.Graduatorie.Parser.Utils.Output; public static class HashMatricoleWrite { - public static void Write(RankingsSet? rankingsSet, string outFolder) + public static void Write(RankingsSet rankingsSet, string outFolder) { - if (rankingsSet == null) - return; - var dictionary = GetDictToWrite(rankingsSet); Sort2(dictionary); WriteToFile(dictionary, outFolder); From 14a9bc818b43e90d306c22c82648c8d2b61acabf Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Sat, 6 Jul 2024 23:09:54 +0200 Subject: [PATCH 13/33] refactor: StatsJson.cs + rename StatsSingleJson.cs method --- .../Objects/Json/Stats/StatsJson.cs | 4 ++-- .../Objects/Json/Stats/StatsSingleJson.cs | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs index 926db9374..5bdeaab46 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs @@ -21,11 +21,11 @@ public class StatsJson public static void Write(string outFolder, RankingsSet rankingsSet, ArgsConfig argsConfig) { - var statsJson = Generate(rankingsSet); + var statsJson = From(rankingsSet); foreach (var yearDict in statsJson.Stats) WriteToFileYear(outFolder, yearDict, argsConfig); } - private static StatsJson Generate(RankingsSet rankingsSet) + private static StatsJson From(RankingsSet rankingsSet) { var statsJson = new StatsJson(); foreach (var ranking in rankingsSet.Rankings) GenerateSingleRanking(rankingsSet, ranking, statsJson); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs index 8b503b810..9e134e4c1 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs @@ -12,21 +12,27 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsSingleCourseJson { - public SingleCourseJson? SingleCourseJson; - public RankingSummary? Stats; + public SingleCourseJson SingleCourseJson; + public RankingSummary Stats; + + public StatsSingleCourseJson(SingleCourseJson singleCourseJson, RankingSummary stats) + { + SingleCourseJson = singleCourseJson; + Stats = stats; + } public static List From(Ranking ranking) { var singleCourseJsons = ranking.ToSingleCourseJson(); - return singleCourseJsons.Select(variable => new StatsSingleCourseJson - { SingleCourseJson = variable, Stats = ranking.RankingSummary }).ToList(); + if (ranking.RankingSummary != null) ranking.RankingSummary = ranking.CreateSummary(); + return singleCourseJsons.Select(scj => new StatsSingleCourseJson(scj, ranking.RankingSummary!)).ToList(); } public int GetHashWithoutLastUpdate() { var i = "StatsSingleCourseJson".GetHashCode(); - i ^= SingleCourseJson?.GetHashWithoutLastUpdate() ?? "SingleCourseJson".GetHashCode(); - i ^= Stats?.GetHashWithoutLastUpdate() ?? "Stats".GetHashCode(); + i ^= SingleCourseJson.GetHashWithoutLastUpdate(); + i ^= Stats.GetHashWithoutLastUpdate(); return i; } From e86ed6dc0f04af9e0d2cfda09af2ed4b62937e05 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Sat, 6 Jul 2024 23:15:48 +0200 Subject: [PATCH 14/33] refactor: StatsSchool.cs --- .../Objects/Json/Stats/StatsJson.cs | 9 ++++----- .../Objects/Json/Stats/StatsSchool.cs | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs index 5bdeaab46..bf24e8f74 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs @@ -34,7 +34,7 @@ private static StatsJson From(RankingsSet rankingsSet) foreach (var school in statsJson.Stats[year].Schools.Keys) { var statsSingleCourseJsons = - statsJson.Stats[year].Schools[school].List.OrderBy(x => x.SingleCourseJson?.Link); + statsJson.Stats[year].Schools[school].List.OrderBy(x => x.SingleCourseJson.Link); statsJson.Stats[year].Schools[school].List = statsSingleCourseJsons.ToList(); } @@ -58,18 +58,17 @@ private static void GenerateSingleRanking(RankingsSet rankingsSet, Ranking ranki var schools = statsJson.Stats[ranking.Year.Value].Schools; if (!schools.ContainsKey(ranking.School.Value)) { + var rankings = rankingsSet.Rankings.Where(r => r.Year == ranking.Year && r.School == ranking.School); var statsSchool = new StatsSchool { - NumStudents = rankingsSet.Rankings - .Where(x => x.Year == ranking.Year && x.School == ranking.School) - .Select(x => x.RankingSummary?.HowManyStudents).Sum() + NumStudents = rankings.Select(x => (x.RankingSummary ?? x.CreateSummary()).HowManyStudents ?? 0).Sum() }; schools[ranking.School.Value] = statsSchool; } var statsSingleCourseJsons = ranking.ToStats().DistinctBy(x => new { - x.SingleCourseJson?.Link, x.SingleCourseJson?.Location + x.SingleCourseJson.Link, x.SingleCourseJson.Location }); foreach (var variable in statsSingleCourseJsons) schools[ranking.School.Value].List.Add(variable); } diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs index 8b2339eb5..95bd1c1fb 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs @@ -12,11 +12,11 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; public class StatsSchool { public List List = new(); - public int? NumStudents; - + public int NumStudents; + public int GetHashWithoutLastUpdate() { - var i = NumStudents ?? "NumStudents".GetHashCode(); + var i = NumStudents; return List.Aggregate(i, (current, variable) => current ^ variable.GetHashWithoutLastUpdate()); } } \ No newline at end of file From 94a694f4e6faaab2615869bbc23dbcc9a11cc1a8 Mon Sep 17 00:00:00 2001 From: Lorenzo Corallo Date: Sat, 6 Jul 2024 23:16:57 +0200 Subject: [PATCH 15/33] refactor: StatsYear.cs --- .../Objects/Json/Stats/StatsJson.cs | 2 +- .../Objects/Json/Stats/StatsYear.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs index bf24e8f74..b18203166 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs @@ -49,7 +49,7 @@ private static void GenerateSingleRanking(RankingsSet rankingsSet, Ranking ranki var statsJsonStat = new StatsYear { NumStudents = rankingsSet.Rankings.Where(x => x.Year == ranking.Year) - .Select(x => x.RankingSummary?.HowManyStudents).Sum() + .Select(x => (x.RankingSummary ?? x.CreateSummary()).HowManyStudents ?? 0).Sum() }; statsJson.Stats[ranking.Year.Value] = statsJsonStat; } diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs index a31c7f119..3f0e74fe1 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs @@ -12,12 +12,12 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsYear { - public int? NumStudents; public SortedDictionary Schools = new(); + public int NumStudents; public int GetHashWithoutLastUpdate() { - var i = NumStudents ?? "NumStudents".GetHashCode(); + var i = NumStudents; var enumerable = from variable in Schools let variableKey = (int)variable.Key From 64b7b14b4bb246e303804aa2c3aa8dce691ec0ee Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 17:57:04 +0200 Subject: [PATCH 16/33] refactor: rename WriteToFile to Write for consistency --- .../Objects/Json/Indexes/IndexJsonBase.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index 2bc029832..a21bbaea5 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -17,7 +17,7 @@ public abstract class IndexJsonBase { public DateTime? LastUpdate; - public void WriteToFile(string outFolder, string pathFile) + public void Write(string outFolder, string pathFile) { var mainJsonPath = Path.Join(outFolder, pathFile); var mainJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); @@ -27,8 +27,8 @@ public void WriteToFile(string outFolder, string pathFile) public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder, ArgsConfig argsConfig) { //now let's write each single different index - BySchoolYearJson.From(rankingsSet).WriteToFile(outFolder, BySchoolYearJson.CustomPath); - ByYearSchoolJson.From(rankingsSet).WriteToFile(outFolder, ByYearSchoolJson.CustomPath); - BySchoolYearCourseJson.From(rankingsSet).WriteToFile(outFolder, BySchoolYearCourseJson.CustomPath); + BySchoolYearJson.From(rankingsSet).Write(outFolder, BySchoolYearJson.CustomPath); + ByYearSchoolJson.From(rankingsSet).Write(outFolder, ByYearSchoolJson.CustomPath); + BySchoolYearCourseJson.From(rankingsSet).Write(outFolder, BySchoolYearCourseJson.CustomPath); } } \ No newline at end of file From dbd359975ead5f9b7dcf09565b86be629e79bef8 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 17:57:38 +0200 Subject: [PATCH 17/33] refactor: StatsJson.cs and related --- .../Objects/Json/Stats/StatsJson.cs | 89 ++++--------------- .../Objects/Json/Stats/StatsSchool.cs | 22 ++++- .../Objects/Json/Stats/StatsSingleJson.cs | 2 +- .../Objects/Json/Stats/StatsYear.cs | 62 ++++++++++++- .../Utils/Output/OutputWriteUtil.cs | 2 +- 5 files changed, 99 insertions(+), 78 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs index b18203166..ca80c939c 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs @@ -1,10 +1,11 @@ #region +using System.Diagnostics; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; -using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; +// ReSharper disable CanSimplifyDictionaryLookupWithTryAdd #endregion @@ -14,96 +15,36 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsJson { - private const string PathStats = "stats"; + private const string StatsFolderName = "stats"; public DateTime LastUpdate = DateTime.UtcNow; public SortedDictionary Stats = new(); - public static void Write(string outFolder, RankingsSet rankingsSet, ArgsConfig argsConfig) - { - var statsJson = From(rankingsSet); - foreach (var yearDict in statsJson.Stats) WriteToFileYear(outFolder, yearDict, argsConfig); - } - - private static StatsJson From(RankingsSet rankingsSet) + public static StatsJson From(RankingsSet rankingsSet) { var statsJson = new StatsJson(); - foreach (var ranking in rankingsSet.Rankings) GenerateSingleRanking(rankingsSet, ranking, statsJson); - - foreach (var year in statsJson.Stats.Keys) - foreach (var school in statsJson.Stats[year].Schools.Keys) - { - var statsSingleCourseJsons = - statsJson.Stats[year].Schools[school].List.OrderBy(x => x.SingleCourseJson.Link); - statsJson.Stats[year].Schools[school].List = statsSingleCourseJsons.ToList(); - } - return statsJson; - } - - private static void GenerateSingleRanking(RankingsSet rankingsSet, Ranking ranking, StatsJson statsJson) - { - if (ranking.Year == null) return; - if (!statsJson.Stats.ContainsKey(ranking.Year.Value)) + var byYears = rankingsSet.Rankings.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); + foreach (var yearGroup in byYears) { - var statsJsonStat = new StatsYear - { - NumStudents = rankingsSet.Rankings.Where(x => x.Year == ranking.Year) - .Select(x => (x.RankingSummary ?? x.CreateSummary()).HowManyStudents ?? 0).Sum() - }; - statsJson.Stats[ranking.Year.Value] = statsJsonStat; - } + var statsYear = StatsYear.From(yearGroup.ToList()); - if (ranking.School == null) return; - var schools = statsJson.Stats[ranking.Year.Value].Schools; - if (!schools.ContainsKey(ranking.School.Value)) - { - var rankings = rankingsSet.Rankings.Where(r => r.Year == ranking.Year && r.School == ranking.School); - var statsSchool = new StatsSchool - { - NumStudents = rankings.Select(x => (x.RankingSummary ?? x.CreateSummary()).HowManyStudents ?? 0).Sum() - }; - schools[ranking.School.Value] = statsSchool; + if (statsJson.Stats.ContainsKey(yearGroup.Key)) throw new UnreachableException(); // should be impossible + statsJson.Stats.Add(yearGroup.Key, statsYear); } - var statsSingleCourseJsons = ranking.ToStats().DistinctBy(x => new - { - x.SingleCourseJson.Link, x.SingleCourseJson.Location - }); - foreach (var variable in statsSingleCourseJsons) schools[ranking.School.Value].List.Add(variable); - } - - private static void WriteToFileYear(string outFolder, KeyValuePair yearDict, ArgsConfig argsConfig) - { - var statsPath = Path.Join(outFolder, PathStats); - if (!Directory.Exists(statsPath)) Directory.CreateDirectory(statsPath); - - var jsonPath = Path.Join(statsPath, yearDict.Key + ".json"); - if (ExitIfThereIsntAnUpdate(jsonPath, yearDict.Value) && !argsConfig.ForceReparsing) return; - - var jsonString = JsonConvert.SerializeObject(yearDict.Value, Culture.JsonSerializerSettings); - File.WriteAllText(jsonPath, jsonString); + return statsJson; } - private static bool ExitIfThereIsntAnUpdate(string jsonPath, StatsYear variableValue) + public void Write(string outFolder, ArgsConfig argsConfig) { - try - { - if (!File.Exists(jsonPath)) return false; - - var read = File.ReadAllText(jsonPath); - var jsonRead = JsonConvert.DeserializeObject(read, Culture.JsonSerializerSettings); - var hashRead = jsonRead?.GetHashWithoutLastUpdate(); - var hashThis = variableValue.GetHashWithoutLastUpdate(); + var statsFolderPath = Path.Join(outFolder, StatsFolderName); + if (!Directory.Exists(statsFolderPath)) Directory.CreateDirectory(statsFolderPath); - return hashRead == hashThis; - } - catch (Exception ex) + foreach (var yearStats in Stats.Values) { - Console.WriteLine(ex); + yearStats.Write(statsFolderPath, argsConfig); } - - return false; } public int GetHashWithoutLastUpdate() diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs index 95bd1c1fb..428a772f4 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs @@ -2,6 +2,9 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; + +// ReSharper disable CanSimplifyDictionaryLookupWithTryAdd #endregion @@ -13,7 +16,24 @@ public class StatsSchool { public List List = new(); public int NumStudents; - + + public static StatsSchool From(List pRankings) + { + var statsSchool = new StatsSchool(); + var rankings = pRankings.Where(r => r.Year != null && r.School != null).ToList(); + + statsSchool.NumStudents = + rankings.Select(x => (x.RankingSummary ?? x.CreateSummary()).HowManyStudents ?? 0).Sum(); + + statsSchool.List = rankings + .SelectMany(r => r.ToStats()) + .DistinctBy(x => new { x.SingleCourseJson.Id, x.SingleCourseJson.Location }) + .OrderBy(x => x.SingleCourseJson.Id) + .ToList(); + + return statsSchool; + } + public int GetHashWithoutLastUpdate() { var i = NumStudents; diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs index 9e134e4c1..255979716 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs @@ -24,7 +24,7 @@ public StatsSingleCourseJson(SingleCourseJson singleCourseJson, RankingSummary s public static List From(Ranking ranking) { var singleCourseJsons = ranking.ToSingleCourseJson(); - if (ranking.RankingSummary != null) ranking.RankingSummary = ranking.CreateSummary(); + ranking.RankingSummary ??= ranking.CreateSummary(); return singleCourseJsons.Select(scj => new StatsSingleCourseJson(scj, ranking.RankingSummary!)).ToList(); } diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs index 3f0e74fe1..aeadc7ca7 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs @@ -1,19 +1,79 @@ #region +using System.Diagnostics; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; +using PoliNetwork.Graduatorie.Common.Data; using PoliNetwork.Graduatorie.Common.Enums; +using PoliNetwork.Graduatorie.Common.Objects; +using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; +// ReSharper disable CanSimplifyDictionaryLookupWithTryAdd #endregion namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; +using SchoolsDict = SortedDictionary; + [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsYear { - public SortedDictionary Schools = new(); + public SchoolsDict Schools = new(); public int NumStudents; + public int Year; + + public static StatsYear From(List rankings) + { + var statsYear = new StatsYear(); + + statsYear.Year = rankings.First(r => r.Year != null).Year!.Value; // just hilarious + statsYear.NumStudents = + rankings.Select(r => (r.RankingSummary ?? r.CreateSummary()).HowManyStudents ?? 0).Sum(); // this ?? is crazy + + var bySchool = rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); + foreach (var schoolGroup in bySchool) + { + var statsSchool = StatsSchool.From(schoolGroup.ToList()); + + if (statsYear.Schools.ContainsKey(schoolGroup.Key)) throw new UnreachableException(); // should be impossible, right? + statsYear.Schools.Add(schoolGroup.Key, statsSchool); + } + + return statsYear; + } + + public void Write(string statsFolderPath, ArgsConfig argsConfig) + { + var fullJsonPath = Path.Join(statsFolderPath, $"{Year}.json"); + if (ExitIfThereIsntAnUpdate(fullJsonPath) && !argsConfig.ForceReparsing) return; + + var jsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); + File.WriteAllText(fullJsonPath, jsonString); + } + + private bool ExitIfThereIsntAnUpdate(string fullJsonPath) + { + try + { + if (!File.Exists(fullJsonPath)) return false; + + var saved = File.ReadAllText(fullJsonPath); + var savedStats = JsonConvert.DeserializeObject(saved, Culture.JsonSerializerSettings); + + if (string.IsNullOrEmpty(saved) || savedStats == null) return false; + + var savedHash = savedStats?.GetHashWithoutLastUpdate(); + var hash = GetHashWithoutLastUpdate(); + return savedHash == hash; + } + catch (Exception ex) + { + Console.WriteLine(ex); + } + + return false; + } public int GetHashWithoutLastUpdate() { diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs index f8dfad6f0..bea12be55 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs @@ -26,7 +26,7 @@ public void SaveOutputs(RankingsSet rankingsSet, DateFound dateFound) rankingsSet.WriteAllRankings(outFolder, _config.ForceReparsing); IndexJsonBase.WriteAllIndexes(rankingsSet, outFolder, _config); - StatsJson.Write(outFolder, rankingsSet, _config); + StatsJson.From(rankingsSet).Write(outFolder, _config); HashMatricoleWrite.Write(rankingsSet, outFolder); dateFound.WriteToFile(_config.DataFolder); From 6089436fd372fd1ed08c5ee586c1693a2f03126d Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 19:29:28 +0200 Subject: [PATCH 18/33] hotfix: distinct SingleCourseJson in BySchoolYearJson.All --- .../Json/Indexes/Specific/BySchoolYearJson.cs | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs index 8553bc94f..6efc0f4b9 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs @@ -26,10 +26,14 @@ public static BySchoolYearJson From(RankingsSet set) { var mainJson = new BySchoolYearJson { LastUpdate = set.LastUpdate }; - var list = set.Rankings.SelectMany(r => r.ToSingleCourseJson()).ToList(); + var list = set.Rankings + .SelectMany(r => r.ToSingleCourseJson()) + .DistinctBy(r => new { r.Id, r.Location }) + .ToList(); + list.Sort(); mainJson.All = list; - + // group rankings by school var bySchool = set.Rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); foreach (var schoolGroup in bySchool) @@ -47,16 +51,16 @@ public static BySchoolYearJson From(RankingsSet set) private static YearsDict GetYearsDict(IEnumerable> byYears) { var yearsDict = new YearsDict(); - + foreach (var yearGroup in byYears) { - var singleCourseJsons = yearGroup - .SelectMany(r => r.ToSingleCourseJson()) - .DistinctBy(r => r.Id) - .OrderBy(e => e.Id) // Id contains everything (school, year, pri/sec phase, extraeu, lang) - .ToList(); - - yearsDict.Add(yearGroup.Key, singleCourseJsons); + var singleCourseJsons = yearGroup + .SelectMany(r => r.ToSingleCourseJson()) + .DistinctBy(r => r.Id) + .OrderBy(e => e.Id) // Id contains everything (school, year, pri/sec phase, extraeu, lang) + .ToList(); + + yearsDict.Add(yearGroup.Key, singleCourseJsons); } return yearsDict; @@ -94,7 +98,7 @@ public List GetRankings(string outFolder) var ranking = Ranking.FromJson(fullPath); if (ranking != null) rankings.Add(ranking); } - + return rankings; } } \ No newline at end of file From e50326fd6d8e314c9ccbe14eaf9cbce9ff2415ed Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 19:31:40 +0200 Subject: [PATCH 19/33] hotfix: same distinct as BySchoolYearJson for other indexes --- .../Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs | 6 +++++- .../Objects/Json/Indexes/Specific/ByYearSchoolJson.cs | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index 9f48e104b..516133871 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -29,7 +29,11 @@ public static BySchoolYearCourseJson From(RankingsSet set) { var mainJson = new BySchoolYearCourseJson { LastUpdate = set.LastUpdate }; - var list = set.Rankings.SelectMany(r => r.ToSingleCourseJson()).ToList(); + var list = set.Rankings + .SelectMany(r => r.ToSingleCourseJson()) + .DistinctBy(r => new { r.Id, r.Location }) + .ToList(); + list.Sort(); mainJson.All = list; diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs index 247bba594..48f723fb1 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs @@ -25,7 +25,11 @@ public static ByYearSchoolJson From(RankingsSet set) { var mainJson = new ByYearSchoolJson { LastUpdate = set.LastUpdate }; - var list = set.Rankings.SelectMany(r => r.ToSingleCourseJson()).ToList(); + var list = set.Rankings + .SelectMany(r => r.ToSingleCourseJson()).ToList() + .DistinctBy(r => new { r.Id, r.Location }) + .ToList(); + list.Sort(); mainJson.All = list; From abdeff784ff8b32a28f95f6f9064ba466a671ae2 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 20:02:59 +0200 Subject: [PATCH 20/33] fix: remove Location from Distinct if not necessary --- .../Objects/Json/Indexes/Specific/BySchoolYearJson.cs | 2 +- .../Objects/Json/Indexes/Specific/ByYearSchoolJson.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs index 6efc0f4b9..2798b3660 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs @@ -28,7 +28,7 @@ public static BySchoolYearJson From(RankingsSet set) var list = set.Rankings .SelectMany(r => r.ToSingleCourseJson()) - .DistinctBy(r => new { r.Id, r.Location }) + .DistinctBy(r => new { r.Id }) .ToList(); list.Sort(); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs index 48f723fb1..a5b153efd 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs @@ -27,7 +27,7 @@ public static ByYearSchoolJson From(RankingsSet set) var list = set.Rankings .SelectMany(r => r.ToSingleCourseJson()).ToList() - .DistinctBy(r => new { r.Id, r.Location }) + .DistinctBy(r => new { r.Id }) .ToList(); list.Sort(); From 1f9919ae2c3e18739dd5d7977d8303c0099a8466 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 20:38:16 +0200 Subject: [PATCH 21/33] refactor: remove unused Ranking methods --- .../Objects/RankingNS/Ranking.cs | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index c2e1dec31..56fbc52b0 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -73,26 +73,6 @@ public bool IsSimilarTo(Ranking ranking) } - public void Merge(Ranking ranking) - { - LastUpdate = LastUpdate > ranking.LastUpdate ? LastUpdate : ranking.LastUpdate; - Year ??= ranking.Year; - Extra ??= ranking.Extra; - School ??= ranking.School; - MergeRankingOrder(ranking); - ByCourse ??= ranking.ByCourse; - ByMerit ??= ranking.ByMerit; - Url ??= ranking.Url; - } - - private void MergeRankingOrder(Ranking ranking) - { - if (RankingOrder == null) - RankingOrder = ranking.RankingOrder; - else - RankingOrder.Merge(ranking.RankingOrder); - } - public string GetFilename() { var id = GetId(); From 5cce9696fca72fc6f8063cd22a11e25b8be4b036 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 20:39:25 +0200 Subject: [PATCH 22/33] refactor: remove unused RankingOrder method --- .../Objects/RankingNS/RankingOrder.cs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs index 561908cae..891e274fc 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs @@ -101,11 +101,4 @@ public int GetHashWithoutLastUpdate() return i; } - - public void Merge(RankingOrder? rankingRankingOrder) - { - Phase ??= rankingRankingOrder?.Phase; - Primary ??= rankingRankingOrder?.Primary; - Secondary ??= rankingRankingOrder?.Secondary; - } } \ No newline at end of file From 1dcef862653e89ed2312af48130c2af00179b0f1 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 20:42:44 +0200 Subject: [PATCH 23/33] refactor: RankingsSet.cs --- .../Objects/RankingNS/RankingsSet.cs | 29 ++++++++----------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs index 092ee92c9..908d396aa 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs @@ -20,24 +20,19 @@ public RankingsSet() LastUpdate = DateTime.UtcNow; } - - public void AddRanking(Ranking ranking) - { - var alreadyPresent = Contains(ranking); - if (!alreadyPresent) - Rankings.Add(ranking); - - if (LastUpdate == null || ranking.LastUpdate.Date > LastUpdate?.Date) LastUpdate = ranking.LastUpdate; - } - - public bool Contains(Ranking ranking) - { - return Rankings.Any(v => v.IsSimilarTo(ranking)); - } - - public void Merge(RankingsSet set) + public void Merge(RankingsSet newSet) { - foreach (var ranking in set.Rankings) AddRanking(ranking); + foreach (var ranking in newSet.Rankings) + { + var alreadyPresent = Rankings.Any(v => v.IsSimilarTo(ranking)); + if (!alreadyPresent) + { + Rankings.Add(ranking); + + if (LastUpdate == null || ranking.LastUpdate.Date > LastUpdate?.Date) + LastUpdate = ranking.LastUpdate; + } + } } public void WriteAllRankings(string outFolder, bool forceReparse = false) From c4f14db867f16a1f441c87634a7c445c6e8a740b Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 20:43:28 +0200 Subject: [PATCH 24/33] chore: format RankingSummary.cs --- .../Objects/RankingNS/RankingSummary.cs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs index 26b09dd83..fb4cc3cc4 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingSummary.cs @@ -57,9 +57,7 @@ public static RankingSummary From(Ranking ranking) titleLocation.Location = x.Key.Location; return titleLocation; }); - var tableStatsList = - distinctBy - ?.ToList(); + var tableStatsList = distinctBy?.ToList(); var tableStatsList2 = Get(tableStatsList); var resultsSummarized = new SortedDictionary(keyValuePairs ?? new Dictionary()); return new RankingSummary From 4d9f2f8e8124c5ebf1cc5520647c647b4310f947 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:08:51 +0200 Subject: [PATCH 25/33] refactor: HashMatricoleWrite.cs --- .../Objects/Json/Stats/StatsSingleJson.cs | 2 +- .../Objects/Json/Stats/StatsYear.cs | 2 +- .../Utils/Output/HashMatricoleWrite.cs | 120 +++++++++--------- .../Utils/Output/OutputWriteUtil.cs | 2 +- 4 files changed, 61 insertions(+), 65 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs index 255979716..07ca9f281 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSingleJson.cs @@ -25,7 +25,7 @@ public static List From(Ranking ranking) { var singleCourseJsons = ranking.ToSingleCourseJson(); ranking.RankingSummary ??= ranking.CreateSummary(); - return singleCourseJsons.Select(scj => new StatsSingleCourseJson(scj, ranking.RankingSummary!)).ToList(); + return singleCourseJsons.Select(scj => new StatsSingleCourseJson(scj, ranking.RankingSummary)).ToList(); } public int GetHashWithoutLastUpdate() diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs index aeadc7ca7..c3a805522 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs @@ -63,7 +63,7 @@ private bool ExitIfThereIsntAnUpdate(string fullJsonPath) if (string.IsNullOrEmpty(saved) || savedStats == null) return false; - var savedHash = savedStats?.GetHashWithoutLastUpdate(); + var savedHash = savedStats.GetHashWithoutLastUpdate(); var hash = GetHashWithoutLastUpdate(); return savedHash == hash; } diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs index 9036330d1..12fc11eb6 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs @@ -2,34 +2,46 @@ using Newtonsoft.Json; using PoliNetwork.Graduatorie.Common.Data; -using PoliNetwork.Graduatorie.Parser.Objects; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; #endregion namespace PoliNetwork.Graduatorie.Parser.Utils.Output; -public static class HashMatricoleWrite +using IdsDict = SortedDictionary; + +public class HashMatricoleWrite { - public static void Write(RankingsSet rankingsSet, string outFolder) + internal const string FolderName = "hashMatricole"; + public IdsDict IdsDict = new(); + + public static HashMatricoleWrite From(RankingsSet rankingsSet) { - var dictionary = GetDictToWrite(rankingsSet); - Sort2(dictionary); - WriteToFile(dictionary, outFolder); + return new HashMatricoleWrite + { + IdsDict = GetIdsDict(rankingsSet) + }; } + - private static void Sort2(SortedDictionary dict) + public void Write(string outFolder) { - var keys = dict.Keys; - foreach (var key in keys) + Console.WriteLine($"[INFO] Students with id are {IdsDict.Keys.Count}"); + + var groupsDict = GetGroupsDict(); + var hashMatricoleFolder = Path.Join(outFolder, FolderName); + if (!Directory.Exists(hashMatricoleFolder)) Directory.CreateDirectory(hashMatricoleFolder); + + foreach (var (id, idsDict) in groupsDict) { - var item = dict[key]; - item.Sort2(); + var idsDictJson = JsonConvert.SerializeObject(idsDict, Culture.JsonSerializerSettings); + var filename = $"{id}.json"; + var fullPath = Path.Join(hashMatricoleFolder, filename); + File.WriteAllText(fullPath, idsDictJson); } } - private static SortedDictionary GetDictToWrite(RankingsSet rankingsSet) + private static IdsDict GetIdsDict(RankingsSet rankingsSet) { var dictionary = new SortedDictionary(); foreach (var ranking in rankingsSet.Rankings) @@ -37,70 +49,54 @@ private static SortedDictionary GetDictToWrite(Ranki var byMeritRows = ranking.ByMerit?.Rows; if (byMeritRows != null) foreach (var student in byMeritRows.Where(student => !string.IsNullOrEmpty(student.Id))) - AddToDict(dictionary, ranking, student, null); + { + var id = student.Id!; + if (!dictionary.ContainsKey(id)) dictionary.Add(id, new StudentHashSummary()); + dictionary[id].Merge(student, ranking, null); + } var rankingByCourse = ranking.ByCourse; if (rankingByCourse == null) continue; - foreach (var courseTable in rankingByCourse) + foreach (var courseTable in rankingByCourse.Where(c => c.Rows != null)) { - var row = courseTable.Rows; - if (row == null) continue; - foreach (var studentResult in row.Where(studentResult => !string.IsNullOrEmpty(studentResult.Id))) - AddToDict(dictionary, ranking, studentResult, courseTable); + var row = courseTable.Rows!; + foreach (var student in row.Where(studentResult => !string.IsNullOrEmpty(studentResult.Id))) + { + var id = student.Id!; + + if (!dictionary.ContainsKey(id)) dictionary.Add(id, new StudentHashSummary()); + dictionary[id].Merge(student, ranking, courseTable); + } } } - return dictionary; - } - - private static void WriteToFile(SortedDictionary dictionary, string outFolder) - { - Console.WriteLine($"[INFO] Students with id are {dictionary.Keys.Count}"); - - - var dictResult = - new SortedDictionary>(); - - foreach (var variable in dictionary) + foreach (var item in dictionary.Values) { - var key = variable.Key[..2]; - if (!dictResult.ContainsKey(key)) - dictResult[key] = new SortedDictionary(); - - if (!dictResult[key].ContainsKey(variable.Key)) - dictResult[key][variable.Key] = variable.Value; + item.Sort2(); } - var hashMatricole = outFolder + "/hashMatricole"; - if (!Directory.Exists(hashMatricole)) Directory.CreateDirectory(hashMatricole); - - foreach (var variable in dictResult) WriteSingleHashFile(variable, hashMatricole); - } - - private static void WriteSingleHashFile(KeyValuePair> variable, - string hashMatricole) - { - var studentHashSummaries = variable.Value; - var toWrite = JsonConvert.SerializeObject(studentHashSummaries, Culture.JsonSerializerSettings); - File.WriteAllText(hashMatricole + "/" + variable.Key + ".json", toWrite); + return dictionary; } - private static void AddToDict(IDictionary dictionary, Ranking ranking, - StudentResult student, CourseTable? courseTable) + private SortedDictionary GetGroupsDict() { - var id = student.Id; - if (string.IsNullOrEmpty(id)) - return; + var groupsDict = new SortedDictionary(); + var groups = IdsDict.GroupBy(pair => pair.Key[..2]); - if (dictionary.TryGetValue(id, out var studentPresent)) + foreach (var group in groups) { - studentPresent.Merge(student, ranking, courseTable); - } - else - { - var studentHashSummary = new StudentHashSummary(); - studentHashSummary.Merge(student, ranking, courseTable); - dictionary[id] = studentHashSummary; + var groupId = group.Key; + var groupVal = group.ToList(); + + var groupIdsDict = new IdsDict(); + foreach (var (id, studentHashSummary) in groupVal) + { + groupIdsDict.Add(id, studentHashSummary); + } + + groupsDict.Add(groupId, groupIdsDict); } + + return groupsDict; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs index bea12be55..1f422b5ed 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs @@ -27,7 +27,7 @@ public void SaveOutputs(RankingsSet rankingsSet, DateFound dateFound) rankingsSet.WriteAllRankings(outFolder, _config.ForceReparsing); IndexJsonBase.WriteAllIndexes(rankingsSet, outFolder, _config); StatsJson.From(rankingsSet).Write(outFolder, _config); - HashMatricoleWrite.Write(rankingsSet, outFolder); + HashMatricoleWrite.From(rankingsSet).Write(outFolder); dateFound.WriteToFile(_config.DataFolder); } From 4ee5e5fd9732e64b7bcd339ff1dfe0a88a9c7802 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:11:18 +0200 Subject: [PATCH 26/33] chore: compact class init and method call --- PoliNetwork.Graduatorie.Parser/Main/Program.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Main/Program.cs b/PoliNetwork.Graduatorie.Parser/Main/Program.cs index 2f6fb39da..c993f2cdd 100644 --- a/PoliNetwork.Graduatorie.Parser/Main/Program.cs +++ b/PoliNetwork.Graduatorie.Parser/Main/Program.cs @@ -35,7 +35,6 @@ private static void RunParser(ArgsConfig argsConfig, IEnumerable ran var dateFound = DateFoundUtil.GetDateFound(argsConfig, rankingsSet); // salvare il set - var writer = new OutputWriteUtil(argsConfig); - writer.SaveOutputs(rankingsSet, dateFound); + new OutputWriteUtil(argsConfig).SaveOutputs(rankingsSet, dateFound); } } \ No newline at end of file From d177b71b440b9df0ef8e1301cc19a4518749192a Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:13:48 +0200 Subject: [PATCH 27/33] refactor: RankingSummaryStudent.cs --- .../Utils/Output/RankingSummaryStudent.cs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs index 77f088cdc..2a0299f88 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs @@ -11,7 +11,7 @@ namespace PoliNetwork.Graduatorie.Parser.Utils.Output; [Serializable] [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] -public class RankingSummaryStudent +public class RankingSummaryStudent : IEquatable { public readonly string? Course; public readonly string? Phase; @@ -70,14 +70,15 @@ public override bool Equals(object? obj) Year == rankingSummaryStudent.Year && equals && Course == rankingSummaryStudent.Course; } - protected bool Equals(RankingSummaryStudent other) + public bool Equals(RankingSummaryStudent? other) { - return Phase == other.Phase && School == other.School && Year == other.Year && Equals(Url, other.Url) && - Course == other.Course; + if (ReferenceEquals(null, other)) return false; + if (ReferenceEquals(this, other)) return true; + return Course == other.Course && Phase == other.Phase && School == other.School && Equals(Url, other.Url) && Year == other.Year; } public override int GetHashCode() { - return HashCode.Combine(Phase, School, Year, Url, Course); + return HashCode.Combine(Course, Phase, School, Url, Year); } } \ No newline at end of file From 53c303d2fc50e8ace270ca7e233b07e18d4c6efd Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:18:40 +0200 Subject: [PATCH 28/33] refactor: StudentHashSummary.cs --- .../Utils/Output/HashMatricoleWrite.cs | 2 +- .../Utils/Output/StudentHashSummary.cs | 21 +++++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs index 12fc11eb6..86d4ae066 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs @@ -72,7 +72,7 @@ private static IdsDict GetIdsDict(RankingsSet rankingsSet) foreach (var item in dictionary.Values) { - item.Sort2(); + item.Sort(); } return dictionary; diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs index c16c963a1..db7eb69fc 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/StudentHashSummary.cs @@ -31,25 +31,18 @@ public void Merge(StudentResult student, Ranking ranking, CourseTable? courseTab else { var s = courseTable.GetRankingSummaryStudent(ranking); - var present1 = SingleCourseJsons.Any(x => x.Equals(s)); - if (!present1) - SingleCourseJsons.Add(s); + var alreadyPresentJson = SingleCourseJsons.Any(x => x.Equals(s)); + if (!alreadyPresentJson) SingleCourseJsons.Add(s); } var r = ranking.GetRankingSummaryStudent(); - var present2 = RankingSummaries.Any(x => x.Equals(r)); - if (!present2) - RankingSummaries.Add(r); + var alreadyPresentSummary = RankingSummaries.Any(x => x.Equals(r)); + if (!alreadyPresentSummary) RankingSummaries.Add(r); } - public void Sort2() + public void Sort() { - RankingSummaries.Sort(Compare); - SingleCourseJsons.Sort(Compare); - } - - private int Compare(RankingSummaryStudent a, RankingSummaryStudent b) - { - return a.Compare(b); + RankingSummaries.Sort(); + SingleCourseJsons.Sort(); } } \ No newline at end of file From 22177592fb9d62baafd156ad9736fb42b7cdbb64 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:22:19 +0200 Subject: [PATCH 29/33] refactor: Scraper/Program.cs --- PoliNetwork.Graduatorie.Scraper/Main/Program.cs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Main/Program.cs b/PoliNetwork.Graduatorie.Scraper/Main/Program.cs index d54620232..19ed7aeac 100644 --- a/PoliNetwork.Graduatorie.Scraper/Main/Program.cs +++ b/PoliNetwork.Graduatorie.Scraper/Main/Program.cs @@ -1,5 +1,6 @@ #region +using Newtonsoft.Json; using PoliNetwork.Core.Utils; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; @@ -27,21 +28,12 @@ public static List RankingsUrls(Metrics mt, ArgsConfig argsConfig) var rankingsUrls = mt.Execute(LinksFind.GetAll).ToList(); rankingsUrls = ScraperOutput.GetWithUrlsFromLocalFileLinks(rankingsUrls, argsConfig.DataFolder); - // save result - PrintAndWriteResults(rankingsUrls, argsConfig); + PrintLinks(rankingsUrls); + ScraperOutput.Write(rankingsUrls, argsConfig.DataFolder); // save results return rankingsUrls; } - private static void PrintAndWriteResults(List rankingsUrls, ArgsConfig argsConfig) - { - //write results to file - ScraperOutput.Write(rankingsUrls, argsConfig.DataFolder); - - //print links found - PrintLinks(rankingsUrls); - } - private static void PrintLinks(List rankingsUrls) { foreach (var r in rankingsUrls) From 8087560799f53b4d97e35982c45bbc5e3c7e714a Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:32:55 +0200 Subject: [PATCH 30/33] refactor: CheckUrlUtil.cs --- .../Utils/Web/CheckUrlUtil.cs | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs index ab375a6c4..c5dbdc8c3 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/CheckUrlUtil.cs @@ -9,23 +9,6 @@ namespace PoliNetwork.Graduatorie.Scraper.Utils.Web; public static class CheckUrlUtil { - private static void CheckUrl(RankingUrl variable, HashSet final) - { - try - { - var x = UrlUtils.CheckUrl(variable); - if (!x) return; - lock (final) - { - final.Add(variable); - } - } - catch (Exception exception) - { - Console.WriteLine(exception); - } - } - public static HashSet GetRankingLinks(IEnumerable rankingsLinks) { var parallelQuery = rankingsLinks @@ -36,18 +19,35 @@ public static HashSet GetRankingLinks(IEnumerable rankingsLi return GetRankingLinksHashSet(parallelQuery); } - public static HashSet GetRankingLinksHashSet(IEnumerable parallelQuery) + public static HashSet GetRankingLinksHashSet(IEnumerable urls) { - var final = new HashSet(); + var hashSet = new HashSet(); - var action = parallelQuery.Select((Func)Selector).ToArray(); - Parallel.Invoke(action); + var actions = urls.Select((Func)Selector).ToArray(); + Parallel.Invoke(actions); - return final; + return hashSet; - Action Selector(RankingUrl variable) + Action Selector(RankingUrl url) { - return () => { CheckUrl(variable, final); }; + return () => { CheckUrl(url, hashSet); }; + } + } + + private static void CheckUrl(RankingUrl url, HashSet hashSet) + { + try + { + var x = UrlUtils.CheckUrl(url); + if (!x) return; + lock (hashSet) + { + hashSet.Add(url); + } + } + catch (Exception exception) + { + Console.WriteLine(exception); } } } \ No newline at end of file From 971c8d67a78c812cdcc0872edb6d548a50a2f3f0 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:44:26 +0200 Subject: [PATCH 31/33] refactor: ScraperOutput.cs --- .../Utils/Web/ScraperOutput.cs | 59 +++++-------------- 1 file changed, 15 insertions(+), 44 deletions(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs index 4c32c1a3f..0313a1706 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs @@ -9,79 +9,45 @@ namespace PoliNetwork.Graduatorie.Scraper.Utils.Web; public static class ScraperOutput { - private static string GetFilePath(string? docFolder) - { - return docFolder + "/" + Constants.OutputLinksFilename; - } - public static List GetWithUrlsFromLocalFileLinks(IEnumerable urls, string? dataFolder) + public static List GetWithUrlsFromLocalFileLinks(IEnumerable urls, string dataFolder) { var links = GetSaved(dataFolder); links.AddRange(urls); - return Distinct(links); + return links.DistinctBy(r => r.Url).ToList(); } - private static List Distinct(IEnumerable links) - { - var list = new List(); - var rankingUrls = links.Where(variable => list.All(x => x.Url != variable.Url)); - list.AddRange(rankingUrls); - return list; - } - - private static List GetSaved(string? dataFolder) + private static List GetSaved(string dataFolder) { List list = new(); var filePath = GetFilePath(dataFolder); if (!File.Exists(filePath)) return list; - var lines = GetLines(filePath); - if (lines == null) - { - // consider to handle them - Console.WriteLine($"[ERROR] Can't read the ScraperOutput file ({filePath})"); - return list; - } - + var urls = GetLines(filePath); try { - foreach (var variable in lines) RankingFromAdd(variable, list); + return urls.Select(RankingUrl.From).ToList(); } catch { // consider to handle them Console.WriteLine($"[ERROR] Can't validate the ScraperOutput file ({filePath})"); + return new(); } - - return list; } - private static void RankingFromAdd(string variable, ICollection list) + private static List GetLines(string filePath) { try { - var rankingUrl = RankingUrl.From(variable); - list.Add(rankingUrl); - } - catch (Exception exception) - { - Console.WriteLine(exception); - } - } - - private static List? GetLines(string filePath) - { - List? lines = null; - try - { - lines = File.ReadAllLines(filePath).Where(x => !string.IsNullOrEmpty(x)).ToList(); + return File.ReadAllLines(filePath).Where(x => !string.IsNullOrEmpty(x)).ToList(); } catch (Exception exception) { Console.WriteLine(exception); + Console.WriteLine($"[ERROR] Can't read the ScraperOutput file ({filePath})"); + return new(); } - - return lines; } public static void Write(List rankingsUrls, string? dataFolder) @@ -118,4 +84,9 @@ private static string SelectorUrlWithEndLine(string url) { return url + "\n"; } + + private static string GetFilePath(string dataFolder) + { + return Path.Join(dataFolder, Constants.OutputLinksFilename); + } } \ No newline at end of file From 69e2b1b91e87e5557da7478876b0b277b9078eec Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Sun, 7 Jul 2024 22:47:03 +0200 Subject: [PATCH 32/33] chore: remove unused import --- PoliNetwork.Graduatorie.Scraper/Main/Program.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/PoliNetwork.Graduatorie.Scraper/Main/Program.cs b/PoliNetwork.Graduatorie.Scraper/Main/Program.cs index 19ed7aeac..9a2b27711 100644 --- a/PoliNetwork.Graduatorie.Scraper/Main/Program.cs +++ b/PoliNetwork.Graduatorie.Scraper/Main/Program.cs @@ -1,6 +1,5 @@ #region -using Newtonsoft.Json; using PoliNetwork.Core.Utils; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Common.Objects.RankingNS; From 49d8ca972ac58533fea02c256a365a8a3278881b Mon Sep 17 00:00:00 2001 From: angeousta <132761637+angeousta@users.noreply.github.com> Date: Sun, 7 Jul 2024 23:34:55 +0200 Subject: [PATCH 33/33] chore: inspect code (#169) * Update IndexJsonBase.cs, OutputWriteUtil.cs, and Converter.cs * Update Lib_CSharp * Update Ranking.cs * Update BySchoolYearJson.cs * Update StatsSchool.cs and StatsYear.cs * Update Ranking.cs * Update RankingsSet.cs * Update RankingsSet.cs, CourseTable.cs, and HashMatricoleWrite.cs * Update BySchoolYearCourseJson.cs, StatsSchool.cs, Ranking.cs, and 2 more files * Update EnrollType.cs, IndexJsonBase.cs, BySchoolYearJson.cs, and 4 more files * Update BySchoolYearCourseJson.cs, SingleCourseJson.cs, StatsYear.cs, and 5 more files * fix: check saved ranking before saving (#170) --------- Co-authored-by: userbot_github Co-authored-by: Lorenzo Corallo <66379281+lorenzocorallo@users.noreply.github.com> --- Lib_CSharp | 2 +- .../Objects/EnrollType.cs | 2 +- .../Objects/Json/Indexes/IndexJsonBase.cs | 5 +- .../Specific/BySchoolYearCourseJson.cs | 10 ++-- .../Json/Indexes/Specific/BySchoolYearJson.cs | 16 ++--- .../Json/Indexes/Specific/ByYearSchoolJson.cs | 10 ++-- .../Objects/Json/SingleCourseJson.cs | 34 +++++------ .../Objects/Json/Stats/StatsJson.cs | 6 +- .../Objects/Json/Stats/StatsSchool.cs | 4 +- .../Objects/Json/Stats/StatsYear.cs | 21 ++++--- .../Objects/RankingNS/Ranking.cs | 60 +++++++++++-------- .../Objects/RankingNS/RankingsSet.cs | 22 +++---- .../Objects/Tables/Course/CourseTable.cs | 7 +-- .../Utils/Output/HashMatricoleWrite.cs | 28 ++++----- .../Utils/Output/OutputWriteUtil.cs | 4 +- .../Utils/Output/RankingSummaryStudent.cs | 15 ++--- .../Utils/Transformer/ParserNS/Converter.cs | 49 --------------- .../Utils/Web/ScraperOutput.cs | 9 ++- 18 files changed, 121 insertions(+), 183 deletions(-) delete mode 100644 PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs diff --git a/Lib_CSharp b/Lib_CSharp index 6b7ed8c04..e53d67127 160000 --- a/Lib_CSharp +++ b/Lib_CSharp @@ -1 +1 @@ -Subproject commit 6b7ed8c04f1728570c2f7575bc1ceab2b713f04b +Subproject commit e53d671278c21d349835af31394d7bc42be49e7b diff --git a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs index 2fb946ecb..09d44d996 100644 --- a/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs +++ b/PoliNetwork.Graduatorie.Common/Objects/EnrollType.cs @@ -23,7 +23,7 @@ public int GetHashWithoutLastUpdate() return i; } - + public static EnrollType From(string? rowCanEnrollInto, bool rowCanEnroll) { if (rowCanEnroll == false) diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs index a21bbaea5..f685e341e 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/IndexJsonBase.cs @@ -3,7 +3,6 @@ using Newtonsoft.Json; using Newtonsoft.Json.Serialization; using PoliNetwork.Graduatorie.Common.Data; -using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; @@ -23,8 +22,8 @@ public void Write(string outFolder, string pathFile) var mainJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); File.WriteAllText(mainJsonPath, mainJsonString); } - - public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder, ArgsConfig argsConfig) + + public static void WriteAllIndexes(RankingsSet rankingsSet, string outFolder) { //now let's write each single different index BySchoolYearJson.From(rankingsSet).Write(outFolder, BySchoolYearJson.CustomPath); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs index 516133871..d41629b29 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearCourseJson.cs @@ -1,9 +1,11 @@ #region + using System.Diagnostics; using Newtonsoft.Json; using Newtonsoft.Json.Serialization; using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; + #endregion namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; @@ -20,20 +22,20 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; public class BySchoolYearCourseJson : IndexJsonBase { internal const string CustomPath = "bySchoolYearCourse.json"; + public List All = new(); // decide whether to include it in the json serialization //keys: school, year, course, location public SchoolsDict Schools = new(); - public List All = new(); // decide whether to include it in the json serialization public static BySchoolYearCourseJson From(RankingsSet set) { var mainJson = new BySchoolYearCourseJson { LastUpdate = set.LastUpdate }; - + var list = set.Rankings .SelectMany(r => r.ToSingleCourseJson()) .DistinctBy(r => new { r.Id, r.Location }) .ToList(); - + list.Sort(); mainJson.All = list; @@ -67,7 +69,7 @@ private static YearsDict GetYearsDict(IEnumerable> byYea return yearsDict; } - private static CoursesDict GetCoursesDict(IGrouping yearGroup) + private static CoursesDict GetCoursesDict(IEnumerable yearGroup) { var coursesDict = new CoursesDict(); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs index 2798b3660..fd5a1c5fd 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/BySchoolYearJson.cs @@ -18,9 +18,9 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; public class BySchoolYearJson : IndexJsonBase { internal const string CustomPath = "bySchoolYear.json"; + public List All = new(); // decide whether to include it in the json serialization public SchoolsDict Schools = new(); - public List All = new(); // decide whether to include it in the json serialization public static BySchoolYearJson From(RankingsSet set) { @@ -30,7 +30,7 @@ public static BySchoolYearJson From(RankingsSet set) .SelectMany(r => r.ToSingleCourseJson()) .DistinctBy(r => new { r.Id }) .ToList(); - + list.Sort(); mainJson.All = list; @@ -90,15 +90,7 @@ public static RankingsSet GetAndParse(string dataFolder) public List GetRankings(string outFolder) { - List rankings = new(); - - foreach (var singleCourseJson in All) - { - var fullPath = singleCourseJson.GetFullPath(outFolder); - var ranking = Ranking.FromJson(fullPath); - if (ranking != null) rankings.Add(ranking); - } - - return rankings; + return All.Select(singleCourseJson => singleCourseJson.GetFullPath(outFolder)).Select(Ranking.FromJson) + .OfType().ToList(); } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs index a5b153efd..d1e973b92 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Indexes/Specific/ByYearSchoolJson.cs @@ -17,22 +17,22 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Indexes.Specific; public class ByYearSchoolJson : IndexJsonBase { internal const string CustomPath = "byYearSchool.json"; + public List All = new(); // decide whether to include it in the json serialization public YearsDict Years = new(); - public List All = new(); // decide whether to include it in the json serialization public static ByYearSchoolJson From(RankingsSet set) { var mainJson = new ByYearSchoolJson { LastUpdate = set.LastUpdate }; - + var list = set.Rankings .SelectMany(r => r.ToSingleCourseJson()).ToList() .DistinctBy(r => new { r.Id }) .ToList(); - + list.Sort(); mainJson.All = list; - + // group rankings by year var byYear = set.Rankings.Where(r => r.Year != null).GroupBy(r => r.Year!.Value); foreach (var yearGroup in byYear) @@ -57,7 +57,7 @@ private static SchoolsDict GetSchoolsDict(IEnumerable x.Link) .OrderBy(r => r.Id) .ToList(); - + schoolsDict.Add(schoolGroup.Key, filenames); } diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs index 999386523..ad6842e47 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs @@ -22,25 +22,10 @@ public class SingleCourseJson : IComparable public SchoolEnum? School; public int? Year; - public static SingleCourseJson From(Ranking ranking, CourseTable? course) - { - var basePath = $"{ranking.School}/{ranking.Year}/"; // "Ingegneria/2023" - return new SingleCourseJson - { - Link = ranking.GetFilename(), - Id = ranking.GetId(), - BasePath = basePath, - Year = ranking.Year, - School = ranking.School, - Location = course?.Location, - RankingOrder = ranking.RankingOrder - }; - } - public int CompareTo(SingleCourseJson? singleCourseJson) { if (singleCourseJson == null) return 1; - + if (Year != singleCourseJson.Year) return (Year ?? -1) < (singleCourseJson.Year ?? -1) ? -1 : 1; @@ -60,7 +45,22 @@ public int CompareTo(SingleCourseJson? singleCourseJson) return 0; } - + + public static SingleCourseJson From(Ranking ranking, CourseTable? course) + { + var basePath = $"{ranking.School}/{ranking.Year}/"; // "Ingegneria/2023" + return new SingleCourseJson + { + Link = ranking.GetFilename(), + Id = ranking.GetId(), + BasePath = basePath, + Year = ranking.Year, + School = ranking.School, + Location = course?.Location, + RankingOrder = ranking.RankingOrder + }; + } + public int GetHashWithoutLastUpdate() { var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode(); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs index ca80c939c..1befb2b14 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsJson.cs @@ -5,6 +5,7 @@ using Newtonsoft.Json.Serialization; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; + // ReSharper disable CanSimplifyDictionaryLookupWithTryAdd #endregion @@ -41,10 +42,7 @@ public void Write(string outFolder, ArgsConfig argsConfig) var statsFolderPath = Path.Join(outFolder, StatsFolderName); if (!Directory.Exists(statsFolderPath)) Directory.CreateDirectory(statsFolderPath); - foreach (var yearStats in Stats.Values) - { - yearStats.Write(statsFolderPath, argsConfig); - } + foreach (var yearStats in Stats.Values) yearStats.Write(statsFolderPath, argsConfig); } public int GetHashWithoutLastUpdate() diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs index 428a772f4..0a80f7362 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsSchool.cs @@ -17,10 +17,10 @@ public class StatsSchool public List List = new(); public int NumStudents; - public static StatsSchool From(List pRankings) + public static StatsSchool From(IEnumerable pRankings) { var statsSchool = new StatsSchool(); - var rankings = pRankings.Where(r => r.Year != null && r.School != null).ToList(); + var rankings = pRankings.Where(r => r is { Year: not null, School: not null }).ToList(); statsSchool.NumStudents = rankings.Select(x => (x.RankingSummary ?? x.CreateSummary()).HowManyStudents ?? 0).Sum(); diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs index c3a805522..53d4805a9 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Json/Stats/StatsYear.cs @@ -7,6 +7,7 @@ using PoliNetwork.Graduatorie.Common.Enums; using PoliNetwork.Graduatorie.Common.Objects; using PoliNetwork.Graduatorie.Parser.Objects.RankingNS; + // ReSharper disable CanSimplifyDictionaryLookupWithTryAdd #endregion @@ -19,27 +20,29 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json.Stats; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class StatsYear { - public SchoolsDict Schools = new(); public int NumStudents; + public SchoolsDict Schools = new(); public int Year; public static StatsYear From(List rankings) { - var statsYear = new StatsYear(); - - statsYear.Year = rankings.First(r => r.Year != null).Year!.Value; // just hilarious - statsYear.NumStudents = - rankings.Select(r => (r.RankingSummary ?? r.CreateSummary()).HowManyStudents ?? 0).Sum(); // this ?? is crazy + var statsYear = new StatsYear + { + Year = rankings.First(r => r.Year != null).Year!.Value, // just hilarious + NumStudents = rankings.Select(r => (r.RankingSummary ?? r.CreateSummary()).HowManyStudents ?? 0) + .Sum() // this ?? is crazy + }; var bySchool = rankings.Where(r => r.School != null).GroupBy(r => r.School!.Value); foreach (var schoolGroup in bySchool) { var statsSchool = StatsSchool.From(schoolGroup.ToList()); - if (statsYear.Schools.ContainsKey(schoolGroup.Key)) throw new UnreachableException(); // should be impossible, right? + if (statsYear.Schools.ContainsKey(schoolGroup.Key)) + throw new UnreachableException(); // should be impossible, right? statsYear.Schools.Add(schoolGroup.Key, statsSchool); } - + return statsYear; } @@ -62,7 +65,7 @@ private bool ExitIfThereIsntAnUpdate(string fullJsonPath) var savedStats = JsonConvert.DeserializeObject(saved, Culture.JsonSerializerSettings); if (string.IsNullOrEmpty(saved) || savedStats == null) return false; - + var savedHash = savedStats.GetHashWithoutLastUpdate(); var hash = GetHashWithoutLastUpdate(); return savedHash == hash; diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs index 56fbc52b0..2eac3d2fc 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs @@ -30,23 +30,6 @@ public class Ranking : IComparable, IEquatable public RankingUrl? Url; public int? Year; - public RankingSummaryStudent GetRankingSummaryStudent() - { - return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url); - } - - public static Ranking? FromJson(string fullPath) - { - // if (!File.Exists(fullPath)) return null; - // - // var str = File.ReadAllText(fullPath); - // var ranking = JsonConvert.DeserializeObject(str, Culture.JsonSerializerSettings); - // return ranking; - - // consider merging the two functions at some point - return Utils.Transformer.ParserNS.Parser.ParseJsonRanking(fullPath); - } - public int CompareTo(Ranking? other) { if (ReferenceEquals(this, other)) return 0; @@ -62,6 +45,23 @@ public bool Equals(Ranking? other) return GetHashWithoutLastUpdate() == other.GetHashWithoutLastUpdate(); } + public RankingSummaryStudent GetRankingSummaryStudent() + { + return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url); + } + + public static Ranking? FromJson(string fullPath) + { + // if (!File.Exists(fullPath)) return null; + // + // var str = File.ReadAllText(fullPath); + // var ranking = JsonConvert.DeserializeObject(str, Culture.JsonSerializerSettings); + // return ranking; + + // consider merging the two functions at some point + return Utils.Transformer.ParserNS.Parser.ParseJsonRanking(fullPath); + } + public bool IsSimilarTo(Ranking ranking) { @@ -142,19 +142,18 @@ public void WriteAsJson(string outFolder, bool forceReparse = false) { var folderPath = GetBasePath(outFolder); Directory.CreateDirectory(folderPath); - + var fullPath = GetFullPath(outFolder); var savedRanking = FromJson(fullPath); var equalsSaved = savedRanking != null && Equals(savedRanking); - if (forceReparse || equalsSaved || savedRanking == null) - { - var rankingJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); - File.WriteAllText(fullPath, rankingJsonString); - } + if (!forceReparse && equalsSaved) return; + + var rankingJsonString = JsonConvert.SerializeObject(this, Culture.JsonSerializerSettings); + File.WriteAllText(fullPath, rankingJsonString); } - + /*** * Ottieni l'hash senza considerare il valore di LastUpdate */ @@ -177,10 +176,19 @@ public int GetHashWithoutLastUpdate() i = ByCourse.Aggregate(i, (current, variable) => { var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate(); - var iList = hashWithoutLastUpdate; - return current ^ iList; + return current ^ hashWithoutLastUpdate; }); return i; } + + public override bool Equals(object? obj) + { + return Equals(obj as Ranking); + } + + public override int GetHashCode() + { + return GetHashWithoutLastUpdate(); + } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs index 908d396aa..d129cfa4e 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingsSet.cs @@ -11,27 +11,19 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.RankingNS; [JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))] public class RankingsSet { - public DateTime? LastUpdate; - public List Rankings; - - public RankingsSet() - { - Rankings = new List(); - LastUpdate = DateTime.UtcNow; - } + public DateTime? LastUpdate = DateTime.UtcNow; + public List Rankings = new(); public void Merge(RankingsSet newSet) { foreach (var ranking in newSet.Rankings) { var alreadyPresent = Rankings.Any(v => v.IsSimilarTo(ranking)); - if (!alreadyPresent) - { - Rankings.Add(ranking); - - if (LastUpdate == null || ranking.LastUpdate.Date > LastUpdate?.Date) - LastUpdate = ranking.LastUpdate; - } + if (alreadyPresent) continue; + Rankings.Add(ranking); + + if (LastUpdate == null || ranking.LastUpdate.Date > LastUpdate?.Date) + LastUpdate = ranking.LastUpdate; } } diff --git a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs index 17aac09bc..4e6360fae 100644 --- a/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs +++ b/PoliNetwork.Graduatorie.Parser/Objects/Tables/Course/CourseTable.cs @@ -31,8 +31,8 @@ public RankingSummaryStudent GetRankingSummaryStudent(Ranking ranking) } /// - /// Get the course location if present, otherwise get the placeholder (constant). - /// Useful for index purposes. + /// Get the course location if present, otherwise get the placeholder (constant). + /// Useful for index purposes. /// /// A string with the location or the placeholder public string GetFixedLocation() @@ -41,7 +41,6 @@ public string GetFixedLocation() // esempio: Urbanistica 2022 ha un solo corso senza location, ma anche quello // deve comparire nella lista // fix: se un corso non ha location, si inserisce un valore 0 - if (string.IsNullOrEmpty(Location)) return Constants.LocationPlaceholder; - return Location; + return string.IsNullOrEmpty(Location) ? Constants.LocationPlaceholder : Location; } } \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs index 86d4ae066..80396565d 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/HashMatricoleWrite.cs @@ -12,21 +12,21 @@ namespace PoliNetwork.Graduatorie.Parser.Utils.Output; public class HashMatricoleWrite { - internal const string FolderName = "hashMatricole"; - public IdsDict IdsDict = new(); + private const string FolderName = "hashMatricole"; + private IdsDict _idsDict = new(); public static HashMatricoleWrite From(RankingsSet rankingsSet) { return new HashMatricoleWrite { - IdsDict = GetIdsDict(rankingsSet) + _idsDict = GetIdsDict(rankingsSet) }; } - + public void Write(string outFolder) { - Console.WriteLine($"[INFO] Students with id are {IdsDict.Keys.Count}"); + Console.WriteLine($"[INFO] Students with id are {_idsDict.Keys.Count}"); var groupsDict = GetGroupsDict(); var hashMatricoleFolder = Path.Join(outFolder, FolderName); @@ -43,7 +43,7 @@ public void Write(string outFolder) private static IdsDict GetIdsDict(RankingsSet rankingsSet) { - var dictionary = new SortedDictionary(); + var dictionary = new IdsDict(); foreach (var ranking in rankingsSet.Rankings) { var byMeritRows = ranking.ByMerit?.Rows; @@ -63,17 +63,14 @@ private static IdsDict GetIdsDict(RankingsSet rankingsSet) foreach (var student in row.Where(studentResult => !string.IsNullOrEmpty(studentResult.Id))) { var id = student.Id!; - + if (!dictionary.ContainsKey(id)) dictionary.Add(id, new StudentHashSummary()); dictionary[id].Merge(student, ranking, courseTable); } } } - foreach (var item in dictionary.Values) - { - item.Sort(); - } + foreach (var item in dictionary.Values) item.Sort(); return dictionary; } @@ -81,7 +78,7 @@ private static IdsDict GetIdsDict(RankingsSet rankingsSet) private SortedDictionary GetGroupsDict() { var groupsDict = new SortedDictionary(); - var groups = IdsDict.GroupBy(pair => pair.Key[..2]); + var groups = _idsDict.GroupBy(pair => pair.Key[..2]); foreach (var group in groups) { @@ -89,11 +86,8 @@ private SortedDictionary GetGroupsDict() var groupVal = group.ToList(); var groupIdsDict = new IdsDict(); - foreach (var (id, studentHashSummary) in groupVal) - { - groupIdsDict.Add(id, studentHashSummary); - } - + foreach (var (id, studentHashSummary) in groupVal) groupIdsDict.Add(id, studentHashSummary); + groupsDict.Add(groupId, groupIdsDict); } diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs index 1f422b5ed..3e294db8f 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/OutputWriteUtil.cs @@ -23,9 +23,9 @@ public OutputWriteUtil(ArgsConfig argsConfig) public void SaveOutputs(RankingsSet rankingsSet, DateFound dateFound) { var outFolder = Path.Join(_config.DataFolder, Constants.OutputFolder); - + rankingsSet.WriteAllRankings(outFolder, _config.ForceReparsing); - IndexJsonBase.WriteAllIndexes(rankingsSet, outFolder, _config); + IndexJsonBase.WriteAllIndexes(rankingsSet, outFolder); StatsJson.From(rankingsSet).Write(outFolder, _config); HashMatricoleWrite.From(rankingsSet).Write(outFolder); diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs b/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs index 2a0299f88..52e783ef4 100644 --- a/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs +++ b/PoliNetwork.Graduatorie.Parser/Utils/Output/RankingSummaryStudent.cs @@ -40,6 +40,14 @@ public RankingSummaryStudent(string? phase, SchoolEnum? school, int? year, Ranki Url = url; } + public bool Equals(RankingSummaryStudent? other) + { + if (ReferenceEquals(null, other)) return false; + if (ReferenceEquals(this, other)) return true; + return Course == other.Course && Phase == other.Phase && School == other.School && Equals(Url, other.Url) && + Year == other.Year; + } + public int Compare(RankingSummaryStudent o) { var i = (Year ?? 0) - (o.Year ?? 0); @@ -70,13 +78,6 @@ public override bool Equals(object? obj) Year == rankingSummaryStudent.Year && equals && Course == rankingSummaryStudent.Course; } - public bool Equals(RankingSummaryStudent? other) - { - if (ReferenceEquals(null, other)) return false; - if (ReferenceEquals(this, other)) return true; - return Course == other.Course && Phase == other.Phase && School == other.School && Equals(Url, other.Url) && Year == other.Year; - } - public override int GetHashCode() { return HashCode.Combine(Course, Phase, School, Url, Year); diff --git a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs b/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs deleted file mode 100644 index fc32e7a43..000000000 --- a/PoliNetwork.Graduatorie.Parser/Utils/Transformer/ParserNS/Converter.cs +++ /dev/null @@ -1,49 +0,0 @@ -#region - -using PoliNetwork.Graduatorie.Common.Objects; -using PoliNetwork.Graduatorie.Parser.Objects; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course; -using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit; - -#endregion - -namespace PoliNetwork.Graduatorie.Parser.Utils.Transformer.ParserNS; - -public static class Converter -{ - public static StudentResult FromMeritTableToStudentResult(MeritTableRow row) - { - var rowCanEnroll = row.CanEnroll ?? false; - var rowCanEnrollInto = rowCanEnroll ? row.CanEnrollInto : null; - return new StudentResult - { - Id = row.Id, - Ofa = row.Ofa, - Result = row.Result, - BirthDate = null, - PositionAbsolute = row.Position, - PositionCourse = null, - SectionsResults = null, - EnglishCorrectAnswers = null, - EnrollType = EnrollType.From(rowCanEnrollInto, rowCanEnroll) - }; - } - - - public static StudentResult FromCourseTableRowToStudentResult(CourseTableRow row, Table course) - { - var rowCanEnroll = row.CanEnroll ?? false; - return new StudentResult - { - Id = row.Id, - Ofa = row.Ofa, - Result = row.Result, - BirthDate = row.BirthDate, - EnrollType = EnrollType.From(course.CourseTitle, rowCanEnroll), - PositionAbsolute = null, - PositionCourse = row.Position, - SectionsResults = row.SectionsResults, - EnglishCorrectAnswers = row.EnglishCorrectAnswers - }; - } -} \ No newline at end of file diff --git a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs index 0313a1706..24ca314e0 100644 --- a/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs +++ b/PoliNetwork.Graduatorie.Scraper/Utils/Web/ScraperOutput.cs @@ -9,7 +9,6 @@ namespace PoliNetwork.Graduatorie.Scraper.Utils.Web; public static class ScraperOutput { - public static List GetWithUrlsFromLocalFileLinks(IEnumerable urls, string dataFolder) { var links = GetSaved(dataFolder); @@ -32,11 +31,11 @@ private static List GetSaved(string dataFolder) { // consider to handle them Console.WriteLine($"[ERROR] Can't validate the ScraperOutput file ({filePath})"); - return new(); + return new List(); } } - private static List GetLines(string filePath) + private static IEnumerable GetLines(string filePath) { try { @@ -46,7 +45,7 @@ private static List GetLines(string filePath) { Console.WriteLine(exception); Console.WriteLine($"[ERROR] Can't read the ScraperOutput file ({filePath})"); - return new(); + return new List(); } } @@ -84,7 +83,7 @@ private static string SelectorUrlWithEndLine(string url) { return url + "\n"; } - + private static string GetFilePath(string dataFolder) { return Path.Join(dataFolder, Constants.OutputLinksFilename);