Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion PoliNetwork.Graduatorie.Common/Enums/SchoolEnum.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public static string ToShortName(this SchoolEnum s)
SchoolEnum.Design => "DES",
SchoolEnum.Ingegneria => "ENG",
SchoolEnum.Urbanistica => "URB",
_ => "UNK",
_ => "UNK"
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
using PoliNetwork.Graduatorie.Parser.Objects.RankingNS;
using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course;
using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit;
using PoliNetwork.Graduatorie.Parser.Utils;

#endregion

Expand Down Expand Up @@ -92,20 +91,20 @@ private static bool SameHashCourse(IReadOnlyCollection<CourseTable>? aTableCours

var aHash = aTableCourse.Select(variable =>
{
var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate());
var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate();
return hashWithoutLastUpdate;
}).ToList();

var bHash = bTableCourse.Select(variable =>
{
var hashWithoutLastUpdate = Hashing.GetHashFromListHash(variable.GetHashWithoutLastUpdate());
var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate();
return hashWithoutLastUpdate;
}).ToList();

var ai = Hashing.GetHashFromListHash(aHash);
var bi = Hashing.GetHashFromListHash(bHash);
var ai = aHash;
var bi = bHash;

return (ai ?? 0) == (bi ?? 0);
return ai == bi;
}

private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMerit)
Expand All @@ -117,9 +116,7 @@ private static bool SameHashMerit(MeritTable? aTableMerit, MeritTable? bTableMer

var ai = aTableMerit.GetHashWithoutLastUpdate();
var bi = bTableMerit.GetHashWithoutLastUpdate();
var aii = Hashing.GetHashFromListHash(ai) ?? 0;
var bii = Hashing.GetHashFromListHash(bi) ?? 0;
return aii == bii;
return ai == bi;
}

private static Ranking? GetRankingFromFile(string path)
Expand Down
28 changes: 14 additions & 14 deletions PoliNetwork.Graduatorie.Parser/Objects/Json/SingleCourseJson.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,20 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.Json;

[Serializable]
[JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))]
public class SingleCourseJson: IComparable<SingleCourseJson>
public class SingleCourseJson : IComparable<SingleCourseJson>
{
public string? BasePath;
public string? Id;
public string? Link;
public string? Location;
public string? Id;
public RankingOrder? RankingOrder;
public SchoolEnum? School;
public int? Year;

public int GetHashWithoutLastUpdate()
{
var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode();
var hashCode = Id?.GetHashCode() ?? "Id".GetHashCode();
var basePathInt = BasePath?.GetHashCode() ?? "BasePath".GetHashCode();
var yearInt = Year?.GetHashCode() ?? "Year".GetHashCode();
var schoolInt = School?.GetHashCode() ?? "School".GetHashCode();
var code = "SingleCourseJson".GetHashCode();
return hashWithoutLastUpdate ^ hashCode ^ basePathInt ^ yearInt ^ schoolInt ^ code;
}

public int CompareTo(SingleCourseJson? singleCourseJson)
{
if (singleCourseJson == null) return 1;

if (Year != singleCourseJson.Year)
return (Year ?? -1) < (singleCourseJson.Year ?? -1) ? -1 : 1;

Expand All @@ -57,6 +46,17 @@ public int CompareTo(SingleCourseJson? singleCourseJson)
return 0;
}

public int GetHashWithoutLastUpdate()
{
var hashWithoutLastUpdate = Link?.GetHashCode() ?? "Link".GetHashCode();
var hashCode = Id?.GetHashCode() ?? "Id".GetHashCode();
var basePathInt = BasePath?.GetHashCode() ?? "BasePath".GetHashCode();
var yearInt = Year?.GetHashCode() ?? "Year".GetHashCode();
var schoolInt = School?.GetHashCode() ?? "School".GetHashCode();
var code = "SingleCourseJson".GetHashCode();
return hashWithoutLastUpdate ^ hashCode ^ basePathInt ^ yearInt ^ schoolInt ^ code;
}

public bool Is(CourseTable courseTable)
{
return (RankingOrder?.Phase ?? "") == courseTable.Title;
Expand Down
21 changes: 10 additions & 11 deletions PoliNetwork.Graduatorie.Parser/Objects/RankingNS/Ranking.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
using PoliNetwork.Graduatorie.Parser.Objects.Json.Stats;
using PoliNetwork.Graduatorie.Parser.Objects.Tables.Course;
using PoliNetwork.Graduatorie.Parser.Objects.Tables.Merit;
using PoliNetwork.Graduatorie.Parser.Utils;
using PoliNetwork.Graduatorie.Parser.Utils.Output;

#endregion
Expand All @@ -30,19 +29,19 @@ public class Ranking : IComparable<Ranking>
public RankingUrl? Url;
public int? Year;

public RankingSummaryStudent GetRankingSummaryStudent()
{
return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url);
}

public int CompareTo(Ranking? other)
{
if (ReferenceEquals(this, other)) return 0;
if (ReferenceEquals(null, other)) return 1;

return string.Compare(GetId(), other.GetId(), StringComparison.Ordinal);
}

public RankingSummaryStudent GetRankingSummaryStudent()
{
return new RankingSummaryStudent(RankingOrder?.Phase, School, Year, Url);
}


/***
* Ottieni l'hash senza considerare il valore di LastUpdate
Expand All @@ -57,7 +56,7 @@ public int GetHashWithoutLastUpdate()
i ^= Url?.GetHashWithoutLastUpdate() ?? "Url".GetHashCode();
i ^= Year?.GetHashCode() ?? "Year".GetHashCode();
var iMerit = ByMerit?.GetHashWithoutLastUpdate();
i ^= Hashing.GetHashFromListHash(iMerit) ?? "ByMerit".GetHashCode();
i ^= iMerit ?? "ByMerit".GetHashCode();


if (ByCourse == null)
Expand All @@ -66,7 +65,7 @@ public int GetHashWithoutLastUpdate()
i = ByCourse.Aggregate(i, (current, variable) =>
{
var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate();
var iList = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty".GetHashCode();
var iList = hashWithoutLastUpdate;
return current ^ iList;
});

Expand Down Expand Up @@ -113,7 +112,7 @@ public string GetFilename()
public string GetId()
{
var idList = new List<string>();

var schoolShort = School?.ToShortName();
if (schoolShort != null) idList.Add(schoolShort);

Expand All @@ -122,7 +121,7 @@ public string GetId()

var orderId = RankingOrder?.GetId();
if (orderId != null) idList.Add(orderId);

var fallback = DateTime.UtcNow.ToString("yyyyMMddTHHmmss", CultureInfo.InvariantCulture) + "Z";
if (idList.Count == 0) idList.Add(fallback);

Expand Down
31 changes: 14 additions & 17 deletions PoliNetwork.Graduatorie.Parser/Objects/RankingNS/RankingOrder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@ namespace PoliNetwork.Graduatorie.Parser.Objects.RankingNS;
[JsonObject(MemberSerialization.Fields, NamingStrategyType = typeof(CamelCaseNamingStrategy))]
public class RankingOrder
{
public bool IsAnticipata; // used for DES/URB rankings until 2023
public bool IsEnglish;
public bool IsExtraEu;
public string? Phase; // the original string (e.g. "

//esempio:
//seconda graduatoria di seconda fase: {primary:2,secondary:2}
//prima graduatoria di seconda fase:{primary:2, secondary:1}
public int? Primary;
public int? Secondary;
public bool IsAnticipata; // used for DES/URB rankings until 2023
public bool IsExtraEu;
public bool IsEnglish;

public RankingOrder(string phase, bool isExtraEu = false, bool isEnglish = false)
{
Phase = phase;
ParsePhaseString(phase);

IsExtraEu = isExtraEu;
IsEnglish = isEnglish;
}
Expand All @@ -35,12 +35,12 @@ private void ParsePhaseString(string phase)
{
var s = phase.ToUpper().Trim();
if (string.IsNullOrEmpty(s)) return;

var strings = s.Split(" ");

IsAnticipata = s.Contains("ANTICIPATA");
if (IsAnticipata) return;

Primary = ExtractPhaseNumberByKey(strings, "FASE");
Secondary = ExtractPhaseNumberByKey(strings, "GRADUATORIA");
}
Expand Down Expand Up @@ -74,22 +74,19 @@ private void ParsePhaseString(string phase)
public string GetId()
{
var idList = new List<string>();
if (IsAnticipata) idList.Add($"anticipata");
if (IsAnticipata) idList.Add("anticipata");
if (Primary != null) idList.Add($"{Primary}fase");
if (Secondary != null) idList.Add($"{Secondary}grad");

var cleanPhase = Phase?.Replace("_", "").Replace("-", "").Replace(" ", "_").ToLower() ?? "";
var noOrder = IsAnticipata == false && Primary == null && Secondary == null;
var noOrder = IsAnticipata == false && Primary == null && Secondary == null;
var isSingleExtraEu = noOrder && cleanPhase.Contains("extraue");

if (noOrder)
{
idList.Add(isSingleExtraEu ? "extraeu" : cleanPhase);
}

if (noOrder) idList.Add(isSingleExtraEu ? "extraeu" : cleanPhase);

idList.Add(IsEnglish ? "eng" : "ita");
if (IsExtraEu && !isSingleExtraEu) idList.Add("extraeu"); // the second condition is to avoid double extraeu

var id = string.Join("_", idList);
return id;
}
Expand All @@ -111,4 +108,4 @@ public void Merge(RankingOrder? rankingRankingOrder)
Primary ??= rankingRankingOrder?.Primary;
Secondary ??= rankingRankingOrder?.Secondary;
}
}
}
5 changes: 3 additions & 2 deletions PoliNetwork.Graduatorie.Parser/Objects/StudentResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using Newtonsoft.Json;
using Newtonsoft.Json.Serialization;
using PoliNetwork.Graduatorie.Common.Objects;
using PoliNetwork.Graduatorie.Parser.Utils;

#endregion

Expand All @@ -22,7 +23,7 @@ public class StudentResult
public decimal? Result;
public SortedDictionary<string, decimal>? SectionsResults;

public List<int?> GetHashWithoutLastUpdate()
public int GetHashWithoutLastUpdate()
{
var r = new List<int?>
{
Expand All @@ -47,6 +48,6 @@ public class StudentResult
r.Add(SectionsResults.Aggregate("SectionsResultsFull".GetHashCode(),
(current, variable) => current ^ variable.Key.GetHashCode() ^ variable.Value.GetHashCode()));

return r;
return Hashing.GetHashFromListHash(r);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public class MeritTable
public List<StudentResult>? Rows;
public int? Year;

public List<int?> GetHashWithoutLastUpdate()
public int GetHashWithoutLastUpdate()
{
var r = new List<int?> { "MeritTable".GetHashCode() };
if (Headers != null)
Expand All @@ -30,14 +30,13 @@ public class MeritTable
r.Add(Rows.Aggregate("RowsFull".GetHashCode(), (current, variable) =>
{
var hashWithoutLastUpdate = variable.GetHashWithoutLastUpdate();
var hashFromListHash = Hashing.GetHashFromListHash(hashWithoutLastUpdate) ?? "empty3".GetHashCode();
return current ^ hashFromListHash;
return current ^ hashWithoutLastUpdate;
}));
else
r.Add("RowsEmpty".GetHashCode());

r.Add(Year?.GetHashCode() ?? "Year".GetHashCode());
r.Add(Path?.GetHashCode() ?? "Path".GetHashCode());
return r;
return Hashing.GetHashFromListHash(r);
}
}
9 changes: 5 additions & 4 deletions PoliNetwork.Graduatorie.Parser/Utils/Hashing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ namespace PoliNetwork.Graduatorie.Parser.Utils;

public static class Hashing
{
public static int? GetHashFromListHash(IReadOnlyCollection<int?>? iMerit)
public static int GetHashFromListHash(IReadOnlyCollection<int?>? iMerit)
{
if (iMerit == null)
return null;
return 0;
if (iMerit.Count == 0)
return null;
return 0;

return iMerit.Aggregate(0, (current, variable) => current ^ variable ?? 0);
var hashFromListHash = iMerit.Aggregate(0, (current, variable) => current ^ variable ?? 0);
return hashFromListHash;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -208,26 +208,27 @@ private RankingsSet ParseNewRankings(IReadOnlyCollection<HtmlPage> htmls)
var extraEuStr = intestazioni[4].Split("\n")[0].ToLower();
var isExtraEu = extraEuStr.Contains("extra-ue");

if (ranking.Year < 2024) {
if (ranking.Year < 2024)
{
// layout valid until 2023
var phase = string.Join(" ", intestazioni[3].Split(" - ")[1..]);
ranking.RankingOrder = new RankingOrder(phase, isExtraEu);
if (ranking.School == SchoolEnum.Architettura && ranking.RankingOrder.Primary == null &&
ranking.RankingOrder.Secondary == null && ranking.RankingOrder.IsExtraEu)
{
// this is a fallback for 2020-2023:
// POLIMI was used to add the ranking number (Secondary, e.g. "Prima Graduatoria") for ExtraEU starting
// from the second ranking.
// e.g. Extra-EU first ranking => phase = "Extra-ue",
// Extra-EU second ranking => phase = "Extra-ue - Seconda Graduatoria"
// so this is a fallback to add the equivalent of "Prima Graduatoria" to the first ExtraEU ranking.

ranking.RankingOrder.Secondary = 1;
}
} else {
}
else
{
// layout valid since 2024 (if the layout changes again, make another else if)
var phase = intestazioni[3];
var isEnglish = intestazioni[2].Contains("taught in english") || intestazioni[2].Contains("erogati in inglese");
var isEnglish = intestazioni[2].Contains("taught in english") ||
intestazioni[2].Contains("erogati in inglese");
ranking.RankingOrder = new RankingOrder(phase, isExtraEu, isEnglish);
}

Expand Down Expand Up @@ -774,4 +775,4 @@ private IEnumerable<HtmlPage> ParseLocalHtmlFiles()
);
return obj2;
}
}
}
7 changes: 4 additions & 3 deletions PoliNetwork.Graduatorie.Scraper/Utils/Web/Scraper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,16 @@ private IEnumerable<string> ScrapeAvvisiFuturiStudenti()
var page = _web.Load(AvvisiFuturiStudentiUrl).DocumentNode;

var newsCards =
page.SelectNodes("//div[contains(@class, 'news')]//div[contains(@class, 'row--card')]//div[contains(@class, 'card__content')]");
page.SelectNodes(
"//div[contains(@class, 'news')]//div[contains(@class, 'row--card')]//div[contains(@class, 'card__content')]");
if (newsCards == null) return links;

foreach (var card in newsCards)
{
var title = card.Descendants("h5").First();
var titleValid = title != null && IsValidText(title.InnerText);

var body = card.Descendants("p").Where(el => el.ParentNode.HasClass("news-bodytext")).First();
var body = card.Descendants("p").First(el => el.ParentNode.HasClass("news-bodytext"));
var bodyValid = body != null && IsValidText(body.InnerText);

var aTag = card.Descendants("a").First();
Expand Down Expand Up @@ -134,4 +135,4 @@ private bool IsValidText(string text)
return null;
}
}
}
}
Loading