From c3058cf977329377109d475c1de7e25c945e5658 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Wed, 18 Jan 2023 20:51:18 +0100 Subject: [PATCH 01/22] Added get_dates() internal --- src/eAsisitent_scraper/scraper.py | 61 +++++++++++++++++-------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 6ccd244..cb4bd3f 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -7,6 +7,22 @@ from bs4 import BeautifulSoup +def get_dates(table_row: bs4.element.Tag) -> list[datetime.datetime]: + dates: list = [] + for days in table_row: + if type(days) == bs4.element.Tag: + day = days.select("div") + if day[0].text != "Ura": + temp_date = re.findall(r"[^A-z,. ]+", day[1].text) + temp_datetime = datetime.datetime( + day=int(temp_date[0]), + month=int(temp_date[1]), + year=today.year, + ) + dates.append(temp_datetime) + return dates + + def request_schedule( school_id: str, class_id=0, @@ -91,8 +107,7 @@ def get_schedule_data( count: int = -1 - dates: list = [] - dates_formatted: list = [] + # x.strftime("%Y-%m-%d") hour_times: list = [] scraped_data: dict = {} @@ -113,20 +128,10 @@ def get_schedule_data( ) for table_row in table_rows: + print(type(table_row)) if count == -1: - for days in table_row: - if type(days) == bs4.element.Tag: - day = days.select("div") - if day[0].text != "Ura": - temp_date = re.findall(r"[^A-z,. ]+", day[1].text) - temp_datetime = datetime.datetime( - day=int(temp_date[0]), - month=int(temp_date[1]), - year=today.year, - ) - dates_formatted.append( - str(temp_datetime.strftime("%Y-%m-%d"))) - dates.append(temp_datetime) + dates = get_dates(table_row) + if count >= 0: row = table_row.find_all("td", class_="ednevnik-seznam_ur_teden-td") @@ -190,18 +195,18 @@ def get_schedule_data( try: subject = ( section.find(class_="text14") - .text.replace("\n", "") - .replace("\t", "") + .text.replace("\n", "") + .replace("\t", "") ) group_raw = section.find_all( class_="text11 gray bold" ) teacher_classroom = ( section.find(class_="text11") - .text.replace("\n", "") - .replace("\t", "") - .replace("\r", "") - .split(", ") + .text.replace("\n", "") + .replace("\t", "") + .replace("\r", "") + .split(", ") ) teacher = teacher_classroom[0] classroom = teacher_classroom[1] @@ -253,18 +258,18 @@ def get_schedule_data( try: subject = ( block.find(class_="text14") - .text.replace("\n", "") - .replace("\t", "") + .text.replace("\n", "") + .replace("\t", "") ) group_raw = block.find_all( class_="text11 gray bold" ) teacher_classroom = ( block.find(class_="text11") - .text.replace("\n", "") - .replace("\t", "") - .replace("\r", "") - .split(", ") + .text.replace("\n", "") + .replace("\t", "") + .replace("\r", "") + .split(", ") ) teacher = teacher_classroom[0] classroom = teacher_classroom[ @@ -315,7 +320,7 @@ def get_schedule_data( count += 1 scraped_data["request_data"] = {} scraped_data["request_data"]["hour_times"] = hour_times - scraped_data["request_data"]["dates"] = dates_formatted + scraped_data["request_data"]["dates"] = [x.strftime("%Y-%m-%d") for x in dates] scraped_data["request_data"]["class"] = current_class scraped_data["request_data"]["request_week"] = current_week scraped_data["request_data"]["request_epoch"] = request_time From e58dad2bc07b731957c9295db39622e2d054092a Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Wed, 18 Jan 2023 21:14:25 +0100 Subject: [PATCH 02/22] Use enumerate() for count in loops --- src/eAsisitent_scraper/scraper.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index cb4bd3f..92115a2 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -23,6 +23,12 @@ def get_dates(table_row: bs4.element.Tag) -> list[datetime.datetime]: return dates +def get_hour_time_data(row: bs4.element.ResultSet) -> tuple[str, str]: + hour_name = str(row[0].find(class_="text14").text) + hour_time = str(row[0].find(class_="text10").text.replace(" ", "")) + return hour_name, hour_time + + def request_schedule( school_id: str, class_id=0, @@ -105,9 +111,6 @@ def get_schedule_data( soup = BeautifulSoup(response.text, "html5lib") table_rows = soup.select("body > table > tbody > tr") - count: int = -1 - - # x.strftime("%Y-%m-%d") hour_times: list = [] scraped_data: dict = {} @@ -127,20 +130,17 @@ def get_schedule_data( [item.text.strip() for item in soup.select("body > div > strong")][0] ) - for table_row in table_rows: - print(type(table_row)) - if count == -1: + for count, table_row in enumerate(table_rows): + if count == 0: dates = get_dates(table_row) - if count >= 0: + if count >= 1: row = table_row.find_all("td", class_="ednevnik-seznam_ur_teden-td") - hour_name = str(row[0].find(class_="text14").text) - hour_time = row[0].find(class_="text10").text.replace(" ", "") + hour_name, hour_time = get_hour_time_data(row) hour_times.append(hour_time) - count2: int = 0 - for row_part in row: + for count2, row_part in enumerate(row): if count2 != 0: """Pass the first collum that contains hour times""" date = dates[count2 - 1] @@ -316,8 +316,6 @@ def get_schedule_data( str(classes_in_hour) ] = data_out classes_in_hour += 1 - count2 += 1 - count += 1 scraped_data["request_data"] = {} scraped_data["request_data"]["hour_times"] = hour_times scraped_data["request_data"]["dates"] = [x.strftime("%Y-%m-%d") for x in dates] From fb048d28246daa54e8bbcb1a4d4f5d041fde58f1 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Wed, 18 Jan 2023 21:18:21 +0100 Subject: [PATCH 03/22] Convert datetime.datetime to datetime.date --- src/eAsisitent_scraper/scraper.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 92115a2..05d5ee3 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -7,14 +7,18 @@ from bs4 import BeautifulSoup -def get_dates(table_row: bs4.element.Tag) -> list[datetime.datetime]: +def format_date(date: datetime.date) -> str: + return str(date.strftime("%Y-%m-%d")) + + +def get_dates(table_row: bs4.element.Tag) -> list[datetime.date]: dates: list = [] for days in table_row: if type(days) == bs4.element.Tag: day = days.select("div") if day[0].text != "Ura": temp_date = re.findall(r"[^A-z,. ]+", day[1].text) - temp_datetime = datetime.datetime( + temp_datetime = datetime.date( day=int(temp_date[0]), month=int(temp_date[1]), year=today.year, From cd8c9e05669c87bac620f90c785895f107ee4493 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Wed, 18 Jan 2023 21:34:27 +0100 Subject: [PATCH 04/22] Added get_hour_data() --- src/eAsisitent_scraper/scraper.py | 106 ++++++++++++------------------ 1 file changed, 43 insertions(+), 63 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 05d5ee3..10f8c85 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -3,9 +3,43 @@ import re import requests import time +from dataclasses import dataclass from bs4 import BeautifulSoup +EVENT_MAP = { + "Odpadla ura": "cancelled", + "Dogodek": "event", + "Nadomeščanje": "substitute", + "Polovična ura": "half_hour", + "Videokonferenca": "video_call", + "Interesna dejavnost": "activity", + "Zaposlitev": "occupation", + "Neopravljena ura": "unfinished_hour", + "Govorilne ure": "office hours", + "Izpiti": "exams", +} + + +@dataclass() +class Formatting: + SUBJECT_CLASS = "text14" + RAW_GROUP_CLASS = "text11 gray bold" + TEACHER_CLASSROOM_CLASS = "text11" + + +def get_hour_data(section) -> tuple[str, str, str]: + subject = section.find(class_=Formatting.SUBJECT_CLASS).text.replace("\n", "").replace("\t", "") + group_raw = section.find_all(class_=Formatting.RAW_GROUP_CLASS) + teacher_classroom = ( + section.find(class_=Formatting.TEACHER_CLASSROOM_CLASS) + .text.replace("\n", "") + .replace("\t", "") + .replace("\r", "") + .split(", ") + ) + return subject, group_raw, teacher_classroom + def format_date(date: datetime.date) -> str: return str(date.strftime("%Y-%m-%d")) @@ -27,7 +61,7 @@ def get_dates(table_row: bs4.element.Tag) -> list[datetime.date]: return dates -def get_hour_time_data(row: bs4.element.ResultSet) -> tuple[str, str]: +def get_hours_time_data(row: bs4.element.ResultSet) -> tuple[str, str]: hour_name = str(row[0].find(class_="text14").text) hour_time = str(row[0].find(class_="text10").text.replace(" ", "")) return hour_name, hour_time @@ -116,7 +150,7 @@ def get_schedule_data( table_rows = soup.select("body > table > tbody > tr") hour_times: list = [] - + dates: list[datetime.date] = [] scraped_data: dict = {} current_week = int( @@ -141,7 +175,7 @@ def get_schedule_data( if count >= 1: row = table_row.find_all("td", class_="ednevnik-seznam_ur_teden-td") - hour_name, hour_time = get_hour_time_data(row) + hour_name, hour_time = get_hours_time_data(row) hour_times.append(hour_time) for count2, row_part in enumerate(row): @@ -149,7 +183,7 @@ def get_schedule_data( """Pass the first collum that contains hour times""" date = dates[count2 - 1] day_num = str(date.weekday()) - date_formatted = str(date.strftime("%Y-%m-%d")) + date_formatted = format_date(date) if day_num not in scraped_data.keys(): scraped_data.update({str(day_num): {}}) scraped_data[day_num].update({str(hour_name): {}}) @@ -177,41 +211,14 @@ def get_schedule_data( group = [] teacher = None classroom = None - teacher_classroom = None for img in section.select("img"): - events_list = { - "Odpadla ura": "cancelled", - "Dogodek": "event", - "Nadomeščanje": "substitute", - "Polovična ura": "half_hour", - "Videokonferenca": "video_call", - "Interesna dejavnost": "activity", - "Zaposlitev": "occupation", - "Neopravljena ura": "unfinished_hour", - "Govorilne ure": "office_hours", - "Izpiti": "exams", - } try: - event = events_list[img.attrs["title"]] + event = EVENT_MAP[img.attrs["title"]] except KeyError: event = "unknown_event" try: - subject = ( - section.find(class_="text14") - .text.replace("\n", "") - .replace("\t", "") - ) - group_raw = section.find_all( - class_="text11 gray bold" - ) - teacher_classroom = ( - section.find(class_="text11") - .text.replace("\n", "") - .replace("\t", "") - .replace("\r", "") - .split(", ") - ) + subject, group_raw, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] classroom = teacher_classroom[1] except IndexError: @@ -239,42 +246,15 @@ def get_schedule_data( group = [] teacher = None classroom = None - teacher_classroom = None for img in block.select("img"): - events_list = { - "Odpadla ura": "cancelled", - "Dogodek": "event", - "Nadomeščanje": "substitute", - "Polovična ura": "half_hour", - "Videokonferenca": "video_call", - "Interesna dejavnost": "activity", - "Zaposlitev": "occupation", - "Neopravljena ura": "unfinished_hour", - "Govorilne ure": "office hours", - "Izpiti": "exams", - } try: - event = events_list[ + event = EVENT_MAP[ img.attrs["title"] ] except KeyError: event = "unknown_event" try: - subject = ( - block.find(class_="text14") - .text.replace("\n", "") - .replace("\t", "") - ) - group_raw = block.find_all( - class_="text11 gray bold" - ) - teacher_classroom = ( - block.find(class_="text11") - .text.replace("\n", "") - .replace("\t", "") - .replace("\r", "") - .split(", ") - ) + subject, group_raw, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] classroom = teacher_classroom[ 1] @@ -322,7 +302,7 @@ def get_schedule_data( classes_in_hour += 1 scraped_data["request_data"] = {} scraped_data["request_data"]["hour_times"] = hour_times - scraped_data["request_data"]["dates"] = [x.strftime("%Y-%m-%d") for x in dates] + scraped_data["request_data"]["dates"] = [format_date(x) for x in dates] scraped_data["request_data"]["class"] = current_class scraped_data["request_data"]["request_week"] = current_week scraped_data["request_data"]["request_epoch"] = request_time From 585e43e40108e76be4e3343dcc4f55306bec8bf4 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Thu, 19 Jan 2023 16:39:22 +0100 Subject: [PATCH 05/22] Added make_data_out() --- src/eAsisitent_scraper/scraper.py | 90 +++++++++++++++---------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 10f8c85..d68a15a 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -41,6 +41,30 @@ def get_hour_data(section) -> tuple[str, str, str]: return subject, group_raw, teacher_classroom +def make_data_out( + date: datetime.date, + subject: str = None, + teacher: str = None, + classroom: str = None, + group: list = None, + event: str = None, + hour_name: str = None, + week_day: str = None, + hour_in_block: int = None +) -> dict: + return { + "subject": subject, + "teacher": teacher, + "classroom": classroom, + "group": group, + "event": event, + "hour": hour_name, + "week_day": int(week_day), + "hour_in_block": hour_in_block, + "date": format_date(date), + } + + def format_date(date: datetime.date) -> str: return str(date.strftime("%Y-%m-%d")) @@ -183,23 +207,12 @@ def get_schedule_data( """Pass the first collum that contains hour times""" date = dates[count2 - 1] day_num = str(date.weekday()) - date_formatted = format_date(date) if day_num not in scraped_data.keys(): scraped_data.update({str(day_num): {}}) scraped_data[day_num].update({str(hour_name): {}}) if "style" not in row_part.attrs: - data_out = { - "subject": None, - "teacher": None, - "classroom": None, - "group": None, - "event": None, - "hour": hour_name, - "week_day": int(day_num), - "hour_in_block": 0, - "date": date_formatted, - } + data_out = make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) scraped_data[day_num][hour_name]["0"] = data_out else: classes_in_hour = 0 @@ -210,7 +223,7 @@ def get_schedule_data( group_raw = None group = [] teacher = None - classroom = None + hour_classroom = None for img in section.select("img"): try: event = EVENT_MAP[img.attrs["title"]] @@ -220,7 +233,7 @@ def get_schedule_data( try: subject, group_raw, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] - classroom = teacher_classroom[1] + hour_classroom = teacher_classroom[1] except IndexError: pass # Makes it so empty strings don't # crash the program @@ -230,13 +243,15 @@ def get_schedule_data( if group_raw: for gr in group_raw: group.append(gr.text) - if ("id" in section.attrs) and bool( - re.match( - r"ednevnik-seznam_ur_teden-blok" - r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d", - section.attrs["id"], - ) - ): + is_block_hour = ("id" in section.attrs) and bool( + re.match( + r"ednevnik-seznam_ur_teden-blok" + r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d", + section.attrs["id"], + ) + ) + + if is_block_hour: # Check for blocks for block in section: if type(block) == bs4.element.Tag: @@ -245,7 +260,7 @@ def get_schedule_data( group_raw = None group = [] teacher = None - classroom = None + hour_classroom = None for img in block.select("img"): try: event = EVENT_MAP[ @@ -256,7 +271,7 @@ def get_schedule_data( try: subject, group_raw, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] - classroom = teacher_classroom[ + hour_classroom = teacher_classroom[ 1] except IndexError: pass @@ -267,35 +282,18 @@ def get_schedule_data( if group_raw: for gr in group_raw: group.append(gr.text) - data_out = { - "subject": subject, - "teacher": teacher, - "classroom": classroom, - "group": group, - "event": event, - "hour": hour_name, - "week_day": int(day_num), - "hour_in_block": int( - classes_in_hour), - "date": date_formatted, - } + data_out = make_data_out( + date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour + ) scraped_data[day_num][hour_name][ str(classes_in_hour) ] = data_out classes_in_hour += 1 else: - data_out = { - "subject": subject, - "teacher": teacher, - "classroom": classroom, - "group": group, - "event": event, - "hour": hour_name, - "week_day": int(day_num), - "hour_in_block": int(classes_in_hour), - "date": date_formatted, - } + data_out = make_data_out( + date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour + ) scraped_data[day_num][hour_name][ str(classes_in_hour) ] = data_out From 9a83dc4ce8a108f022d34eb5b4378ad0de80a4a3 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Thu, 19 Jan 2023 19:22:51 +0100 Subject: [PATCH 06/22] Create dataclasses --- src/eAsisitent_scraper/scraper.py | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index d68a15a..7f42e7d 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -28,6 +28,40 @@ class Formatting: TEACHER_CLASSROOM_CLASS = "text11" +@dataclass() +class HourBlock: + subject: str + teacher: str + classroom: str + group: list[str] + event: str + hour: str + hour_in_block: int + + +@dataclass() +class Hour: + name: str + blocks: list[HourBlock] + + +@dataclass() +class SchoolDay: + date: datetime + hours: list[Hour] + + +@dataclass() +class Schedule: + days: list[SchoolDay] + hour_times: list[str] + dates: list[str] + class_name: str + request_week: int + request_epoch: int + used_data: dict + + def get_hour_data(section) -> tuple[str, str, str]: subject = section.find(class_=Formatting.SUBJECT_CLASS).text.replace("\n", "").replace("\t", "") group_raw = section.find_all(class_=Formatting.RAW_GROUP_CLASS) From 04cbb8774fe374cb032a142d402622c653f7af01 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 18:59:52 +0100 Subject: [PATCH 07/22] Commit --- src/eAsisitent_scraper/scraper.py | 242 +++++++++++++++--------------- 1 file changed, 122 insertions(+), 120 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 7f42e7d..3e20f14 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -26,6 +26,8 @@ class Formatting: SUBJECT_CLASS = "text14" RAW_GROUP_CLASS = "text11 gray bold" TEACHER_CLASSROOM_CLASS = "text11" + EVENT_CLASS = "text14" + EVENT_STYLE = "border:none" @dataclass() @@ -37,6 +39,8 @@ class HourBlock: event: str hour: str hour_in_block: int + date: datetime.date + debug: str = None @dataclass() @@ -55,23 +59,30 @@ class SchoolDay: class Schedule: days: list[SchoolDay] hour_times: list[str] - dates: list[str] + dates: list[datetime.date] class_name: str request_week: int request_epoch: int used_data: dict -def get_hour_data(section) -> tuple[str, str, str]: +def get_hour_data(section: bs4.element.Tag) -> tuple[str, str, str]: subject = section.find(class_=Formatting.SUBJECT_CLASS).text.replace("\n", "").replace("\t", "") group_raw = section.find_all(class_=Formatting.RAW_GROUP_CLASS) - teacher_classroom = ( - section.find(class_=Formatting.TEACHER_CLASSROOM_CLASS) - .text.replace("\n", "") - .replace("\t", "") - .replace("\r", "") - .split(", ") - ) + try: + teacher_classroom = ( + section.find(class_=Formatting.TEACHER_CLASSROOM_CLASS) + .text.replace("\n", "") + .replace("\t", "") + .replace("\r", "") + .split(", ") + ) + except AttributeError: + subject = section.find(class_=Formatting.EVENT_CLASS).text.replace("\n", "").replace("\t", "") + teacher_classroom = [None, None] + # print("--------------") + # print(section) + # print(repr(event)) return subject, group_raw, teacher_classroom @@ -99,6 +110,21 @@ def make_data_out( } +def make_data_out_v2( + date: datetime.date, + subject: str = None, + teacher: str = None, + classroom: str = None, + group: list = None, + event: str = None, + hour_name: str = None, + week_day: str = None, + hour_in_block: int = None, + debug=None +) -> HourBlock: + return HourBlock(subject, teacher, classroom, group, event, hour_name, hour_in_block, date, debug) + + def format_date(date: datetime.date) -> str: return str(date.strftime("%Y-%m-%d")) @@ -134,7 +160,7 @@ def request_schedule( school_week=0, student_id=0, soup=False, -): +) -> requests.models.Response: """ It requests schedule from easistent.com and returns it as a response @@ -174,7 +200,7 @@ def get_schedule_data( interest_activity=0, school_week=0, student_id=0, -): +) -> Schedule: """ Date format is: YYYY-MM-DD If school id is invalid ValueError is raised @@ -222,124 +248,101 @@ def get_schedule_data( ) ) ) - current_class = str( + class_name = str( [item.text.strip() for item in soup.select("body > div > strong")][0] ) - + finla_bundle_pre_turn = [] for count, table_row in enumerate(table_rows): + bundle_hour: list[Hour] = [] if count == 0: dates = get_dates(table_row) - - if count >= 1: - row = table_row.find_all("td", - class_="ednevnik-seznam_ur_teden-td") - hour_name, hour_time = get_hours_time_data(row) - hour_times.append(hour_time) - - for count2, row_part in enumerate(row): - if count2 != 0: - """Pass the first collum that contains hour times""" - date = dates[count2 - 1] - day_num = str(date.weekday()) - if day_num not in scraped_data.keys(): - scraped_data.update({str(day_num): {}}) - scraped_data[day_num].update({str(hour_name): {}}) - - if "style" not in row_part.attrs: - data_out = make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) - scraped_data[day_num][hour_name]["0"] = data_out - else: - classes_in_hour = 0 - for section in row_part: - if type(section) == bs4.element.Tag: - event = None - subject = None - group_raw = None - group = [] - teacher = None - hour_classroom = None - for img in section.select("img"): - try: - event = EVENT_MAP[img.attrs["title"]] - except KeyError: - event = "unknown_event" - - try: + continue + + row = table_row.find_all("td", + class_="ednevnik-seznam_ur_teden-td") + hour_name, hour_time = get_hours_time_data(row) + hour_times.append(hour_time) + for count2, row_part in enumerate(row): + if count2 != 0: + bundle_hour_block = Hour(hour_name, []) + """Pass the first collum that contains hour times""" + date = dates[count2 - 1] + day_num = str(date.weekday()) + if day_num not in scraped_data.keys(): + scraped_data.update({str(day_num): []}) + scraped_data[day_num].append(Hour(hour_name, [])) + + if "style" not in row_part.attrs: # Detect empty hours + data_out = make_data_out_v2(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) + # scraped_data[day_num][count - 1].blocks.append(data_out) + bundle_hour_block.blocks.append(data_out) + else: + classes_in_hour = 0 + for section in row_part: + if type(section) != bs4.element.Tag: + continue + event = None + group = [] + for img in section.select("img"): + try: + event = EVENT_MAP[img.attrs["title"]] + except KeyError: + event = "unknown_event" + subject, group_raw, teacher_classroom = get_hour_data(section) + teacher = teacher_classroom[0] + hour_classroom = teacher_classroom[1] + if group_raw: + for gr in group_raw: + group.append(gr.text) + is_block_hour = ("id" in section.attrs) and bool( + re.match( + r"ednevnik-seznam_ur_teden-blok" + r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d", + section.attrs["id"], + ) + ) + + if is_block_hour: + # Check for blocks + for block in section: + if type(block) == bs4.element.Tag: + event = None + group = [] + for img in block.select("img"): + try: + event = EVENT_MAP[ + img.attrs["title"] + ] + except KeyError: + event = "unknown_event" subject, group_raw, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] hour_classroom = teacher_classroom[1] - except IndexError: - pass # Makes it so empty strings don't - # crash the program - except AttributeError: - pass # Makes it so empty strings don't - # crash the program - if group_raw: - for gr in group_raw: - group.append(gr.text) - is_block_hour = ("id" in section.attrs) and bool( - re.match( - r"ednevnik-seznam_ur_teden-blok" - r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d", - section.attrs["id"], - ) - ) - - if is_block_hour: - # Check for blocks - for block in section: - if type(block) == bs4.element.Tag: - event = None - subject = None - group_raw = None - group = [] - teacher = None - hour_classroom = None - for img in block.select("img"): - try: - event = EVENT_MAP[ - img.attrs["title"] - ] - except KeyError: - event = "unknown_event" - try: - subject, group_raw, teacher_classroom = get_hour_data(section) - teacher = teacher_classroom[0] - hour_classroom = teacher_classroom[ - 1] - except IndexError: - pass - except AttributeError: - pass # Makes it so empty - # strings don't crash the - # program - if group_raw: - for gr in group_raw: - group.append(gr.text) - data_out = make_data_out( - date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour - ) - scraped_data[day_num][hour_name][ - str(classes_in_hour) - ] = data_out - classes_in_hour += 1 - - else: - data_out = make_data_out( + if group_raw: + for gr in group_raw: + group.append(gr.text) + data_out = make_data_out_v2( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) - scraped_data[day_num][hour_name][ - str(classes_in_hour) - ] = data_out + bundle_hour_block.blocks.append(data_out) + + # print(data_out) + + # scraped_data[day_num][count - 1].blocks.append(data_out) classes_in_hour += 1 - scraped_data["request_data"] = {} - scraped_data["request_data"]["hour_times"] = hour_times - scraped_data["request_data"]["dates"] = [format_date(x) for x in dates] - scraped_data["request_data"]["class"] = current_class - scraped_data["request_data"]["request_week"] = current_week - scraped_data["request_data"]["request_epoch"] = request_time - scraped_data["request_data"]["used_data"] = \ - { + else: + data_out = make_data_out_v2( + date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour + ) + # print(data_out) + # scraped_data[day_num][count - 1].blocks.append(data_out) + bundle_hour_block.blocks.append(data_out) + + classes_in_hour += 1 + bundle_hour.append(bundle_hour_block) + finla_bundle_pre_turn.append(bundle_hour) + r = [SchoolDay(None, list(x)) for x in list(zip(*finla_bundle_pre_turn))] + used_data = { "school_id": school_id, "class_id": class_id, "professor": professor, @@ -348,5 +351,4 @@ def get_schedule_data( "school_week": school_week, "student_id": student_id } - - return scraped_data + return Schedule(r, hour_times, dates, class_name, current_week, request_time, used_data) From ba0bbe4b1881a4fbdd2e8e130a399316133bbc65 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 19:30:35 +0100 Subject: [PATCH 08/22] Added get_event() --- src/eAsisitent_scraper/scraper.py | 77 +++++++------------------------ 1 file changed, 16 insertions(+), 61 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 3e20f14..f0cf452 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -66,11 +66,11 @@ class Schedule: used_data: dict -def get_hour_data(section: bs4.element.Tag) -> tuple[str, str, str]: +def get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: subject = section.find(class_=Formatting.SUBJECT_CLASS).text.replace("\n", "").replace("\t", "") group_raw = section.find_all(class_=Formatting.RAW_GROUP_CLASS) try: - teacher_classroom = ( + teacher_classroom = list( section.find(class_=Formatting.TEACHER_CLASSROOM_CLASS) .text.replace("\n", "") .replace("\t", "") @@ -80,34 +80,12 @@ def get_hour_data(section: bs4.element.Tag) -> tuple[str, str, str]: except AttributeError: subject = section.find(class_=Formatting.EVENT_CLASS).text.replace("\n", "").replace("\t", "") teacher_classroom = [None, None] - # print("--------------") - # print(section) - # print(repr(event)) return subject, group_raw, teacher_classroom -def make_data_out( - date: datetime.date, - subject: str = None, - teacher: str = None, - classroom: str = None, - group: list = None, - event: str = None, - hour_name: str = None, - week_day: str = None, - hour_in_block: int = None -) -> dict: - return { - "subject": subject, - "teacher": teacher, - "classroom": classroom, - "group": group, - "event": event, - "hour": hour_name, - "week_day": int(week_day), - "hour_in_block": hour_in_block, - "date": format_date(date), - } +def get_event(section: bs4.element.Tag) -> str: + for img in section.select("img"): + return img.attrs["title"] def make_data_out_v2( @@ -235,7 +213,6 @@ def get_schedule_data( hour_times: list = [] dates: list[datetime.date] = [] - scraped_data: dict = {} current_week = int( "".join( @@ -268,32 +245,23 @@ def get_schedule_data( """Pass the first collum that contains hour times""" date = dates[count2 - 1] day_num = str(date.weekday()) - if day_num not in scraped_data.keys(): - scraped_data.update({str(day_num): []}) - scraped_data[day_num].append(Hour(hour_name, [])) - if "style" not in row_part.attrs: # Detect empty hours data_out = make_data_out_v2(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) - # scraped_data[day_num][count - 1].blocks.append(data_out) bundle_hour_block.blocks.append(data_out) else: classes_in_hour = 0 for section in row_part: if type(section) != bs4.element.Tag: continue - event = None group = [] - for img in section.select("img"): - try: - event = EVENT_MAP[img.attrs["title"]] - except KeyError: - event = "unknown_event" + event = get_event(section) subject, group_raw, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] hour_classroom = teacher_classroom[1] if group_raw: for gr in group_raw: group.append(gr.text) + is_block_hour = ("id" in section.attrs) and bool( re.match( r"ednevnik-seznam_ur_teden-blok" @@ -306,15 +274,8 @@ def get_schedule_data( # Check for blocks for block in section: if type(block) == bs4.element.Tag: - event = None group = [] - for img in block.select("img"): - try: - event = EVENT_MAP[ - img.attrs["title"] - ] - except KeyError: - event = "unknown_event" + event = get_event(section) subject, group_raw, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] hour_classroom = teacher_classroom[1] @@ -325,17 +286,11 @@ def get_schedule_data( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) bundle_hour_block.blocks.append(data_out) - - # print(data_out) - - # scraped_data[day_num][count - 1].blocks.append(data_out) classes_in_hour += 1 else: data_out = make_data_out_v2( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) - # print(data_out) - # scraped_data[day_num][count - 1].blocks.append(data_out) bundle_hour_block.blocks.append(data_out) classes_in_hour += 1 @@ -343,12 +298,12 @@ def get_schedule_data( finla_bundle_pre_turn.append(bundle_hour) r = [SchoolDay(None, list(x)) for x in list(zip(*finla_bundle_pre_turn))] used_data = { - "school_id": school_id, - "class_id": class_id, - "professor": professor, - "classroom": classroom, - "interest_activity": interest_activity, - "school_week": school_week, - "student_id": student_id - } + "school_id": school_id, + "class_id": class_id, + "professor": professor, + "classroom": classroom, + "interest_activity": interest_activity, + "school_week": school_week, + "student_id": student_id + } return Schedule(r, hour_times, dates, class_name, current_week, request_time, used_data) From 71d5405e25400c98d71aaab0c85c3fa047ef5f6a Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 19:46:50 +0100 Subject: [PATCH 09/22] Remove EVENT_MAP, refactor --- src/eAsisitent_scraper/scraper.py | 51 +++++++++++-------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index f0cf452..9b8549c 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -7,18 +7,6 @@ from bs4 import BeautifulSoup -EVENT_MAP = { - "Odpadla ura": "cancelled", - "Dogodek": "event", - "Nadomeščanje": "substitute", - "Polovična ura": "half_hour", - "Videokonferenca": "video_call", - "Interesna dejavnost": "activity", - "Zaposlitev": "occupation", - "Neopravljena ura": "unfinished_hour", - "Govorilne ure": "office hours", - "Izpiti": "exams", -} @dataclass() @@ -80,7 +68,9 @@ def get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: except AttributeError: subject = section.find(class_=Formatting.EVENT_CLASS).text.replace("\n", "").replace("\t", "") teacher_classroom = [None, None] - return subject, group_raw, teacher_classroom + group = [x.text for x in group_raw] + group = None if group == [] else group + return subject, group, teacher_classroom def get_event(section: bs4.element.Tag) -> str: @@ -253,14 +243,10 @@ def get_schedule_data( for section in row_part: if type(section) != bs4.element.Tag: continue - group = [] event = get_event(section) - subject, group_raw, teacher_classroom = get_hour_data(section) + subject, group, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] hour_classroom = teacher_classroom[1] - if group_raw: - for gr in group_raw: - group.append(gr.text) is_block_hour = ("id" in section.attrs) and bool( re.match( @@ -273,20 +259,17 @@ def get_schedule_data( if is_block_hour: # Check for blocks for block in section: - if type(block) == bs4.element.Tag: - group = [] - event = get_event(section) - subject, group_raw, teacher_classroom = get_hour_data(section) - teacher = teacher_classroom[0] - hour_classroom = teacher_classroom[1] - if group_raw: - for gr in group_raw: - group.append(gr.text) - data_out = make_data_out_v2( - date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour - ) - bundle_hour_block.blocks.append(data_out) - classes_in_hour += 1 + if type(block) != bs4.element.Tag: + continue + event = get_event(section) + subject, group, teacher_classroom = get_hour_data(section) + teacher = teacher_classroom[0] + hour_classroom = teacher_classroom[1] + data_out = make_data_out_v2( + date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour + ) + bundle_hour_block.blocks.append(data_out) + classes_in_hour += 1 else: data_out = make_data_out_v2( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour @@ -296,7 +279,7 @@ def get_schedule_data( classes_in_hour += 1 bundle_hour.append(bundle_hour_block) finla_bundle_pre_turn.append(bundle_hour) - r = [SchoolDay(None, list(x)) for x in list(zip(*finla_bundle_pre_turn))] + school_days_list = [SchoolDay(None, list(x)) for x in list(zip(*finla_bundle_pre_turn))] used_data = { "school_id": school_id, "class_id": class_id, @@ -306,4 +289,4 @@ def get_schedule_data( "school_week": school_week, "student_id": student_id } - return Schedule(r, hour_times, dates, class_name, current_week, request_time, used_data) + return Schedule(school_days_list, hour_times, dates, class_name, current_week, request_time, used_data) From cc499bc660e92813ffbcf17f61ecdcb7807f2426 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 20:05:29 +0100 Subject: [PATCH 10/22] rename variables --- src/eAsisitent_scraper/scraper.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 9b8549c..9ce3092 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from bs4 import BeautifulSoup - +from requests import Response @dataclass() @@ -78,7 +78,7 @@ def get_event(section: bs4.element.Tag) -> str: return img.attrs["title"] -def make_data_out_v2( +def make_data_out( date: datetime.date, subject: str = None, teacher: str = None, @@ -128,7 +128,7 @@ def request_schedule( school_week=0, student_id=0, soup=False, -) -> requests.models.Response: +) -> BeautifulSoup | Response: """ It requests schedule from easistent.com and returns it as a response @@ -218,7 +218,7 @@ def get_schedule_data( class_name = str( [item.text.strip() for item in soup.select("body > div > strong")][0] ) - finla_bundle_pre_turn = [] + final_bundle_pre_turn = [] for count, table_row in enumerate(table_rows): bundle_hour: list[Hour] = [] if count == 0: @@ -232,11 +232,11 @@ def get_schedule_data( for count2, row_part in enumerate(row): if count2 != 0: bundle_hour_block = Hour(hour_name, []) - """Pass the first collum that contains hour times""" + """Pass the first column that contains hour times""" date = dates[count2 - 1] day_num = str(date.weekday()) if "style" not in row_part.attrs: # Detect empty hours - data_out = make_data_out_v2(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) + data_out = make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) bundle_hour_block.blocks.append(data_out) else: classes_in_hour = 0 @@ -265,21 +265,21 @@ def get_schedule_data( subject, group, teacher_classroom = get_hour_data(section) teacher = teacher_classroom[0] hour_classroom = teacher_classroom[1] - data_out = make_data_out_v2( + data_out = make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) bundle_hour_block.blocks.append(data_out) classes_in_hour += 1 else: - data_out = make_data_out_v2( + data_out = make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) bundle_hour_block.blocks.append(data_out) classes_in_hour += 1 bundle_hour.append(bundle_hour_block) - finla_bundle_pre_turn.append(bundle_hour) - school_days_list = [SchoolDay(None, list(x)) for x in list(zip(*finla_bundle_pre_turn))] + final_bundle_pre_turn.append(bundle_hour) + school_days_list = [SchoolDay(None, list(x)) for x in list(zip(*final_bundle_pre_turn))] used_data = { "school_id": school_id, "class_id": class_id, From 2cdf713d16227d1d3141ca9400109c54e8e57bca Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 22:13:18 +0100 Subject: [PATCH 11/22] Added UsedData dataclass --- src/eAsisitent_scraper/scraper.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 9ce3092..7b95166 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -43,6 +43,17 @@ class SchoolDay: hours: list[Hour] +@dataclass() +class UsedData: + school_id: str + class_id: int + professor: int + classroom: int + interest_activity: int + school_week: int + student_id: int + + @dataclass() class Schedule: days: list[SchoolDay] @@ -51,7 +62,7 @@ class Schedule: class_name: str request_week: int request_epoch: int - used_data: dict + used_data: UsedData def get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: @@ -280,13 +291,5 @@ def get_schedule_data( bundle_hour.append(bundle_hour_block) final_bundle_pre_turn.append(bundle_hour) school_days_list = [SchoolDay(None, list(x)) for x in list(zip(*final_bundle_pre_turn))] - used_data = { - "school_id": school_id, - "class_id": class_id, - "professor": professor, - "classroom": classroom, - "interest_activity": interest_activity, - "school_week": school_week, - "student_id": student_id - } + used_data = UsedData(school_id, class_id, professor, classroom, interest_activity, school_week, student_id) return Schedule(school_days_list, hour_times, dates, class_name, current_week, request_time, used_data) From 385bb3990e163170c9c08571e311a99f51b92698 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 22:14:00 +0100 Subject: [PATCH 12/22] Removed soup option form the request --- src/eAsisitent_scraper/scraper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 7b95166..c38f2c5 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -138,8 +138,7 @@ def request_schedule( interest_activity=0, school_week=0, student_id=0, - soup=False, -) -> BeautifulSoup | Response: +) -> Response: """ It requests schedule from easistent.com and returns it as a response @@ -163,8 +162,6 @@ def request_schedule( if response.text == "Šola ni veljavna!" or response.text == "Šola ni izbrana!": raise ValueError("This school does not exist. school_id is invalid") - if soup: - return BeautifulSoup(response.text, "html5lib") return response From a336869f5c0b3d57792c810a2d69001649e5a2c2 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 22:14:52 +0100 Subject: [PATCH 13/22] Removed outdated comments --- src/eAsisitent_scraper/scraper.py | 34 +------------------------------ 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index c38f2c5..b64c41b 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -139,22 +139,6 @@ def request_schedule( school_week=0, student_id=0, ) -> Response: - """ - It requests schedule from easistent.com and returns it as a response - - :param school_id: The ID of the school you want to get data for - :type school_id: str - :param class_id: The ID of the class you want to get data for, 0 is all classes, defaults to 0 (optional) - :param professor: The ID of the professor you want to get data for, 0 is all professors, defaults to 0 (optional) - :param classroom: The classroom you want to get data for, 0 is all classrooms, defaults to 0 (optional) - :param interest_activity: The activity you want to get data for, 0 is all interest activities, defaults to 0 (optional) - :param school_week: school week that you want to get the data for, 0 is the current week, defaults to 0 (optional) - :param student_id: The ID of the student you want to get the schedule for,0 is all students, defaults to 0 (optional) - :param soup: Return a BeautifulSoup object (optional) - :return: A response object is a requests.models.Response object. - - - """ url = f"https://www.easistent.com/urniki/izpis/{school_id}/{class_id}/{professor}/{classroom}/{interest_activity}/{school_week}/{student_id}" @@ -177,23 +161,7 @@ def get_schedule_data( school_week=0, student_id=0, ) -> Schedule: - """ - Date format is: YYYY-MM-DD - If school id is invalid ValueError is raised - - :param school_id: The ID of the school you want to get data for - :type school_id: str - :param class_id: The ID of the class you want to get data for, 0 is all classes, defaults to 0 (optional) - :param professor: The ID of the professor you want to get data for, 0 is all professors, defaults to 0 (optional) - :param classroom: The classroom you want to get data for, 0 is all classrooms, defaults to 0 (optional) - :param interest_activity: The activity you want to get data for, 0 is all interest activities, defaults to 0 (optional) - :param school_week: school week that you want to get the data for, 0 is the current week, defaults to 0 (optional) - :param student_id: The ID of the student you want to get the schedule for,0 is all students, defaults to 0 (optional) - :return: A dictionary with the data. - """ - - # TODO: reduce complexity of the function, - # better naming of variables, + response = request_schedule( school_id=school_id, class_id=class_id, From 6ed028d31d645a7d4e4bc2d86fff59867e127a24 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 22:23:51 +0100 Subject: [PATCH 14/22] Fix date being None --- src/eAsisitent_scraper/scraper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index b64c41b..e4c984a 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -15,7 +15,7 @@ class Formatting: RAW_GROUP_CLASS = "text11 gray bold" TEACHER_CLASSROOM_CLASS = "text11" EVENT_CLASS = "text14" - EVENT_STYLE = "border:none" + CLASS_NAME_CLASS = "text20" @dataclass() @@ -255,6 +255,6 @@ def get_schedule_data( classes_in_hour += 1 bundle_hour.append(bundle_hour_block) final_bundle_pre_turn.append(bundle_hour) - school_days_list = [SchoolDay(None, list(x)) for x in list(zip(*final_bundle_pre_turn))] + school_days_list = [SchoolDay(dates[index], list(x)) for index, x in enumerate(list(zip(*final_bundle_pre_turn)))] used_data = UsedData(school_id, class_id, professor, classroom, interest_activity, school_week, student_id) return Schedule(school_days_list, hour_times, dates, class_name, current_week, request_time, used_data) From edef58e76277abf9e75b72cb4c88a3d9d50f1336 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 22:27:54 +0100 Subject: [PATCH 15/22] Rename variable --- src/eAsisitent_scraper/scraper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index e4c984a..1345348 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -34,7 +34,7 @@ class HourBlock: @dataclass() class Hour: name: str - blocks: list[HourBlock] + hour_blocks: list[HourBlock] @dataclass() @@ -213,7 +213,7 @@ def get_schedule_data( day_num = str(date.weekday()) if "style" not in row_part.attrs: # Detect empty hours data_out = make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) - bundle_hour_block.blocks.append(data_out) + bundle_hour_block.hour_blocks.append(data_out) else: classes_in_hour = 0 for section in row_part: @@ -244,13 +244,13 @@ def get_schedule_data( data_out = make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) - bundle_hour_block.blocks.append(data_out) + bundle_hour_block.hour_blocks.append(data_out) classes_in_hour += 1 else: data_out = make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) - bundle_hour_block.blocks.append(data_out) + bundle_hour_block.hour_blocks.append(data_out) classes_in_hour += 1 bundle_hour.append(bundle_hour_block) From 94029b2cd0a4d2f04efe0a30194adf640e5cb2c7 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Fri, 20 Jan 2023 22:28:19 +0100 Subject: [PATCH 16/22] Remove debug --- src/eAsisitent_scraper/scraper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 1345348..1cf1552 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -28,7 +28,6 @@ class HourBlock: hour: str hour_in_block: int date: datetime.date - debug: str = None @dataclass() @@ -99,9 +98,8 @@ def make_data_out( hour_name: str = None, week_day: str = None, hour_in_block: int = None, - debug=None ) -> HourBlock: - return HourBlock(subject, teacher, classroom, group, event, hour_name, hour_in_block, date, debug) + return HourBlock(subject, teacher, classroom, group, event, hour_name, hour_in_block, date) def format_date(date: datetime.date) -> str: From b68f423ae15480a6b7e5248e5af7c807cddc2486 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Sat, 21 Jan 2023 10:20:16 +0100 Subject: [PATCH 17/22] Add __ prefix to functions --- src/eAsisitent_scraper/scraper.py | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 1cf1552..7532a43 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -64,7 +64,7 @@ class Schedule: used_data: UsedData -def get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: +def __get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: subject = section.find(class_=Formatting.SUBJECT_CLASS).text.replace("\n", "").replace("\t", "") group_raw = section.find_all(class_=Formatting.RAW_GROUP_CLASS) try: @@ -83,12 +83,12 @@ def get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: return subject, group, teacher_classroom -def get_event(section: bs4.element.Tag) -> str: +def __get_event(section: bs4.element.Tag) -> str: for img in section.select("img"): return img.attrs["title"] -def make_data_out( +def __make_data_out( date: datetime.date, subject: str = None, teacher: str = None, @@ -102,11 +102,11 @@ def make_data_out( return HourBlock(subject, teacher, classroom, group, event, hour_name, hour_in_block, date) -def format_date(date: datetime.date) -> str: +def __format_date(date: datetime.date) -> str: return str(date.strftime("%Y-%m-%d")) -def get_dates(table_row: bs4.element.Tag) -> list[datetime.date]: +def __get_dates(table_row: bs4.element.Tag) -> list[datetime.date]: dates: list = [] for days in table_row: if type(days) == bs4.element.Tag: @@ -122,13 +122,13 @@ def get_dates(table_row: bs4.element.Tag) -> list[datetime.date]: return dates -def get_hours_time_data(row: bs4.element.ResultSet) -> tuple[str, str]: +def __get_hours_time_data(row: bs4.element.ResultSet) -> tuple[str, str]: hour_name = str(row[0].find(class_="text14").text) hour_time = str(row[0].find(class_="text10").text.replace(" ", "")) return hour_name, hour_time -def request_schedule( +def __request_schedule( school_id: str, class_id=0, professor=0, @@ -160,7 +160,7 @@ def get_schedule_data( student_id=0, ) -> Schedule: - response = request_schedule( + response = __request_schedule( school_id=school_id, class_id=class_id, professor=professor, @@ -196,12 +196,12 @@ def get_schedule_data( for count, table_row in enumerate(table_rows): bundle_hour: list[Hour] = [] if count == 0: - dates = get_dates(table_row) + dates = __get_dates(table_row) continue row = table_row.find_all("td", class_="ednevnik-seznam_ur_teden-td") - hour_name, hour_time = get_hours_time_data(row) + hour_name, hour_time = __get_hours_time_data(row) hour_times.append(hour_time) for count2, row_part in enumerate(row): if count2 != 0: @@ -210,15 +210,15 @@ def get_schedule_data( date = dates[count2 - 1] day_num = str(date.weekday()) if "style" not in row_part.attrs: # Detect empty hours - data_out = make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) + data_out = __make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) bundle_hour_block.hour_blocks.append(data_out) else: classes_in_hour = 0 for section in row_part: if type(section) != bs4.element.Tag: continue - event = get_event(section) - subject, group, teacher_classroom = get_hour_data(section) + event = __get_event(section) + subject, group, teacher_classroom = __get_hour_data(section) teacher = teacher_classroom[0] hour_classroom = teacher_classroom[1] @@ -235,17 +235,17 @@ def get_schedule_data( for block in section: if type(block) != bs4.element.Tag: continue - event = get_event(section) - subject, group, teacher_classroom = get_hour_data(section) + event = __get_event(section) + subject, group, teacher_classroom = __get_hour_data(section) teacher = teacher_classroom[0] hour_classroom = teacher_classroom[1] - data_out = make_data_out( + data_out = __make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) bundle_hour_block.hour_blocks.append(data_out) classes_in_hour += 1 else: - data_out = make_data_out( + data_out = __make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) bundle_hour_block.hour_blocks.append(data_out) From 4ebdbb38e6bb002a5b6664e8469889ee3e7037a3 Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Sat, 21 Jan 2023 10:23:39 +0100 Subject: [PATCH 18/22] Return teacher, classoroo, instead of a list --- src/eAsisitent_scraper/scraper.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 7532a43..2cbf55a 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -64,7 +64,7 @@ class Schedule: used_data: UsedData -def __get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: +def __get_hour_data(section: bs4.element.Tag) -> tuple[str, list, str, str]: subject = section.find(class_=Formatting.SUBJECT_CLASS).text.replace("\n", "").replace("\t", "") group_raw = section.find_all(class_=Formatting.RAW_GROUP_CLASS) try: @@ -80,7 +80,7 @@ def __get_hour_data(section: bs4.element.Tag) -> tuple[str, list, list]: teacher_classroom = [None, None] group = [x.text for x in group_raw] group = None if group == [] else group - return subject, group, teacher_classroom + return subject, group, teacher_classroom[0], teacher_classroom[1] def __get_event(section: bs4.element.Tag) -> str: @@ -137,7 +137,6 @@ def __request_schedule( school_week=0, student_id=0, ) -> Response: - url = f"https://www.easistent.com/urniki/izpis/{school_id}/{class_id}/{professor}/{classroom}/{interest_activity}/{school_week}/{student_id}" response = requests.get(url) @@ -159,7 +158,6 @@ def get_schedule_data( school_week=0, student_id=0, ) -> Schedule: - response = __request_schedule( school_id=school_id, class_id=class_id, @@ -218,9 +216,7 @@ def get_schedule_data( if type(section) != bs4.element.Tag: continue event = __get_event(section) - subject, group, teacher_classroom = __get_hour_data(section) - teacher = teacher_classroom[0] - hour_classroom = teacher_classroom[1] + subject, group, teacher, hour_classroom = __get_hour_data(section) is_block_hour = ("id" in section.attrs) and bool( re.match( @@ -236,9 +232,7 @@ def get_schedule_data( if type(block) != bs4.element.Tag: continue event = __get_event(section) - subject, group, teacher_classroom = __get_hour_data(section) - teacher = teacher_classroom[0] - hour_classroom = teacher_classroom[1] + subject, group, teacher, hour_classroom = __get_hour_data(section) data_out = __make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) From 7f739056b4ad6ad57fb05228527dca5bda76a3dc Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Sat, 21 Jan 2023 10:31:44 +0100 Subject: [PATCH 19/22] Switched if for guard clause --- src/eAsisitent_scraper/scraper.py | 77 ++++++++++++++++--------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 2cbf55a..954ad53 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -202,50 +202,51 @@ def get_schedule_data( hour_name, hour_time = __get_hours_time_data(row) hour_times.append(hour_time) for count2, row_part in enumerate(row): - if count2 != 0: - bundle_hour_block = Hour(hour_name, []) - """Pass the first column that contains hour times""" - date = dates[count2 - 1] - day_num = str(date.weekday()) - if "style" not in row_part.attrs: # Detect empty hours - data_out = __make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) - bundle_hour_block.hour_blocks.append(data_out) - else: - classes_in_hour = 0 - for section in row_part: - if type(section) != bs4.element.Tag: - continue - event = __get_event(section) - subject, group, teacher, hour_classroom = __get_hour_data(section) - - is_block_hour = ("id" in section.attrs) and bool( - re.match( - r"ednevnik-seznam_ur_teden-blok" - r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d", - section.attrs["id"], - ) + if count2 == 0: + continue + bundle_hour_block = Hour(hour_name, []) + """Pass the first column that contains hour times""" + date = dates[count2 - 1] + day_num = str(date.weekday()) + if "style" not in row_part.attrs: # Detect empty hours + data_out = __make_data_out(date, hour_name=hour_name, week_day=day_num, hour_in_block=0) + bundle_hour_block.hour_blocks.append(data_out) + else: + classes_in_hour = 0 + for section in row_part: + if type(section) != bs4.element.Tag: + continue + event = __get_event(section) + subject, group, teacher, hour_classroom = __get_hour_data(section) + + is_block_hour = ("id" in section.attrs) and bool( + re.match( + r"ednevnik-seznam_ur_teden-blok" + r"-\d\d\d\d\d\d-\d\d\d\d-\d\d-\d\d", + section.attrs["id"], ) - - if is_block_hour: - # Check for blocks - for block in section: - if type(block) != bs4.element.Tag: - continue - event = __get_event(section) - subject, group, teacher, hour_classroom = __get_hour_data(section) - data_out = __make_data_out( - date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour - ) - bundle_hour_block.hour_blocks.append(data_out) - classes_in_hour += 1 - else: + ) + + if is_block_hour: + # Check for blocks + for block in section: + if type(block) != bs4.element.Tag: + continue + event = __get_event(section) + subject, group, teacher, hour_classroom = __get_hour_data(section) data_out = __make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) bundle_hour_block.hour_blocks.append(data_out) - classes_in_hour += 1 - bundle_hour.append(bundle_hour_block) + else: + data_out = __make_data_out( + date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour + ) + bundle_hour_block.hour_blocks.append(data_out) + + classes_in_hour += 1 + bundle_hour.append(bundle_hour_block) final_bundle_pre_turn.append(bundle_hour) school_days_list = [SchoolDay(dates[index], list(x)) for index, x in enumerate(list(zip(*final_bundle_pre_turn)))] used_data = UsedData(school_id, class_id, professor, classroom, interest_activity, school_week, student_id) From 5fd121d283dd993f6d7185d996c40936672959cf Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Sat, 21 Jan 2023 10:34:56 +0100 Subject: [PATCH 20/22] Replace if statment for guard clause --- src/eAsisitent_scraper/scraper.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index 954ad53..f1894e2 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -227,25 +227,25 @@ def get_schedule_data( ) ) - if is_block_hour: - # Check for blocks - for block in section: - if type(block) != bs4.element.Tag: - continue - event = __get_event(section) - subject, group, teacher, hour_classroom = __get_hour_data(section) - data_out = __make_data_out( - date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour - ) - bundle_hour_block.hour_blocks.append(data_out) - classes_in_hour += 1 - else: + if not is_block_hour: data_out = __make_data_out( date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour ) bundle_hour_block.hour_blocks.append(data_out) + classes_in_hour += 1 + continue + for block in section: + if type(block) != bs4.element.Tag: + continue + event = __get_event(section) + subject, group, teacher, hour_classroom = __get_hour_data(section) + data_out = __make_data_out( + date, subject, teacher, hour_classroom, group, event, hour_name, day_num, classes_in_hour + ) + bundle_hour_block.hour_blocks.append(data_out) classes_in_hour += 1 + bundle_hour.append(bundle_hour_block) final_bundle_pre_turn.append(bundle_hour) school_days_list = [SchoolDay(dates[index], list(x)) for index, x in enumerate(list(zip(*final_bundle_pre_turn)))] From 9de066fa7130622853d792aa14adfd95eec403de Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Sat, 21 Jan 2023 10:39:21 +0100 Subject: [PATCH 21/22] Remove a variable outside a function --- src/eAsisitent_scraper/scraper.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py index f1894e2..684726f 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitent_scraper/scraper.py @@ -116,7 +116,7 @@ def __get_dates(table_row: bs4.element.Tag) -> list[datetime.date]: temp_datetime = datetime.date( day=int(temp_date[0]), month=int(temp_date[1]), - year=today.year, + year=datetime.date.today().year, ) dates.append(temp_datetime) return dates @@ -146,9 +146,6 @@ def __request_schedule( return response -today = datetime.date.today() - - def get_schedule_data( school_id: str, class_id=0, From 0a0ad7cb26ad5ab59664d5a1cbc60b799e73fd5d Mon Sep 17 00:00:00 2001 From: PingIsFun Date: Sat, 21 Jan 2023 11:01:03 +0100 Subject: [PATCH 22/22] renamed project to eAsistentAPI --- .gitignore | 4 ++-- README.md | 12 ++++++------ setup.cfg | 12 ++++++------ src/eAsisitentAPI/__init__.py | 2 ++ src/{eAsisitent_scraper => eAsisitentAPI}/scraper.py | 2 +- src/eAsisitent_scraper/__init__.py | 2 -- 6 files changed, 17 insertions(+), 17 deletions(-) create mode 100644 src/eAsisitentAPI/__init__.py rename src/{eAsisitent_scraper => eAsisitentAPI}/scraper.py (99%) delete mode 100644 src/eAsisitent_scraper/__init__.py diff --git a/.gitignore b/.gitignore index b7c90b8..3d5eaac 100644 --- a/.gitignore +++ b/.gitignore @@ -13,9 +13,9 @@ Pipfile.lock # Other random_code/ dist/ -src/eAsistent_scraper.egg-info/ +*.egg-info git_hidden/ # Not done yet tests/ -src/eAsisitent_scraper/pharser.py +src/eAsisitentAPI/pharser.py diff --git a/README.md b/README.md index 217998c..3a3ec6e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # eAsistent scraper (WIP) -*** -Scrapes data from *easistent.com/urniki/...* and returns it as Python dictionary -*** +## EN To install it run: pip install eAsistent-scraper @@ -11,11 +9,13 @@ To install it run: Example usage: ```python -import eAsisitent_scraper +import eAsisitentAPI -data = eAsisitent_scraper.get_schedule_data(school_id="SCHOOL_ID", class_id="CLASS_ID") +data = eAsisitentAPI.get_schedule(school_id="SCHOOL_ID", class_id=CLASS_ID) ``` *** For support and feature requests ask on [GitHub discussions](https://github.com/PingWasFun/eAsistent-scraper/discussions/categories/general) -To contribute fork the GitHub repository and make a pull request. +## SLO + +// TODO \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 8082450..feac667 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,15 +1,15 @@ [metadata] -name = eAsistent_scraper -version = 1.5.1 +name = eAsistentAPI +version = 2.0.0 author = PingIsFun author_email = pingisfun@protonmail.com description = Scrapes data from easistent.com/urniki/... and returns it as Python dictionary long_description = file: README.md long_description_content_type = text/markdown -url = https://github.com/PingWasFun/eAsistent-scraper +url = https://github.com/PingIsFun/eAsistentAPI project_urls = - Bug Tracker = https://github.com/PingWasFun/eAsistent-scraper/issues - Help = https://github.com/PingWasFun/eAsistent-scraper/discussions/categories/general + Bug Tracker = https://github.com/PingIsFun/eAsistentAPI/issues + Help = https://github.com/PingIsFun/eAsistentAPI/discussions/categories/general license = MIT License platform = any classifiers = @@ -33,7 +33,7 @@ install_requires = package_dir = = src packages = find: -python_requires = >=3.7 +python_requires = >=3.10 [options.packages.find] where = src diff --git a/src/eAsisitentAPI/__init__.py b/src/eAsisitentAPI/__init__.py new file mode 100644 index 0000000..ca87a63 --- /dev/null +++ b/src/eAsisitentAPI/__init__.py @@ -0,0 +1,2 @@ +from .scraper import get_schedule + diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitentAPI/scraper.py similarity index 99% rename from src/eAsisitent_scraper/scraper.py rename to src/eAsisitentAPI/scraper.py index 684726f..abeb904 100644 --- a/src/eAsisitent_scraper/scraper.py +++ b/src/eAsisitentAPI/scraper.py @@ -146,7 +146,7 @@ def __request_schedule( return response -def get_schedule_data( +def get_schedule( school_id: str, class_id=0, professor=0, diff --git a/src/eAsisitent_scraper/__init__.py b/src/eAsisitent_scraper/__init__.py deleted file mode 100644 index d14f304..0000000 --- a/src/eAsisitent_scraper/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .scraper import get_schedule_data -