PingIsFun · PingIsFun · Mar 22, 2022 · Mar 22, 2022 · Mar 22, 2022
diff --git a/src/eAsisitent_scraper/scraper.py b/src/eAsisitent_scraper/scraper.py
@@ -21,12 +21,12 @@ def request_schedule(
 
     :param school_id: The ID of the school you want to get data for
     :type school_id: str
-    :param class_id: The ID of the class you want to get data for, defaults to 0 (optional), defaults to 0 (optional)
-    :param professor: The ID of the professor you want to get data for, defaults to 0 (optional), defaults to 0 (optional)
-    :param classroom: The classroom you want to get data for, defaults to 0 (optional), defaults to 0 (optional)
-    :param interest_activity: The activity you want to get data for, defaults to 0 (optional)
-    :param school_week: 0 is the current week, 1 is the next week, 2 is the week after that, etc, defaults to 0 (optional)
-    :param student_id: The ID of the student you want to get the schedule for, defaults to 0 (optional)
+    :param class_id: The ID of the class you want to get data for, 0 is all classes, defaults to 0 (optional)
+    :param professor: The ID of the professor you want to get data for, 0 is all professors, defaults to 0 (optional)
+    :param classroom: The classroom you want to get data for, 0 is all classrooms, defaults to 0 (optional)
+    :param interest_activity: The activity you want to get data for, 0 is all interest activities, defaults to 0 (optional)
+    :param school_week: The school week you want to get data for, 0 is the current week, defaults to 0 (optional)
+    :param student_id: The ID of the student you want to get the schedule for, 0 is all students, defaults to 0 (optional)
     :param soup: Return a BeautifulSoup object (optional)
     :return: A response object is a requests.models.Response object.
 
@@ -47,20 +47,6 @@ def request_schedule(
 today = datetime.date.today()
 
 
-def hour_to_num(hour: str):
-    """
-    Convert hour name to integer
-
-    :param hour: the hour that you want to be converted to int
-    :type hour: str
-    :return: The hour as an integer.
-    """
-    if hour.lower() == "predura":
-        return int(0)
-    else:
-        return int(hour.split(". ura")[0])
-
-
 def get_schedule_data(
         school_id: str,
         class_id=0,
@@ -76,18 +62,17 @@ def get_schedule_data(
 
     :param school_id: The ID of the school you want to get data for
     :type school_id: str
-    :param class_id: The ID of the class you want to get data for, defaults to 0 (optional), defaults to 0 (optional)
-    :param professor: The ID of the professor you want to get data for, defaults to 0 (optional), defaults to 0 (optional)
-    :param classroom: The classroom you want to get data for, defaults to 0 (optional), defaults to 0 (optional)
-    :param interest_activity: The activity you want to get data for, defaults to 0 (optional)
-    :param school_week: 0 is the current week, 1 is the next week, 2 is the week after that, etc, defaults to 0 (optional)
-    :param student_id: The ID of the student you want to get the schedule for, defaults to 0 (optional)
+    :param class_id: The ID of the class you want to get data for, 0 is all classes, defaults to 0 (optional)
+    :param professor: The ID of the professor you want to get data for, 0 is all professors, defaults to 0 (optional)
+    :param classroom: The classroom you want to get data for, 0 is all classrooms, defaults to 0 (optional)
+    :param interest_activity: The activity you want to get data for, 0 is all interest activities, defaults to 0 (optional)
+    :param school_week: The school week you want to get data for, 0 is the current week, defaults to 0 (optional)
+    :param student_id: The ID of the student you want to get the schedule for, 0 is all students, defaults to 0 (optional)
     :return: A dictionary with the data.
     """
 
     # TODO: reduce complexity of the function,
     #  better naming of variables,
-    #  get template for scraped_data from template.json
     response = request_schedule(school_id=school_id,
                                 class_id=class_id,
                                 professor=professor,
@@ -96,64 +81,64 @@ def get_schedule_data(
                                 school_week=school_week,
                                 student_id=student_id)
     soup = BeautifulSoup(response.text, "html5lib")
-    seznam_ur_teden = soup.select("body > table > tbody > tr")
+    table_rows = soup.select("body > table > tbody > tr")
 
     count: int = -1
 
     dates: list = []
     dates_formatted: list = []
     hour_times: list = []
 
-    scraped_data: dict = {str(i): {str(j): {} for j in range(15)} for i in range(7)}
-    scraped_data["week_data"] = {"hour_times": [], "dates": [], "current_week": "", "class": ""}
+    scraped_data: dict = {str(i): {} for i in range(7)}
 
     current_week = int("".join(re.findall("[0-9]", [item.text.split(",")[0] for item in soup.select("body > div > span")][0])))
     current_class = str([item.text.strip() for item in soup.select("body > div > strong")][0])
 
-    for i in seznam_ur_teden:
+    for table_row in table_rows:
         if count == -1:
-            for days in i:
+            for days in table_row:
                 if type(days) == bs4.element.Tag:
                     day = days.select("div")
                     if day[0].text != "Ura":
                         temp_date = re.findall(r"[^A-z,. ]+", day[1].text)
                         temp_datetime = datetime.datetime(
-                                day=int(temp_date[0]),
-                                month=int(temp_date[1]),
-                                year=today.year,
-                            )
+                            day=int(temp_date[0]),
+                            month=int(temp_date[1]),
+                            year=today.year,
+                        )
                         dates_formatted.append(str(temp_datetime.strftime("%Y-%m-%d")))
                         dates.append(temp_datetime)
         if count >= 0:
-            row = i.find_all("td", class_="ednevnik-seznam_ur_teden-td")
-            hour_name = row[0].find(class_="text14").text
+            row = table_row.find_all("td", class_="ednevnik-seznam_ur_teden-td")
+            hour_name = str(row[0].find(class_="text14").text)
             hour_time = row[0].find(class_="text10").text
             hour_times.append(hour_time)
-            hour_num = str(hour_to_num(hour_name))
-            hour_num = str(hour_num)
+
             count2: int = 0
-            for block in row:
+            for row_part in row:
                 if count2 != 0:
                     """Pass the first collum that contains hour times"""
                     date = dates[count2 - 1]
                     day_num = str(date.weekday())
                     date_formatted = str(date.strftime("%Y-%m-%d"))
-                    if "style" not in block.attrs:
+                    scraped_data[day_num].update({str(hour_name): {}})
+
+                    if "style" not in row_part.attrs:
                         data_out = {
                             "subject": None,
                             "teacher": None,
                             "classroom": None,
                             "group": None,
                             "event": None,
-                            "hour": int(hour_num),
+                            "hour": hour_name,
                             "week_day": int(day_num),
                             "hour_in_block": 0,
                             "date": date_formatted,
                         }
-                        scraped_data[day_num][hour_num]["0"] = data_out
+                        scraped_data[day_num][hour_name]["0"] = data_out
                     else:
                         classes_in_hour = 0
-                        for section in block:
+                        for section in row_part:
                             if type(section) == bs4.element.Tag:
                                 event = None
                                 subject = None
@@ -199,10 +184,9 @@ def get_schedule_data(
                                     teacher = teacher_classroom[0]
                                     classroom = teacher_classroom[1]
                                 except IndexError:
-                                    pass
+                                    pass  # Makes it so empty strings don't crash the program
                                 except AttributeError:
-                                    """Makes it so empty strings don't crash the program"""
-                                    pass
+                                    pass  # Makes it so empty strings don't crash the program
                                 if group_raw:
                                     for gr in group_raw:
                                         group.append(gr.text)
@@ -212,17 +196,17 @@ def get_schedule_data(
                                             section.attrs["id"],
                                         )
                                 ):
-                                    """Check for blocks"""
-                                    for block_part in section:
-                                        if type(block_part) == bs4.element.Tag:
+                                    # Check for blocks
+                                    for block in section:
+                                        if type(block) == bs4.element.Tag:
                                             event = None
                                             subject = None
                                             group_raw = None
                                             group = []
                                             teacher = None
                                             classroom = None
                                             teacher_classroom = None
-                                            for img in block_part.select("img"):
+                                            for img in block.select("img"):
                                                 events_list = {
                                                     "Odpadla ura": "cancelled",
                                                     "Dogodek": "event",
@@ -243,15 +227,15 @@ def get_schedule_data(
                                                     event = "unknown_event"
                                             try:
                                                 subject = (
-                                                    block_part.find(class_="text14")
+                                                    block.find(class_="text14")
                                                         .text.replace("\n", "")
                                                         .replace("\t", "")
                                                 )
-                                                group_raw = block_part.find_all(
+                                                group_raw = block.find_all(
                                                     class_="text11 gray bold"
                                                 )
                                                 teacher_classroom = (
-                                                    block_part.find(class_="text11")
+                                                    block.find(class_="text11")
                                                         .text.replace("\n", "")
                                                         .replace("\t", "")
                                                         .replace("\r", "")
@@ -262,8 +246,7 @@ def get_schedule_data(
                                             except IndexError:
                                                 pass
                                             except AttributeError:
-                                                """Makes it so empty strings don't crash the program"""
-                                                pass
+                                                pass  # Makes it so empty strings don't crash the program
                                             if group_raw:
                                                 for gr in group_raw:
                                                     group.append(gr.text)
@@ -273,12 +256,12 @@ def get_schedule_data(
                                                 "classroom": classroom,
                                                 "group": group,
                                                 "event": event,
-                                                "hour": int(hour_num),
+                                                "hour": hour_name,
                                                 "week_day": int(day_num),
                                                 "hour_in_block": int(classes_in_hour),
                                                 "date": date_formatted,
                                             }
-                                            scraped_data[day_num][hour_num][
+                                            scraped_data[day_num][hour_name][
                                                 classes_in_hour
                                             ] = data_out
                                             classes_in_hour += 1
@@ -290,20 +273,20 @@ def get_schedule_data(
                                         "classroom": classroom,
                                         "group": group,
                                         "event": event,
-                                        "hour": int(hour_num),
+                                        "hour": hour_name,
                                         "week_day": int(day_num),
                                         "hour_in_block": int(classes_in_hour),
                                         "date": date_formatted,
                                     }
-                                    scraped_data[day_num][hour_num][
+                                    scraped_data[day_num][hour_name][
                                         classes_in_hour
                                     ] = data_out
                                     classes_in_hour += 1
                 count2 += 1
         count += 1
+    scraped_data["week_data"] = {"hour_times": [], "dates": [], "current_week": "", "class": ""}
     scraped_data["week_data"]["hour_times"] = hour_times
     scraped_data["week_data"]["dates"] = dates_formatted
     scraped_data["week_data"]["current_week"] = current_week
     scraped_data["week_data"]["class"] = current_class
-
     return scraped_data