Merged
6 changes: 6 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -15,12 +15,18 @@ jobs:
- name: Setup running platform stack
run: docker compose up --build -d

- name: Pause for stack to start
run: sleep 10

- name: Integration tests - API
run: docker run
--env CONF_DIR=/dp3/tests/test_config
--network container:dp3_api
dp3_interpreter python -m unittest discover -s tests/test_api -v

- name: Check worker errors
run: docker compose logs worker | grep "WARNING\|ERROR\|exception" | grep -v "RabbitMQ\|it's\ OK\ now,\ we're\ successfully\ connected" || true

- name: Teardown platform stack
run: docker compose down

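The chained greps in the new "Check worker errors" step are dense to read. A minimal Python sketch of the same filtering logic (the function name is illustrative, not part of the PR):

```python
import re

def worker_log_issues(log_text: str) -> list[str]:
    """Keep log lines mentioning WARNING, ERROR or exception, but drop
    known-benign RabbitMQ reconnect messages -- mirroring the two
    chained greps in the CI step."""
    keep = re.compile(r"WARNING|ERROR|exception")
    ignore = re.compile(r"RabbitMQ|it's OK now, we're successfully connected")
    return [
        line
        for line in log_text.splitlines()
        if keep.search(line) and not ignore.search(line)
    ]
```

Note the trailing `|| true` in the CI step: grep exits non-zero when nothing matches, and `|| true` keeps that from failing the job; an empty list plays the same role here.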
4 changes: 4 additions & 0 deletions config/history_manager.yml
@@ -9,6 +9,10 @@ aggregation_schedule:
minute: "*/10"

# Deleting old datapoints from master records
mark_datapoints_schedule:
hour: "7,19"
minute: "45"

datapoint_cleaning_schedule:
minute: "*/30"

28 changes: 16 additions & 12 deletions docs/configuration/history_manager.md
@@ -13,25 +13,29 @@ Configuration file `history_manager.yml` is very simple:
aggregation_schedule: # (1)!
minute: "*/10"

datapoint_cleaning_schedule: # (2)!
mark_datapoints_schedule: # (2)!
hour: "7,19"
minute: "45"
datapoint_cleaning_schedule: # (3)!
minute: "*/30"

snapshot_cleaning:
schedule: {minute: "15,45"} # (3)!
older_than: 7d # (4)!
schedule: {minute: "15,45"} # (4)!
older_than: 7d # (5)!

datapoint_archivation:
schedule: {hour: 2, minute: 0} # (5)!
older_than: 7d # (6)!
archive_dir: "data/datapoints/" # (7)!
schedule: {hour: 2, minute: 0} # (6)!
older_than: 7d # (7)!
archive_dir: "data/datapoints/" # (8)!
```

1. Parameter `aggregation_schedule` sets the interval for DP³ to aggregate observation datapoints in master records. This should be scheduled more often than the datapoint cleaning.
2. Parameter `datapoint_cleaning_schedule` sets the interval at which DP³ checks the master records of observation and timeseries attributes for data that is too old, and removes it. To control what is considered "too old", see the `max_age` parameter in the *Database entities* configuration.
3. Parameter `snapshot_cleaning.schedule` sets the interval for DP³ to clean the snapshots collection. Ideally, this should be scheduled outside the snapshot creation window. See the *Snapshots* configuration for more.
4. Parameter `snapshot_cleaning.older_than` sets how old a snapshot must be to be deleted.
5. Parameter `datapoint_archivation.schedule` sets the interval at which DP³ archives datapoints from raw collections.
6. Parameter `datapoint_archivation.older_than` sets how old a datapoint must be to be archived.
7. Parameter `datapoint_archivation.archive_dir` sets the directory where old datapoints are archived. If the directory doesn't exist, it will be created, but write privileges must be set correctly. It can also be set to `null` (or left unset) to disable archivation and only delete old data.
2. Parameter `mark_datapoints_schedule` sets the interval at which datapoint timestamps are marked for all entities in a master collection. This should be scheduled only rarely, as it is a very expensive operation.
3. Parameter `datapoint_cleaning_schedule` sets the interval at which DP³ checks the master records of observation and timeseries attributes for data that is too old, and removes it. To control what is considered "too old", see the `max_age` parameter in the *Database entities* configuration.
4. Parameter `snapshot_cleaning.schedule` sets the interval for DP³ to clean the snapshots collection. Ideally, this should be scheduled outside the snapshot creation window. See the *Snapshots* configuration for more.
5. Parameter `snapshot_cleaning.older_than` sets how old a snapshot must be to be deleted.
6. Parameter `datapoint_archivation.schedule` sets the interval at which DP³ archives datapoints from raw collections.
7. Parameter `datapoint_archivation.older_than` sets how old a datapoint must be to be archived.
8. Parameter `datapoint_archivation.archive_dir` sets the directory where old datapoints are archived. If the directory doesn't exist, it will be created, but write privileges must be set correctly. It can also be set to `null` (or left unset) to disable archivation and only delete old data.

The schedule dictionaries are transformed to cron expressions; see the [CronExpression docs][dp3.common.config.CronExpression] for details.
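For illustration, that dict-to-cron mapping might look roughly like this (a sketch under the assumption that unset fields default to `*`; the function name is hypothetical and the real implementation lives in `dp3.common.config.CronExpression`):

```python
def schedule_to_cron(schedule: dict) -> str:
    """Render a schedule dict as a standard 5-field cron expression,
    treating missing fields as "*" (match every value)."""
    fields = ("minute", "hour", "day", "month", "day_of_week")
    return " ".join(str(schedule.get(f, "*")) for f in fields)

# The mark_datapoints_schedule from the config above:
schedule_to_cron({"hour": "7,19", "minute": "45"})  # -> "45 7,19 * * *"
```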
5 changes: 1 addition & 4 deletions dp3/api/routers/entity.py
@@ -61,8 +61,6 @@ def get_eid_snapshots_handler(
):
"""Handler for getting snapshots of EID"""
snapshots = list(DB.get_snapshots(etype, eid, t1=date_from, t2=date_to))
for s in snapshots:
del s["_id"]

return snapshots

@@ -136,11 +134,10 @@ async def list_entity_type_eids(
time_created = None

# Remove _id field
result = list(cursor_page)
result = [r["last"] for r in cursor_page]
for r in result:
time_created = r["_time_created"]
del r["_time_created"]
del r["_id"]

return EntityEidList(
time_created=time_created, count=len(result), total_count=total_count, data=result
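The reworked listing handler unwraps the `last` subdocument from each aggregation result instead of deleting `_id` by hand. A self-contained sketch of that reshaping, using hypothetical sample documents (the helper name and sample field values are illustrative, not from the PR):

```python
def unwrap_latest(cursor_page):
    """Extract the 'last' snapshot from each aggregation result and
    strip the internal _time_created field, returning the last seen
    value of it alongside the cleaned records -- mirroring the new
    handler code."""
    time_created = None
    result = [r["last"] for r in cursor_page]
    for r in result:
        time_created = r.pop("_time_created")
    return time_created, result

# Hypothetical page of aggregation results:
page = [
    {"_id": 0, "last": {"eid": "a", "_time_created": "2024-01-01T00:00:00"}},
    {"_id": 1, "last": {"eid": "b", "_time_created": "2024-01-01T00:10:00"}},
]
```

Because only the `last` subdocuments are returned, the surrounding `_id` fields never reach the response, which is why the old `del r["_id"]` loop could be dropped.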