Conversation
| noop_query = None | ||
| configuration_properties = None | ||
| data_sample_query = None | ||
| sample_query = None |
There was a problem hiding this comment.
This is just to stay consistent with the noop_query name.
| models.db.session.commit() | ||
| logger.info("Deleted %d unused query results.", deleted_count) | ||
|
|
||
|
|
There was a problem hiding this comment.
Let's stay with PEP 8 and use two blank lines between functions.
| def cleanup_data_in_table(table_model): | ||
| removed_metadata = table_model.query.filter( | ||
| table_model.exists == False, | ||
| table_model.exists.is_(False), |
There was a problem hiding this comment.
Also something that flake8 reported in my editor.
| if is_old_data: | ||
| table_model.query.filter( | ||
| table_model.id == removed_metadata_row.id, | ||
| ).delete() |
There was a problem hiding this comment.
As mentioned on IRC, this function should do the cleanup completely on the db side to prevent having to fetch the data to delete it afterwards. In my experience such cleanup functions tend to break with race conditions if the list of things to delete exceeds memory or runtime limits.
| def insert_or_update_table_metadata(data_source, existing_tables_set, table_data): | ||
| # Update all persisted tables that exist to reflect this. | ||
| persisted_tables = TableMetadata.query.filter( | ||
| TableMetadata.name.in_(tuple(existing_tables_set)), |
There was a problem hiding this comment.
No need to convert the set to a tuple for IN queries.
| TableMetadata.data_source_id == ds.id, | ||
| ).all() | ||
|
|
||
| for j, table in enumerate(all_existing_persisted_tables): |
| existing_columns_set = set() | ||
|
|
||
| # Clear the set for the next round | ||
| existing_columns_set.clear() |
There was a problem hiding this comment.
I think clearing the set was the purpose of this, right?
Just a minor follow-up to #930.