-
Notifications
You must be signed in to change notification settings - Fork 3
Revert "Optimize reports fetching" #93
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -199,59 +199,18 @@ public function report(array $resources = []): array | |
| */ | ||
| private function reportAuth(array $resources, array &$report): void | ||
| { | ||
| // check if we need to fetch teams! | ||
| $needTeams = !empty(array_intersect( | ||
| [Resource::TYPE_TEAM, Resource::TYPE_MEMBERSHIP], | ||
| $resources | ||
| )); | ||
|
|
||
| $pageLimit = 25; | ||
| $teams = ['total' => 0, 'teams' => []]; | ||
|
|
||
| if (\in_array(Resource::TYPE_USER, $resources)) { | ||
| $report[Resource::TYPE_USER] = $this->users->list( | ||
| [Query::limit(1)] | ||
| )['total']; | ||
| } | ||
|
|
||
| if ($needTeams) { | ||
| if (\in_array(Resource::TYPE_MEMBERSHIP, $resources)) { | ||
| $allTeams = []; | ||
| $lastTeam = null; | ||
|
|
||
| while (true) { | ||
| $params = $lastTeam | ||
| // TODO: should we use offset here? | ||
| // this, realistically, shouldn't be too much ig | ||
| ? [Query::cursorAfter($lastTeam)] | ||
| : [Query::limit($pageLimit)]; | ||
|
|
||
| $teamList = $this->teams->list($params); | ||
|
|
||
| $totalTeams = $teamList['total']; | ||
| $currentTeams = $teamList['teams']; | ||
|
|
||
| $allTeams = array_merge($allTeams, $currentTeams); | ||
| $lastTeam = $currentTeams[count($currentTeams) - 1]['$id'] ?? null; | ||
|
|
||
| if (count($currentTeams) < $pageLimit) { | ||
| break; | ||
| } | ||
| } | ||
| $teams = ['total' => $totalTeams, 'teams' => $allTeams]; | ||
| } else { | ||
| $teamList = $this->teams->list([Query::limit(1)]); | ||
| $teams = ['total' => $teamList['total'], 'teams' => []]; | ||
| } | ||
| $report[Resource::TYPE_USER] = $this->users->list()['total']; | ||
| } | ||
|
|
||
| if (\in_array(Resource::TYPE_TEAM, $resources)) { | ||
| $report[Resource::TYPE_TEAM] = $teams['total']; | ||
| $report[Resource::TYPE_TEAM] = $this->teams->list()['total']; | ||
| } | ||
|
|
||
| if (\in_array(Resource::TYPE_MEMBERSHIP, $resources)) { | ||
| $report[Resource::TYPE_MEMBERSHIP] = 0; | ||
| foreach ($teams['teams'] as $team) { | ||
| $teams = $this->teams->list()['teams']; | ||
| foreach ($teams as $team) { | ||
| $report[Resource::TYPE_MEMBERSHIP] += $this->teams->listMemberships( | ||
| $team['$id'], | ||
| [Query::limit(1)] | ||
|
|
@@ -277,14 +236,9 @@ private function reportDatabases(array $resources, array &$report): void | |
| private function reportStorage(array $resources, array &$report): void | ||
| { | ||
| if (\in_array(Resource::TYPE_BUCKET, $resources)) { | ||
| // just fetch one bucket for the `total` | ||
| $report[Resource::TYPE_BUCKET] = $this->storage->listBuckets([ | ||
| Query::limit(1) | ||
| ])['total']; | ||
| $report[Resource::TYPE_BUCKET] = $this->storage->listBuckets()['total']; | ||
| } | ||
|
|
||
| $pageLimit = 25; | ||
|
|
||
| if (\in_array(Resource::TYPE_FILE, $resources)) { | ||
| $report[Resource::TYPE_FILE] = 0; | ||
| $report['size'] = 0; | ||
|
|
@@ -295,89 +249,58 @@ private function reportStorage(array $resources, array &$report): void | |
| $currentBuckets = $this->storage->listBuckets( | ||
| $lastBucket | ||
| ? [Query::cursorAfter($lastBucket)] | ||
| : [Query::limit($pageLimit)] | ||
| : [Query::limit(20)] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Hardcoded limit of 20 seems arbitrary. The pagination limit is hardcoded to 20, which is smaller than the Appwrite API default of 25. This will result in more API calls than necessary. Consider using the API's default limit or making this configurable: - : [Query::limit(20)]
+ : [Query::limit(25)]Also applies to: 272-272 🤖 Prompt for AI Agents |
||
| )['buckets']; | ||
|
|
||
| $buckets = array_merge($buckets, $currentBuckets); | ||
| $lastBucket = $buckets[count($buckets) - 1]['$id'] ?? null; | ||
|
|
||
| if (count($currentBuckets) < $pageLimit) { | ||
| if (count($currentBuckets) < 20) { | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| foreach ($buckets as $bucket) { | ||
| $files = []; | ||
| $lastFile = null; | ||
|
|
||
| while (true) { | ||
| $files = $this->storage->listFiles( | ||
| $currentFiles = $this->storage->listFiles( | ||
| $bucket['$id'], | ||
| $lastFile | ||
| ? [Query::cursorAfter($lastFile)] | ||
| : [Query::limit($pageLimit)] | ||
| : [Query::limit(20)] | ||
| )['files']; | ||
|
|
||
| $report[Resource::TYPE_FILE] += count($files); | ||
| foreach ($files as $file) { | ||
| // already includes the `sizeOriginal` | ||
| $report['size'] += $file['sizeOriginal'] ?? 0; | ||
| } | ||
|
|
||
| $files = array_merge($files, $currentFiles); | ||
| $lastFile = $files[count($files) - 1]['$id'] ?? null; | ||
|
|
||
| if (count($files) < $pageLimit) { | ||
| if (count($currentFiles) < 20) { | ||
| break; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| $report[Resource::TYPE_FILE] += count($files); | ||
| foreach ($files as $file) { | ||
| $report['size'] += $this->storage->getFile( | ||
| $bucket['$id'], | ||
| $file['$id'] | ||
| )['sizeOriginal']; | ||
| } | ||
|
Comment on lines
+284
to
+289
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Severe performance issue: Individual API calls for each file. Making a separate API call to get the size of each file is extremely inefficient. For buckets with thousands of files, this will result in thousands of API calls and likely hit rate limits. This approach will not scale. Consider:
🤖 Prompt for AI Agents |
||
| } | ||
| $report['size'] = $report['size'] / 1000 / 1000; // MB | ||
| } | ||
| } | ||
|
|
||
| private function reportFunctions(array $resources, array &$report): void | ||
| { | ||
| $pageLimit = 25; | ||
| $needVarsOrDeployments = ( | ||
| \in_array(Resource::TYPE_DEPLOYMENT, $resources) || | ||
| \in_array(Resource::TYPE_ENVIRONMENT_VARIABLE, $resources) | ||
| ); | ||
|
|
||
| $functions = []; | ||
| $totalFunctions = 0; | ||
|
|
||
| if (!$needVarsOrDeployments && \in_array(Resource::TYPE_FUNCTION, $resources)) { | ||
| // Only function count needed, short-circuit | ||
| $funcList = $this->functions->list([Query::limit(1)]); | ||
| $report[Resource::TYPE_FUNCTION] = $funcList['total']; | ||
| return; | ||
| } | ||
|
|
||
| if ($needVarsOrDeployments) { | ||
| $lastFunction = null; | ||
| while (true) { | ||
| $params = $lastFunction | ||
| ? [Query::cursorAfter($lastFunction)] | ||
| : [Query::limit($pageLimit)]; | ||
|
|
||
| $funcList = $this->functions->list($params); | ||
|
|
||
| $totalFunctions = $funcList['total']; | ||
| $currentFunctions = $funcList['functions']; | ||
| $functions = array_merge($functions, $currentFunctions); | ||
|
|
||
| $lastFunction = $currentFunctions[count($currentFunctions) - 1]['$id'] ?? null; | ||
| if (count($currentFunctions) < $pageLimit) { | ||
| break; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if (\in_array(Resource::TYPE_FUNCTION, $resources)) { | ||
| $report[Resource::TYPE_FUNCTION] = $totalFunctions; | ||
| $report[Resource::TYPE_FUNCTION] = $this->functions->list()['total']; | ||
| } | ||
|
|
||
| if (\in_array(Resource::TYPE_DEPLOYMENT, $resources)) { | ||
| $report[Resource::TYPE_DEPLOYMENT] = 0; | ||
| $functions = $this->functions->list()['functions']; | ||
| foreach ($functions as $function) { | ||
| if (!empty($function['deploymentId'])) { | ||
| $report[Resource::TYPE_DEPLOYMENT] += 1; | ||
|
|
@@ -387,9 +310,9 @@ private function reportFunctions(array $resources, array &$report): void | |
|
|
||
| if (\in_array(Resource::TYPE_ENVIRONMENT_VARIABLE, $resources)) { | ||
| $report[Resource::TYPE_ENVIRONMENT_VARIABLE] = 0; | ||
| $functions = $this->functions->list()['functions']; | ||
| foreach ($functions as $function) { | ||
| // function model contains `vars`, we don't need to fetch the list again. | ||
| $report[Resource::TYPE_ENVIRONMENT_VARIABLE] += count($function['vars'] ?? []); | ||
| $report[Resource::TYPE_ENVIRONMENT_VARIABLE] += $this->functions->listVariables($function['$id'])['total']; | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -1374,7 +1297,7 @@ private function exportFunctions(int $batchSize): void | |
| $function['events'], | ||
| $function['schedule'], | ||
| $function['timeout'], | ||
| $function['deploymentId'] ?? '', | ||
| $function['deploymentId'], | ||
| $function['entrypoint'] | ||
| ); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -61,56 +61,39 @@ public function report(array $resources, array &$report): mixed | |
| foreach ($databases as $database) { | ||
| $databaseId = $database['$id']; | ||
|
|
||
| $tables = []; | ||
| $pageLimit = 25; | ||
| $lastTable = null; | ||
|
|
||
| while (true) { | ||
| /* $currentTables = $this->tables->list(...); */ | ||
| $currentTables = $this->database->listCollections( | ||
| $databaseId, | ||
| $lastTable | ||
| ? [Query::cursorAfter($lastTable)] | ||
| : [Query::limit($pageLimit)] | ||
| )['collections']; /* ['tables'] */ | ||
|
|
||
| $tables = array_merge($tables, $currentTables); | ||
| $lastTable = $tables[count($tables) - 1]['$id'] ?? null; | ||
|
|
||
| if (count($currentTables) < $pageLimit) { | ||
| break; | ||
| } | ||
| } | ||
| /* $tablesResponse = $this->tables->list(...); */ | ||
| $tablesResponse = $this->database->listCollections($databaseId); | ||
| $tables = $tablesResponse['collections']; | ||
|
|
||
| if (Resource::isSupported(Resource::TYPE_TABLE, $resources)) { | ||
| $report[Resource::TYPE_TABLE] += count($tables); | ||
| $report[Resource::TYPE_TABLE] += $tablesResponse['total']; | ||
| } | ||
|
Comment on lines
+64
to
70
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Performance concern: Removing pagination may cause issues with large datasets. The removal of pagination for table fetching could potentially cause memory issues or API timeouts when dealing with databases that have many tables. The previous implementation used cursor-based pagination, which is more scalable. Without pagination, if a database has hundreds or thousands of tables, this single API call might fail or consume excessive memory. 🤖 Prompt for AI Agents |
||
|
|
||
| if (Resource::isSupported([Resource::TYPE_ROW, Resource::TYPE_COLUMN, Resource::TYPE_INDEX], $resources)) { | ||
| foreach ($tables as $table) { | ||
| $tableId = $table['$id']; | ||
|
|
||
| if (Resource::isSupported(Resource::TYPE_COLUMN, $resources)) { | ||
| // a table already returns a list of attributes | ||
| $report[Resource::TYPE_COLUMN] += count($table['columns'] ?? $table['attributes'] ?? []); | ||
| } | ||
|
|
||
| if (in_array(Resource::TYPE_INDEX, $resources)) { | ||
| // a table already returns a list of indexes | ||
| $report[Resource::TYPE_INDEX] += count($table['indexes'] ?? []); | ||
| } | ||
|
|
||
| // this one's a bit heavy if the number of tables are high! | ||
| if (Resource::isSupported(Resource::TYPE_ROW, $resources)) { | ||
| /* $rowsResponse = $this->tables->listRows(...) */ | ||
| $rowsResponse = $this->database->listDocuments( | ||
| $databaseId, | ||
| $tableId, | ||
| [Query::limit(1)] | ||
| ); | ||
|
|
||
| $report[Resource::TYPE_ROW] += $rowsResponse['total']; | ||
| } | ||
|
|
||
| if (Resource::isSupported(Resource::TYPE_COLUMN, $resources)) { | ||
| /* $columnsResponse = $this->tables->listColumns(...); */ | ||
| $columnsResponse = $this->database->listAttributes($databaseId, $tableId); | ||
| $report[Resource::TYPE_COLUMN] += $columnsResponse['total']; | ||
| } | ||
|
|
||
| if (in_array(Resource::TYPE_INDEX, $resources)) { | ||
| /* $indexesResponse = $this->tables->listIndexes(...); */ | ||
| $indexesResponse = $this->database->listIndexes($databaseId, $tableId); | ||
| $report[Resource::TYPE_INDEX] += $indexesResponse['total']; | ||
| } | ||
|
Comment on lines
+86
to
+96
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Performance impact: Multiple API calls instead of using embedded metadata. This change shifts from using embedded table metadata to making separate API calls for each table to get column and index counts. For a database with N tables, this results in 2N additional API calls (one for columns, one for indexes per table). This could significantly impact performance and API rate limits when dealing with many tables. Consider implementing a caching mechanism or batching API calls if the embedded metadata approach had accuracy issues that necessitated this change. 🤖 Prompt for AI Agents |
||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Critical: Removing pagination for users will fail with large user bases
This change removes pagination when fetching users. If there are thousands or millions of users, this single API call will likely timeout or exceed memory limits.
Consider keeping the pagination logic or at least document the limitations of this approach.
🤖 Prompt for AI Agents