From 6fb44a5e8b497fa0bfc7841a6b6d9437e68a5ba1 Mon Sep 17 00:00:00 2001 From: jiasli <4003950+jiasli@users.noreply.github.com> Date: Wed, 19 Oct 2022 15:45:36 +0800 Subject: [PATCH 1/2] trim sdk --- .azure-pipelines/templates/azdev_setup.yml | 2 +- build_scripts/windows/scripts/build.cmd | 8 +- .../scripts/remove_unused_api_versions.py | 59 ------ scripts/trim_sdk.py | 171 ++++++++++++++++++ .../azure/cli/core/profiles/_shared.py | 12 ++ 5 files changed, 185 insertions(+), 67 deletions(-) delete mode 100644 build_scripts/windows/scripts/remove_unused_api_versions.py create mode 100644 scripts/trim_sdk.py diff --git a/.azure-pipelines/templates/azdev_setup.yml b/.azure-pipelines/templates/azdev_setup.yml index c9a532ca364..d2514a004d6 100644 --- a/.azure-pipelines/templates/azdev_setup.yml +++ b/.azure-pipelines/templates/azdev_setup.yml @@ -22,7 +22,7 @@ steps: azdev setup -c $CLI_REPO_PATH -r $CLI_EXT_REPO_PATH --debug fi # This helps detect issues in CI if a used SDK API version is deleted by the below script. - python $CLI_REPO_PATH/build_scripts/windows/scripts/remove_unused_api_versions.py + python $CLI_REPO_PATH/scripts/trim_sdk.py displayName: 'azdev setup' env: CLI_REPO_PATH: ${{ parameters.CLIRepoPath }} diff --git a/build_scripts/windows/scripts/build.cmd b/build_scripts/windows/scripts/build.cmd index 85d89338adf..bd750950b87 100644 --- a/build_scripts/windows/scripts/build.cmd +++ b/build_scripts/windows/scripts/build.cmd @@ -122,7 +122,7 @@ if %errorlevel% neq 0 goto ERROR pushd %BUILDING_DIR% %BUILDING_DIR%\python.exe %~dp0\patch_models_v2.py -%BUILDING_DIR%\python.exe %~dp0\remove_unused_api_versions.py +%BUILDING_DIR%\python.exe %REPO_ROOT%\scripts\trim_sdk.py popd echo Creating the wbin (Windows binaries) folder that will be added to the path... 
@@ -166,12 +166,6 @@ for /d /r %BUILDING_DIR%\Lib\site-packages\pip %%d in (__pycache__) do ( if exist %%d rmdir /s /q "%%d" ) -REM Remove aio -echo remove aio -for /d /r %BUILDING_DIR%\Lib\site-packages\azure\mgmt %%d in (aio) do ( - if exist %%d rmdir /s /q "%%d" -) - REM Remove dist-info echo remove dist-info pushd %BUILDING_DIR%\Lib\site-packages diff --git a/build_scripts/windows/scripts/remove_unused_api_versions.py b/build_scripts/windows/scripts/remove_unused_api_versions.py deleted file mode 100644 index bea5cd5af56..00000000000 --- a/build_scripts/windows/scripts/remove_unused_api_versions.py +++ /dev/null @@ -1,59 +0,0 @@ -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# -------------------------------------------------------------------------------------------- - -import logging -import os -import re -import shutil -import azure.mgmt.network - -from azure.cli.core.profiles import AD_HOC_API_VERSIONS, AZURE_API_PROFILES, ResourceType - - -_LOGGER = logging.getLogger(__name__) - - -def remove_unused_network_api_versions(): - # Hard-coded API versions - used_network_api_versions = set(AD_HOC_API_VERSIONS[ResourceType.MGMT_NETWORK].values()) - - # API versions in profile - for _, profile in AZURE_API_PROFILES.items(): - if ResourceType.MGMT_NETWORK in profile: - used_network_api_versions.add(profile[ResourceType.MGMT_NETWORK]) - - # Normalize API version: 2019-02-01 -> v2019_02_01 - used_network_api_vers = {f"v{api.replace('-','_')}" for api in used_network_api_versions} - - # Network SDK has a set of versions imported in models.py. - # Let's keep them before we figure out how to remove a version in all related SDK files. 
- path = azure.mgmt.network.__path__[0] - model_file = os.path.join(path, 'models.py') - with open(model_file, 'r', encoding='utf-8') as f: - content = f.read() - for m in re.finditer(r'from \.(v[_\d\w]*)\.models import \*', content): - used_network_api_vers.add(m.group(1)) - - _LOGGER.info('Used network API versions:') - _LOGGER.info(sorted(used_network_api_vers)) - - all_api_vers = {d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d)) and d.startswith('v')} - _LOGGER.info('All network API versions:') - _LOGGER.info(sorted(all_api_vers)) - - remove_api_vers = sorted(all_api_vers - used_network_api_vers) - _LOGGER.info('Network API versions that will be removed:') - _LOGGER.info(remove_api_vers) - - for ver in remove_api_vers: - shutil.rmtree(os.path.join(path, ver)) - - -def main(): - remove_unused_network_api_versions() - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - main() diff --git a/scripts/trim_sdk.py b/scripts/trim_sdk.py new file mode 100644 index 00000000000..fab5f059885 --- /dev/null +++ b/scripts/trim_sdk.py @@ -0,0 +1,171 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. 
+# -------------------------------------------------------------------------------------------- + +""" +This script trims Python SDKs by +- Removing aio folders +- Removing unused API versions +""" + +import glob +import importlib +import logging +import os +import re +import shutil + +import azure.mgmt +import azure.mgmt.network + +from azure.cli.core.profiles import AD_HOC_API_VERSIONS, AZURE_API_PROFILES, ResourceType + +_LOGGER = logging.getLogger(__name__) + +DRY_RUN = False # Change to True to dry run + + +def _rmtree(path): + _LOGGER.warning(path) + if not DRY_RUN: + shutil.rmtree(path) + + +def calculate_folder_size(start_path): + """Calculate total size of a folder and file count.""" + # https://stackoverflow.com/questions/1392413/calculating-a-directorys-size-using-python + total_size = 0 + total_count = 0 + for dirpath, dirnames, filenames in os.walk(start_path): + for f in filenames: + fp = os.path.join(dirpath, f) + # skip if it is symbolic link + if not os.path.islink(fp): + total_count += 1 + total_size += os.path.getsize(fp) + + return total_size, total_count + + +def remove_aio_folders(): + _LOGGER.info("Removing aio folders:") + mgmt_sdk_dir = azure.mgmt.__path__[0] + for aio_folder in glob.glob(os.path.join(mgmt_sdk_dir, '**/aio'), recursive=True): + _rmtree(aio_folder) + + +def remove_unused_api_versions(resource_type): + _LOGGER.info(f"Removing unused api folders for {resource_type.import_prefix}:") + sdk_path = importlib.import_module(resource_type.import_prefix).__path__[0] + + used_api_versions = set() + + # Hard-coded API versions + if resource_type in AD_HOC_API_VERSIONS: + ad_hoc_api_versions = set(AD_HOC_API_VERSIONS[resource_type].values()) + used_api_versions.update(ad_hoc_api_versions) + + # API versions in profile + for profile in AZURE_API_PROFILES.values(): + if resource_type in profile: + # value is str like '2022-01-01' or SDKProfile + value = profile[resource_type] + if isinstance(value, str): + used_api_versions.add(value) + 
else: + # SDKProfile + # default_api_version is in value.profile[None] + used_api_versions.update(value.profile.values()) + + # Convert API version to its folder format: 2019-02-01 -> v2019_02_01 + used_api_folders = {f"v{api.replace('-','_')}" for api in used_api_versions} + + # SDK has a set of versions imported in models.py to form all latest models + model_file = os.path.join(sdk_path, 'models.py') + if os.path.exists(model_file): + with open(model_file, 'r', encoding='utf-8') as f: + content = f.read() + for m in re.finditer(r'from \.(v[_\d\w]*)\.models import \*', content): + used_api_folders.add(m.group(1)) + + _LOGGER.info(f'Used API folders: {sorted(used_api_folders)}') + + all_api_folders = {d for d in os.listdir(sdk_path) if os.path.isdir(os.path.join(sdk_path, d)) and d.startswith('v')} + _LOGGER.info(f'All API folders: {sorted(all_api_folders)}') + + remove_api_folders = sorted(all_api_folders - used_api_folders) + _LOGGER.info(f'API folders to remove: {remove_api_folders}') + + for api_folder in remove_api_folders: + full_path = os.path.join(sdk_path, api_folder) + _rmtree(full_path) + + +def _print_folder_size(folder): + size, count = calculate_folder_size(folder) + size_in_mb = size / 1048576 + _LOGGER.info(f"{size_in_mb:.2f} MB, {count} files") + + +def _get_all_sdks_to_trim(): + resource_types = [k for k, v in AZURE_API_PROFILES['latest'].items() if k.import_prefix.startswith('azure.mgmt')] + return resource_types + + +def _get_biggest_sdks_to_trim(): + # Return top biggest SDKs. 
This list was retrieved by running + # ncdu /opt/az/lib/python3.10/site-packages/azure/mgmt + resource_types = [ + # /network + ResourceType.MGMT_NETWORK, + # /web + ResourceType.MGMT_APPSERVICE, + # /compute + ResourceType.MGMT_COMPUTE, + # /containerservice + ResourceType.MGMT_CONTAINERSERVICE, + # /resource + ResourceType.MGMT_RESOURCE_FEATURES, + ResourceType.MGMT_RESOURCE_LINKS, + ResourceType.MGMT_RESOURCE_LOCKS, + ResourceType.MGMT_RESOURCE_POLICY, + ResourceType.MGMT_RESOURCE_RESOURCES, + ResourceType.MGMT_RESOURCE_SUBSCRIPTIONS, + ResourceType.MGMT_RESOURCE_DEPLOYMENTSCRIPTS, + ResourceType.MGMT_RESOURCE_TEMPLATESPECS, + ResourceType.MGMT_RESOURCE_PRIVATELINKS, + # /storage + ResourceType.MGMT_STORAGE, + # /databoxedge + ResourceType.MGMT_DATABOXEDGE, + # /containerregistry + ResourceType.MGMT_CONTAINERREGISTRY, + # /iothub + ResourceType.MGMT_IOTHUB, + ] + + return resource_types + + +def main(): + mgmt_sdk_dir = azure.mgmt.__path__[0] + + # Remove aio folders + _print_folder_size(mgmt_sdk_dir) + remove_aio_folders() + + _print_folder_size(mgmt_sdk_dir) + + # Remove unused API versions + resource_types = _get_biggest_sdks_to_trim() + + for r in resource_types: + remove_unused_api_versions(r) + + _print_folder_size(mgmt_sdk_dir) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + main() diff --git a/src/azure-cli-core/azure/cli/core/profiles/_shared.py b/src/azure-cli-core/azure/cli/core/profiles/_shared.py index 804ee5f784f..2af6eb36065 100644 --- a/src/azure-cli-core/azure/cli/core/profiles/_shared.py +++ b/src/azure-cli-core/azure/cli/core/profiles/_shared.py @@ -415,6 +415,18 @@ def default_api_version(self): 'container_network': '2018-08-01', 'appservice_network': '2020-04-01', 'appservice_ensure_subnet': '2019-02-01' + }, + ResourceType.MGMT_CONTAINERREGISTRY: { + # src/azure-cli/azure/cli/command_modules/acr/_client_factory.py:8 + 'VERSION_2019_05_01_PREVIEW': "2019-05-01-preview", + 'VERSION_2019_06_01_PREVIEW':
"2019-06-01-preview", + 'VERSION_2020_11_01_PREVIEW': "2020-11-01-preview", + 'VERSION_2021_08_01_PREVIEW': "2021-08-01-preview", + 'VERSION_2022_02_01_PREVIEW': "2022-02-01-preview", + }, + ResourceType.MGMT_CONTAINERSERVICE: { + # src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_custom.py:50 + 'ManagedClusterAddonProfile': '2020-03-01', } } From 821581f0a676abdcc55a17a05c872e967a3667f5 Mon Sep 17 00:00:00 2001 From: Jiashuo Li <4003950+jiasli@users.noreply.github.com> Date: Wed, 26 Oct 2022 17:15:48 +0800 Subject: [PATCH 2/2] Update scripts/trim_sdk.py --- scripts/trim_sdk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/trim_sdk.py b/scripts/trim_sdk.py index fab5f059885..1de50786a3c 100644 --- a/scripts/trim_sdk.py +++ b/scripts/trim_sdk.py @@ -104,7 +104,7 @@ def remove_unused_api_versions(resource_type): def _print_folder_size(folder): size, count = calculate_folder_size(folder) - size_in_mb = size / 1048576 + size_in_mb = size / 1048576 # 1 MB = 1024 * 1024 B = 1048576 B _LOGGER.info(f"{size_in_mb:.2f} MB, {count} files")