From 249c11ae2440add63b574d7af5ae93860821020a Mon Sep 17 00:00:00 2001
From: Yassine Khelifi
Date: Wed, 21 Oct 2020 09:07:06 +0000
Subject: [PATCH] remove build_image call

---
 examples/mlflow/mlflow_azureml.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/mlflow/mlflow_azureml.ipynb b/examples/mlflow/mlflow_azureml.ipynb
index a6537b0..43cfdd7 100644
--- a/examples/mlflow/mlflow_azureml.ipynb
+++ b/examples/mlflow/mlflow_azureml.ipynb
@@ -1 +1 @@
-{"cells":[{"cell_type":"markdown","source":["## End to end example on how to track MLFlow experiment executed on Databricks in Azure Machine Learning\n\n
\n\n\n### Connect to Azure ML workspace and set MLFlow experiment"],"metadata":{}},{"cell_type":"code","source":["import mlflow\nimport mlflow.azureml\nimport azureml.mlflow\nimport azureml.core\n\nfrom azureml.core import Workspace\n\nsubscription_id = ''\n\n# Azure Machine Learning resource group \nresource_group = '' \n\n#Azure Machine Learning workspace name\nworkspace_name = ' \n\n# Instantiate Azure Machine Learning workspace\nws = Workspace.get(name=workspace_name,\n subscription_id=subscription_id,\n resource_group=resource_group)\n\n#Set MLflow experiment. \nexperimentName = \"/Users/{user name}/mlflowexp\" \n\nfor ex in experiments:\n if experimentName in experiments[0].name:\n print(f' found {experiments[0].name}')\n break;\n else:\n mlflow.create_experiment(experimentName) \n"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"markdown","source":["## Configure MLFlow tracking URL"],"metadata":{}},{"cell_type":"code","source":["uri = ws.get_mlflow_tracking_uri()\nmlflow.set_tracking_uri(uri)\nprint(uri)"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["# Import the dataset from scikit-learn and create the training and test datasets. \nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets import load_diabetes\n\ndb = load_diabetes()\nX = db.data\ny = db.target\nX_train, X_test, y_train, y_test = train_test_split(X, y)"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["import os\nimport joblib\nimport mlflow\nimport mlflow.sklearn\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_squared_error\n\nmodel_save_path = \"model\"\nexperiment_name = 'experiment-with-mlflow-projects'\nmlflow.set_experiment(experiment_name)\n\nwith mlflow.start_run():\n n_estimators = 100\n max_depth = 6\n max_features = 3\n # Create and train model\n rf = RandomForestRegressor(n_estimators = n_estimators, max_depth = max_depth, max_features = max_features)\n rf.fit(X_train, y_train)\n # Make predictions\n predictions = rf.predict(X_test)\n \n # Log parameters\n mlflow.log_param(\"num_trees\", n_estimators)\n mlflow.log_param(\"maxdepth\", max_depth)\n mlflow.log_param(\"max_feat\", max_features)\n \n # Log model\n mlflow.sklearn.log_model(rf, \"random-forest-model\")\n \n # Create metrics\n mse = mean_squared_error(y_test, predictions)\n \n # Log metrics\n mlflow.log_metric(\"mse\", mse)\n \n \n # Save the model to the outputs directory for capture\n mlflow.sklearn.log_model(rf, model_save_path)\n "],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["exp = ws.experiments[experiment_name]\nexp"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["runs = list(exp.get_runs())\nprint(runs)\nrunid = runs[0].id"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["import mlflow.azureml\n\n\n\nmodel_image, azure_model = mlflow.azureml.build_image(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n workspace=ws,\n model_name=\"model\",\n image_name=\"model\",\n description=\"Sklearn Random forest\",\n synchronous=False)"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"markdown","source":["##Create AKS cluster\n\n Documentation below can be used to use the other options such as ACI or attaching existing cluster.\n\nhttps://docs.microsoft.com/en-us/azure/databricks/_static/notebooks/mlflow/mlflow-quick-start-deployment-azure.html"],"metadata":{}},{"cell_type":"code","source":["from 
azureml.core.compute import AksCompute, ComputeTarget\n\n# Use the default configuration (you can also provide parameters to customize this)\nprov_config = AksCompute.provisioning_configuration()\n\naks_cluster_name = \"aks-cluster\" \n\ntry:\n aks_target = ComputeTarget(workspace=ws, name=aks_cluster_name )\n print('Found existing cluster, use it.')\nexcept ComputeTargetException:\n # Create the cluster\n aks_target = ComputeTarget.create(workspace = ws, \n name = aks_cluster_name, \n provisioning_configuration = prov_config)\n\n # Wait for the create process to complete\n aks_target.wait_for_completion(show_output = True)\n \n \nprint(aks_target.provisioning_state)\nprint(aks_target.provisioning_errors)"],"metadata":{},"outputs":[],"execution_count":11},{"cell_type":"markdown","source":["## Deploy the model to AKS cluster"],"metadata":{}},{"cell_type":"code","source":["from azureml.core.webservice import Webservice, AksWebservice\n\n# Set configuration and service name\nprod_webservice_name = \"model-prod\"\nprod_webservice_deployment_config = AksWebservice.deploy_configuration(compute_target_name = \"aks-cluster\" )\n\nweb_service, azure_model = mlflow.azureml.deploy(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n service_name=prod_webservice_name,\n deployment_config = prod_webservice_deployment_config,\n workspace=ws,\n synchronous=True)\n"],"metadata":{},"outputs":[],"execution_count":13},{"cell_type":"markdown","source":["## Invoke Webservice"],"metadata":{}},{"cell_type":"code","source":["test_rows = [\n [0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,\n -0.0266789 , -0.02499266, -0.00259226, 0.00371174, 0.04034337],\n [-0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,\n 0.02937467, -0.01762938, 0.03430886, 0.0702113 , 0.00720652],\n [ 0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,\n -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652]]\n\nimport json\nimport pandas as pd\n\ntest_rows_as_json = pd.DataFrame(test_rows).to_json(orient=\"split\")\n\npredictions = web_service.run(test_rows_as_json)\nprint(predictions)"],"metadata":{},"outputs":[],"execution_count":15}],"metadata":{"name":"mlflow_azureml","notebookId":4039477032853995},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file
+{"cells":[{"cell_type":"markdown","source":["## Track Azure Databricks runs using MLflow in Azure Machine Learning\n\nTo run this notebook:\n * Have an Azure Databricks workspace and cluster provisioned\n * On the Databricks cluster, install the azureml-mlflow package, which also installs azureml-core, as described in the [how-to-use-mlflow-azure-databricks documentation page](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-mlflow-azure-databricks)\n * Import the notebook into your Azure Databricks workspace\n
\n\n\n### Connect to Azure ML workspace"],"metadata":{}},{"cell_type":"code","source":["import mlflow\nimport azureml.mlflow\nimport azureml.core\n\nfrom azureml.core import Workspace\n\nsubscription_id = ''\n\n# Azure Machine Learning resource group \nresource_group = '' \n\n# Azure Machine Learning workspace name\nworkspace_name = ''\n\n# Instantiate Azure Machine Learning workspace\nws = Workspace.get(name=workspace_name,\n subscription_id=subscription_id,\n resource_group=resource_group)\n"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"markdown","source":["## Set MLflow experiment"],"metadata":{}},{"cell_type":"code","source":["import mlflow\nimport azureml.mlflow\n\n# Create MLflow experiment. \nexperiment_name = \"/Users/{adb_user_name}/mlflowexp\" \n\nfrom mlflow.exceptions import RestException\n\ntry:\n mlflow.create_experiment(experiment_name)\nexcept RestException:\n print(f'{experiment_name} already exists')\n "],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"markdown","source":["## Configure MLflow tracking URI"],"metadata":{}},{"cell_type":"code","source":["uri = ws.get_mlflow_tracking_uri()\nmlflow.set_tracking_uri(uri)\nprint(uri)"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["# Import the dataset from scikit-learn and create the training and test datasets. \nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets import load_diabetes\n\ndb = load_diabetes()\nX = db.data\ny = db.target\nX_train, X_test, y_train, y_test = train_test_split(X, y)"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["import os\nimport joblib\nimport mlflow\nimport mlflow.sklearn\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_squared_error\n\nmodel_save_path = \"model\"\nexperiment_name = 'experiment-with-mlflow-projects'\nmlflow.set_experiment(experiment_name)\n\nwith mlflow.start_run():\n n_estimators = 100\n max_depth = 6\n max_features = 3\n # Create and train model\n rf = RandomForestRegressor(n_estimators = n_estimators, max_depth = max_depth, max_features = max_features)\n rf.fit(X_train, y_train)\n # Make predictions\n predictions = rf.predict(X_test)\n \n # Log parameters\n mlflow.log_param(\"num_trees\", n_estimators)\n mlflow.log_param(\"maxdepth\", max_depth)\n mlflow.log_param(\"max_feat\", max_features)\n \n # Log model\n mlflow.sklearn.log_model(rf, \"random-forest-model\")\n \n # Create metrics\n mse = mean_squared_error(y_test, predictions)\n \n # Log metrics\n mlflow.log_metric(\"mse\", mse)\n \n \n # Save the model to the outputs directory for capture\n mlflow.sklearn.log_model(rf, model_save_path)\n "],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"markdown","source":["## Create AKS cluster\n\nThe documentation below covers other deployment options, such as ACI or attaching an existing cluster.\n\nhttps://docs.microsoft.com/en-us/azure/databricks/_static/notebooks/mlflow/mlflow-quick-start-deployment-azure.html"],"metadata":{}},{"cell_type":"code","source":["from azureml.core.compute import AksCompute, ComputeTarget\nfrom azureml.exceptions import ComputeTargetException\n\n# Use the default configuration (you can also provide parameters to customize this)\nprov_config = AksCompute.provisioning_configuration()\n\naks_cluster_name = \"aks-cluster\" \n\ntry:\n aks_target = ComputeTarget(workspace=ws, name=aks_cluster_name )\n print('Found existing cluster, use it.')\nexcept ComputeTargetException:\n # Create the cluster\n 
aks_target = ComputeTarget.create(workspace = ws, \n name = aks_cluster_name, \n provisioning_configuration = prov_config)\n\n # Wait for the create process to complete\n aks_target.wait_for_completion(show_output = True)\n \n \nprint(aks_target.provisioning_state)\nprint(aks_target.provisioning_errors)"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"markdown","source":["## Retrieve Azure ML run ID"],"metadata":{}},{"cell_type":"code","source":["exp = ws.experiments[experiment_name]\nruns = list(exp.get_runs())\nprint(runs)\n\nrunid = runs[0].id"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"markdown","source":["## Deploy the model to AKS cluster"],"metadata":{}},{"cell_type":"code","source":["from azureml.core.webservice import Webservice, AksWebservice\nimport mlflow.azureml\n# Set configuration and service name\nprod_webservice_name = \"model-prod\"\nprod_webservice_deployment_config = AksWebservice.deploy_configuration(compute_target_name = \"aks-cluster\" )\n\nweb_service, azure_model = mlflow.azureml.deploy(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n service_name=prod_webservice_name,\n deployment_config = prod_webservice_deployment_config,\n workspace=ws,\n synchronous=True)\n"],"metadata":{},"outputs":[],"execution_count":14},{"cell_type":"markdown","source":["## Invoke Webservice"],"metadata":{}},{"cell_type":"code","source":["test_rows = [\n [0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,\n -0.0266789 , -0.02499266, -0.00259226, 0.00371174, 0.04034337],\n [-0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,\n 0.02937467, -0.01762938, 0.03430886, 0.0702113 , 0.00720652],\n [ 0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,\n -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652]]\n\nimport json\nimport pandas as pd\n\ntest_rows_as_json = pd.DataFrame(test_rows).to_json(orient=\"split\")\n\npredictions = web_service.run(test_rows_as_json)\nprint(predictions)"],"metadata":{},"outputs":[],"execution_count":16},{"cell_type":"code","source":["#Delete mlflow experiment\nmlflow_exp = mlflow.get_experiment_by_name(experiment_name)\nmlflow.delete_experiment(mlflow_exp.experiment_id)\n\n#Delete webservice\nweb_service.delete()"],"metadata":{},"outputs":[],"execution_count":17}],"metadata":{"name":"mlflow_azureml","notebookId":4039477032853995},"nbformat":4,"nbformat_minor":0}
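
The "Create AKS cluster" cell in the updated notebook mentions ACI as an alternative deployment target. For illustration only, a minimal sketch of the same mlflow.azureml.deploy call pointed at ACI instead of AKS; the service name and container sizes are assumptions, while the run URI format and the runid, model_save_path, and ws variables come from the notebook cells above:

    import mlflow.azureml
    from azureml.core.webservice import AciWebservice

    # ACI needs no pre-provisioned compute target; size the container explicitly
    aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

    # Deploy the logged model directly (no separate image-build step)
    dev_service, dev_model = mlflow.azureml.deploy(
        model_uri="runs:/{}/{}".format(runid, model_save_path),
        workspace=ws,
        deployment_config=aci_config,
        service_name="model-dev",  # hypothetical service name
        synchronous=True)

    print(dev_service.scoring_uri)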