2 changes: 1 addition & 1 deletion examples/mlflow/mlflow_azureml.ipynb
@@ -1 +1 @@
{"cells":[{"cell_type":"markdown","source":["## End to end example on how to track MLFlow experiment executed on Databricks in Azure Machine Learning\n\n <br />\n <br />\n <br />\n\n\n### Connect to Azure ML workspace and set MLFlow experiment"],"metadata":{}},{"cell_type":"code","source":["import mlflow\nimport mlflow.azureml\nimport azureml.mlflow\nimport azureml.core\n\nfrom azureml.core import Workspace\n\nsubscription_id = ''\n\n# Azure Machine Learning resource group \nresource_group = '' \n\n#Azure Machine Learning workspace name\nworkspace_name = ' \n\n# Instantiate Azure Machine Learning workspace\nws = Workspace.get(name=workspace_name,\n subscription_id=subscription_id,\n resource_group=resource_group)\n\n#Set MLflow experiment. \nexperimentName = \"/Users/{user name}/mlflowexp\" \n\nfor ex in experiments:\n if experimentName in experiments[0].name:\n print(f' found {experiments[0].name}')\n break;\n else:\n mlflow.create_experiment(experimentName) \n"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"markdown","source":["## Configure MLFlow tracking URL"],"metadata":{}},{"cell_type":"code","source":["uri = ws.get_mlflow_tracking_uri()\nmlflow.set_tracking_uri(uri)\nprint(uri)"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["# Import the dataset from scikit-learn and create the training and test datasets. \nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets import load_diabetes\n\ndb = load_diabetes()\nX = db.data\ny = db.target\nX_train, X_test, y_train, y_test = train_test_split(X, y)"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["import os\nimport joblib\nimport mlflow\nimport mlflow.sklearn\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_squared_error\n\nmodel_save_path = \"model\"\nexperiment_name = 'experiment-with-mlflow-projects'\nmlflow.set_experiment(experiment_name)\n\nwith mlflow.start_run():\n n_estimators = 100\n max_depth = 6\n max_features = 3\n # Create and train model\n rf = RandomForestRegressor(n_estimators = n_estimators, max_depth = max_depth, max_features = max_features)\n rf.fit(X_train, y_train)\n # Make predictions\n predictions = rf.predict(X_test)\n \n # Log parameters\n mlflow.log_param(\"num_trees\", n_estimators)\n mlflow.log_param(\"maxdepth\", max_depth)\n mlflow.log_param(\"max_feat\", max_features)\n \n # Log model\n mlflow.sklearn.log_model(rf, \"random-forest-model\")\n \n # Create metrics\n mse = mean_squared_error(y_test, predictions)\n \n # Log metrics\n mlflow.log_metric(\"mse\", mse)\n \n \n # Save the model to the outputs directory for capture\n mlflow.sklearn.log_model(rf, model_save_path)\n "],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["exp = ws.experiments[experiment_name]\nexp"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["runs = list(exp.get_runs())\nprint(runs)\nrunid = runs[0].id"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["import mlflow.azureml\n\n\n\nmodel_image, azure_model = mlflow.azureml.build_image(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n workspace=ws,\n model_name=\"model\",\n image_name=\"model\",\n description=\"Sklearn Random forest\",\n synchronous=False)"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"markdown","source":["##Create AKS cluster\n\n Documentation below can be used to use the other options such as ACI or attaching existing 
cluster.\n\nhttps://docs.microsoft.com/en-us/azure/databricks/_static/notebooks/mlflow/mlflow-quick-start-deployment-azure.html"],"metadata":{}},{"cell_type":"code","source":["from azureml.core.compute import AksCompute, ComputeTarget\n\n# Use the default configuration (you can also provide parameters to customize this)\nprov_config = AksCompute.provisioning_configuration()\n\naks_cluster_name = \"aks-cluster\" \n\ntry:\n aks_target = ComputeTarget(workspace=ws, name=aks_cluster_name )\n print('Found existing cluster, use it.')\nexcept ComputeTargetException:\n # Create the cluster\n aks_target = ComputeTarget.create(workspace = ws, \n name = aks_cluster_name, \n provisioning_configuration = prov_config)\n\n # Wait for the create process to complete\n aks_target.wait_for_completion(show_output = True)\n \n \nprint(aks_target.provisioning_state)\nprint(aks_target.provisioning_errors)"],"metadata":{},"outputs":[],"execution_count":11},{"cell_type":"markdown","source":["## Deploy the model to AKS cluster"],"metadata":{}},{"cell_type":"code","source":["from azureml.core.webservice import Webservice, AksWebservice\n\n# Set configuration and service name\nprod_webservice_name = \"model-prod\"\nprod_webservice_deployment_config = AksWebservice.deploy_configuration(compute_target_name = \"aks-cluster\" )\n\nweb_service, azure_model = mlflow.azureml.deploy(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n service_name=prod_webservice_name,\n deployment_config = prod_webservice_deployment_config,\n workspace=ws,\n synchronous=True)\n"],"metadata":{},"outputs":[],"execution_count":13},{"cell_type":"markdown","source":["## Invoke Webservice"],"metadata":{}},{"cell_type":"code","source":["test_rows = [\n [0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,\n -0.0266789 , -0.02499266, -0.00259226, 0.00371174, 0.04034337],\n [-0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,\n 0.02937467, -0.01762938, 0.03430886, 0.0702113 , 0.00720652],\n [ 0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,\n -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652]]\n\nimport json\nimport pandas as pd\n\ntest_rows_as_json = pd.DataFrame(test_rows).to_json(orient=\"split\")\n\npredictions = web_service.run(test_rows_as_json)\nprint(predictions)"],"metadata":{},"outputs":[],"execution_count":15}],"metadata":{"name":"mlflow_azureml","notebookId":4039477032853995},"nbformat":4,"nbformat_minor":0}
{"cells":[{"cell_type":"markdown","source":["## Track Azure Databricks run using MLflow in Azure Machine Learning\n\nIn order to execute the notebook:\n * You have a databricks workspace and cluster proivisioned\n * In the databricks cluster install azureml-mlflow package which should install azureml-core as per the [/how-to-use-mlflow-azure-databricks documentation page](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-mlflow-azure-databricks)\n * Import the notebook to your Azure Databricks workspace\n <br />\n <br />\n <br />\n\n\n### Connect to Azure ML workspace"],"metadata":{}},{"cell_type":"code","source":["import mlflow\nimport azureml.mlflow\nimport azureml.core\n\nfrom azureml.core import Workspace\n\nsubscription_id = ''\n\n# Azure Machine Learning resource group \nresource_group = '' \n\n#Azure Machine Learning workspace name\nworkspace_name = ''\n\n# Instantiate Azure Machine Learning workspace\nws = Workspace.get(name=workspace_name,\n subscription_id=subscription_id,\n resource_group=resource_group)\n"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"markdown","source":["##Set Mlflow experiment"],"metadata":{}},{"cell_type":"code","source":["import mlflow\nimport azureml.mlflow\n\n#Create MLflow experiment. \nexperiment_name = \"/Users/{adb_user_name}/mlflowexp\" \n\nfrom mlflow.exceptions import RestException\n\ntry:\n mlflow.create_experiment(experiment_name)\nexcept RestException:\n print(f'{experiment_name} already exists')\n "],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"markdown","source":["## Configure MLFlow tracking URL"],"metadata":{}},{"cell_type":"code","source":["uri = ws.get_mlflow_tracking_uri()\nmlflow.set_tracking_uri(uri)\nprint(uri)"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["# Import the dataset from scikit-learn and create the training and test datasets. 
\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.datasets import load_diabetes\n\ndb = load_diabetes()\nX = db.data\ny = db.target\nX_train, X_test, y_train, y_test = train_test_split(X, y)"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["import os\nimport joblib\nimport mlflow\nimport mlflow.sklearn\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_squared_error\n\nmodel_save_path = \"model\"\nexperiment_name = 'experiment-with-mlflow-projects'\nmlflow.set_experiment(experiment_name)\n\nwith mlflow.start_run():\n n_estimators = 100\n max_depth = 6\n max_features = 3\n # Create and train model\n rf = RandomForestRegressor(n_estimators = n_estimators, max_depth = max_depth, max_features = max_features)\n rf.fit(X_train, y_train)\n # Make predictions\n predictions = rf.predict(X_test)\n \n # Log parameters\n mlflow.log_param(\"num_trees\", n_estimators)\n mlflow.log_param(\"maxdepth\", max_depth)\n mlflow.log_param(\"max_feat\", max_features)\n \n # Log model\n mlflow.sklearn.log_model(rf, \"random-forest-model\")\n \n # Create metrics\n mse = mean_squared_error(y_test, predictions)\n \n # Log metrics\n mlflow.log_metric(\"mse\", mse)\n \n \n # Save the model to the outputs directory for capture\n mlflow.sklearn.log_model(rf, model_save_path)\n "],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"markdown","source":["##Create AKS cluster\n\n Documentation below can be used to use the other options such as ACI or attaching existing cluster.\n\nhttps://docs.microsoft.com/en-us/azure/databricks/_static/notebooks/mlflow/mlflow-quick-start-deployment-azure.html"],"metadata":{}},{"cell_type":"code","source":["from azureml.core.compute import AksCompute, ComputeTarget\nfrom azureml.exceptions import ComputeTargetException\n\n# Use the default configuration (you can also provide parameters to customize this)\nprov_config = AksCompute.provisioning_configuration()\n\naks_cluster_name = \"aks-cluster\" \n\ntry:\n aks_target = ComputeTarget(workspace=ws, name=aks_cluster_name )\n print('Found existing cluster, use it.')\nexcept ComputeTargetException:\n # Create the cluster\n aks_target = ComputeTarget.create(workspace = ws, \n name = aks_cluster_name, \n provisioning_configuration = prov_config)\n\n # Wait for the create process to complete\n aks_target.wait_for_completion(show_output = True)\n \n \nprint(aks_target.provisioning_state)\nprint(aks_target.provisioning_errors)"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"markdown","source":["##Retreive Azure ML run ID"],"metadata":{}},{"cell_type":"code","source":["exp = ws.experiments[experiment_name]\nruns = list(exp.get_runs())\nprint(runs)\n\nrunid = runs[0].id"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"markdown","source":["## Deploy the model to AKS cluster"],"metadata":{}},{"cell_type":"code","source":["from azureml.core.webservice import Webservice, AksWebservice\nimport mlflow.azureml\n# Set configuration and service name\nprod_webservice_name = \"model-prod\"\nprod_webservice_deployment_config = AksWebservice.deploy_configuration(compute_target_name = \"aks-cluster\" )\n\nweb_service, azure_model = mlflow.azureml.deploy(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n service_name=prod_webservice_name,\n deployment_config = prod_webservice_deployment_config,\n workspace=ws,\n synchronous=True)\n"],"metadata":{},"outputs":[],"execution_count":14},{"cell_type":"markdown","source":["## Invoke 
Webservice"],"metadata":{}},{"cell_type":"code","source":["test_rows = [\n [0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,\n -0.0266789 , -0.02499266, -0.00259226, 0.00371174, 0.04034337],\n [-0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,\n 0.02937467, -0.01762938, 0.03430886, 0.0702113 , 0.00720652],\n [ 0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,\n -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652]]\n\nimport json\nimport pandas as pd\n\ntest_rows_as_json = pd.DataFrame(test_rows).to_json(orient=\"split\")\n\npredictions = web_service.run(test_rows_as_json)\nprint(predictions)"],"metadata":{},"outputs":[],"execution_count":16},{"cell_type":"code","source":["#Delete mlflow experiment\nmlflow_exp = mlflow.get_experiment_by_name(experiment_name)\nmlflow.delete_experiment(mlflow_exp.experiment_id)\n\n#Delete webservice\nweb_service.delete()"],"metadata":{},"outputs":[],"execution_count":17}],"metadata":{"name":"mlflow_azureml","notebookId":4039477032853995},"nbformat":4,"nbformat_minor":0}