From 43f8c364f3b4b9ce66109b00d11f883367a4fc5a Mon Sep 17 00:00:00 2001 From: Billy Hu Date: Wed, 30 Oct 2024 12:43:28 -0700 Subject: [PATCH 1/6] Fix output_path parameter doesn't support relative path --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 1 + .../_evaluate/_batch_run/eval_run_context.py | 5 +++++ .../_evaluate/_batch_run/target_run_context.py | 5 +++++ .../azure/ai/evaluation/_evaluate/_evaluate.py | 2 +- .../azure-ai-evaluation/eval_test_results.jsonl | 1 + .../tests/unittests/test_evaluate.py | 11 +++++++---- 6 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index ec35abaf57ee..959f527237c2 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -7,6 +7,7 @@ ### Breaking Changes ### Bugs Fixed +- Fixed an issue where the `output_path` parameter in the `evaluate` API did not support relative paths. ### Other Changes - Refined error messages for serviced-based evaluators and simulators. 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py index 5eea27afbd8d..cfee8770b48f 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py @@ -36,8 +36,11 @@ def __init__(self, client: Union[CodeClient, ProxyClient]) -> None: self.client = client self._is_batch_timeout_set_by_system = False self._is_otel_timeout_set_by_system = False + self._original_cwd = os.getcwd() def __enter__(self) -> None: + self._original_cwd = os.getcwd() + if isinstance(self.client, CodeClient): ClientUserAgentUtil.append_user_agent(USER_AGENT) inject_openai_api() @@ -64,6 +67,8 @@ def __exit__( exc_value: Optional[BaseException], exc_tb: Optional[types.TracebackType], ) -> None: + os.chdir(self._original_cwd) + if isinstance(self.client, CodeClient): recover_openai_api() diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py index 62a14aa75aa8..102704527deb 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py @@ -17,8 +17,11 @@ class TargetRunContext: def __init__(self, upload_snapshot: bool) -> None: self._upload_snapshot = upload_snapshot + self._original_cwd = os.getcwd() def __enter__(self) -> None: + self._original_cwd = os.getcwd() + # Address "[WinError 32] The process cannot access the file" error, # caused by conflicts when the venv and target function are in the same directory. 
# Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml). @@ -31,5 +34,7 @@ def __exit__( exc_value: Optional[BaseException], exc_tb: Optional[types.TracebackType], ) -> None: + os.chdir(self._original_cwd) + if not self._upload_snapshot: os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py index 689ce162122a..53544760f434 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py @@ -391,7 +391,7 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj ) output_dir = output_path if os.path.isdir(output_path) else os.path.dirname(output_path) - if not os.path.exists(output_dir): + if output_dir and not os.path.exists(output_dir): msg = f"The output directory '{output_dir}' does not exist. Please create the directory manually." raise EvaluationException( message=msg, diff --git a/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl b/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl new file mode 100644 index 000000000000..747f52c7478a --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl @@ -0,0 +1 @@ +{"rows": [{"inputs.query": "How do you create a run?", "inputs.context": "AML API only", "inputs.response": "To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. 
Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.015384615400000001}, {"inputs.query": "How do you log a model?", "inputs.context": "Logging can be done using any OSS Sdk", "inputs.response": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = './outputs/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. 
Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.02}, {"inputs.query": "What is the capital of France?", "inputs.context": "France is in Europe", "inputs.response": "Paris is the capital of France.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 1.0}], "metrics": {"g.f1_score": 0.3451282051333333}, "studio_url": null} \ No newline at end of file diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py index ffd71a518c7c..b1763057722a 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py @@ -396,14 +396,17 @@ def test_evaluate_output_dir_not_exist(self, mock_model_config, questions_file): assert "The output directory './not_exist_dir' does not exist." 
in exc_info.value.args[0] - @pytest.mark.parametrize("use_pf_client", [True, False]) - def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_pf_client): - output_path = os.path.join(tmpdir, "eval_test_results.jsonl") + @pytest.mark.parametrize("use_relative_path", [True, False]) + def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_relative_path): + if use_relative_path: + output_path = os.path.join(tmpdir, "eval_test_results.jsonl") + else: + output_path = "eval_test_results.jsonl" + result = evaluate( data=evaluate_test_data_jsonl_file, evaluators={"g": F1ScoreEvaluator()}, output_path=output_path, - _use_pf_client=use_pf_client, ) assert result is not None From ac54be168707d930e3443286100903248ca3f3df Mon Sep 17 00:00:00 2001 From: Billy Hu Date: Thu, 31 Oct 2024 11:13:47 -0700 Subject: [PATCH 2/6] add comments --- .../azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py | 1 + .../ai/evaluation/_evaluate/_batch_run/target_run_context.py | 1 + 2 files changed, 2 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py index cfee8770b48f..feb3e3b03d66 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py @@ -39,6 +39,7 @@ def __init__(self, client: Union[CodeClient, ProxyClient]) -> None: self._original_cwd = os.getcwd() def __enter__(self) -> None: + # Preserve current working directory, as PF may change it without restoring it afterward self._original_cwd = os.getcwd() if isinstance(self.client, CodeClient): diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py 
b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py index 102704527deb..2dc843164552 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py @@ -20,6 +20,7 @@ def __init__(self, upload_snapshot: bool) -> None: self._original_cwd = os.getcwd() def __enter__(self) -> None: + # Preserve current working directory, as PF may change it without restoring it afterward self._original_cwd = os.getcwd() # Address "[WinError 32] The process cannot access the file" error, From 646ace9e252ebcf51b3d23deefec14b7e451b1fd Mon Sep 17 00:00:00 2001 From: Billy Hu Date: Thu, 31 Oct 2024 13:07:34 -0700 Subject: [PATCH 3/6] fix the test --- sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl | 1 - .../azure-ai-evaluation/tests/unittests/test_evaluate.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) delete mode 100644 sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl diff --git a/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl b/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl deleted file mode 100644 index 747f52c7478a..000000000000 --- a/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"rows": [{"inputs.query": "How do you create a run?", "inputs.context": "AML API only", "inputs.response": "To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. 
Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.015384615400000001}, {"inputs.query": "How do you log a model?", "inputs.context": "Logging can be done using any OSS Sdk", "inputs.response": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = './outputs/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. 
Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.02}, {"inputs.query": "What is the capital of France?", "inputs.context": "France is in Europe", "inputs.response": "Paris is the capital of France.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 1.0}], "metrics": {"g.f1_score": 0.3451282051333333}, "studio_url": null} \ No newline at end of file diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py index b1763057722a..9e26bf9a992b 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py @@ -398,6 +398,7 @@ def test_evaluate_output_dir_not_exist(self, mock_model_config, questions_file): @pytest.mark.parametrize("use_relative_path", [True, False]) def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_relative_path): + # output_path is a file if use_relative_path: output_path = os.path.join(tmpdir, "eval_test_results.jsonl") else: @@ -418,6 +419,9 @@ def test_evaluate_output_path(self, 
evaluate_test_data_jsonl_file, tmpdir, use_r data_from_file = json.loads(content) assert result["metrics"] == data_from_file["metrics"] + os.remove(output_path) + + # output_path is a directory result = evaluate( data=evaluate_test_data_jsonl_file, evaluators={"g": F1ScoreEvaluator()}, From 7d5f3a30dd1baa0ff5eee560af1d1d0bb84fc8e9 Mon Sep 17 00:00:00 2001 From: Billy Hu Date: Thu, 31 Oct 2024 15:49:45 -0700 Subject: [PATCH 4/6] update --- .../azure/ai/evaluation/_evaluate/_evaluate.py | 4 ++-- .../azure/ai/evaluation/_evaluate/_utils.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py index 53544760f434..2d0878a9cd79 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py @@ -888,9 +888,9 @@ def eval_batch_run( result_df_dict = result_df.to_dict("records") result: EvaluationResult = {"rows": result_df_dict, "metrics": metrics, "studio_url": studio_url} # type: ignore + _print_summary(per_evaluator_results) + if output_path: _write_output(output_path, result) - _print_summary(per_evaluator_results) - return result diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py index 5e95eb904343..60b0197fdcda 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py @@ -211,6 +211,8 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None: with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f: json.dump(data_dict, f) + print(f"\nEvaluation results saved to {p.resolve()}") + def _apply_column_mapping( source_df: pd.DataFrame, mapping_config: 
Optional[Dict[str, str]], inplace: bool = False From aea0f0cabd6e4d53a1695a109040bb8f1952d762 Mon Sep 17 00:00:00 2001 From: Billy Hu Date: Thu, 31 Oct 2024 16:26:29 -0700 Subject: [PATCH 5/6] minor update --- .../azure/ai/evaluation/_evaluate/_evaluate.py | 2 +- .../azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py index 2d0878a9cd79..5ae9ebca6548 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py @@ -698,7 +698,7 @@ def _print_summary(per_evaluator_results: Dict[str, Any]) -> None: if output_dict: print("======= Combined Run Summary (Per Evaluator) =======\n") print(json.dumps(output_dict, indent=4)) - print("\n====================================================") + print("\n====================================================\n") def _evaluate( # pylint: disable=too-many-locals,too-many-statements diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py index 60b0197fdcda..b6dbe1dedf0b 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py @@ -211,7 +211,7 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None: with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f: json.dump(data_dict, f) - print(f"\nEvaluation results saved to {p.resolve()}") + print(f"Evaluation results saved to \"{p.resolve()}\".\n") def _apply_column_mapping( From 11f076dd40b4d99b22216204c24ef1dd0e30a981 Mon Sep 17 00:00:00 2001 From: Billy Hu Date: Thu, 31 Oct 2024 16:29:59 -0700 Subject: 
[PATCH 6/6] update --- .../azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py index b6dbe1dedf0b..299685bf026c 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py @@ -211,7 +211,7 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None: with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f: json.dump(data_dict, f) - print(f"Evaluation results saved to \"{p.resolve()}\".\n") + print(f'Evaluation results saved to "{p.resolve()}".\n') def _apply_column_mapping(