From 43f8c364f3b4b9ce66109b00d11f883367a4fc5a Mon Sep 17 00:00:00 2001
From: Billy Hu <ninhu@microsoft.com>
Date: Wed, 30 Oct 2024 12:43:28 -0700
Subject: [PATCH 1/6] Fix output_path parameter not supporting relative paths

---
 sdk/evaluation/azure-ai-evaluation/CHANGELOG.md       |  1 +
 .../_evaluate/_batch_run/eval_run_context.py          |  5 +++++
 .../_evaluate/_batch_run/target_run_context.py        |  5 +++++
 .../azure/ai/evaluation/_evaluate/_evaluate.py        |  2 +-
 .../azure-ai-evaluation/eval_test_results.jsonl       |  1 +
 .../tests/unittests/test_evaluate.py                  | 11 +++++++----
 6 files changed, 20 insertions(+), 5 deletions(-)
 create mode 100644 sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index ec35abaf57ee..959f527237c2 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -7,6 +7,7 @@
 ### Breaking Changes
 
 ### Bugs Fixed
+- Fixed an issue where the `output_path` parameter in the `evaluate` API did not support relative paths.
 
 ### Other Changes
 - Refined error messages for serviced-based evaluators and simulators.
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
index 5eea27afbd8d..cfee8770b48f 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
@@ -36,8 +36,11 @@ def __init__(self, client: Union[CodeClient, ProxyClient]) -> None:
         self.client = client
         self._is_batch_timeout_set_by_system = False
         self._is_otel_timeout_set_by_system = False
+        self._original_cwd = os.getcwd()
 
     def __enter__(self) -> None:
+        self._original_cwd = os.getcwd()
+
         if isinstance(self.client, CodeClient):
             ClientUserAgentUtil.append_user_agent(USER_AGENT)
             inject_openai_api()
@@ -64,6 +67,8 @@ def __exit__(
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
     ) -> None:
+        os.chdir(self._original_cwd)
+
         if isinstance(self.client, CodeClient):
             recover_openai_api()
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
index 62a14aa75aa8..102704527deb 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
@@ -17,8 +17,11 @@ class TargetRunContext:
 
     def __init__(self, upload_snapshot: bool) -> None:
         self._upload_snapshot = upload_snapshot
+        self._original_cwd = os.getcwd()
 
     def __enter__(self) -> None:
+        self._original_cwd = os.getcwd()
+
         # Address "[WinError 32] The process cannot access the file" error,
         # caused by conflicts when the venv and target function are in the same directory.
         # Setting PF_FLOW_ENTRY_IN_TMP to true uploads only the flex entry file (flow.flex.yaml).
@@ -31,5 +34,7 @@ def __exit__(
         exc_value: Optional[BaseException],
         exc_tb: Optional[types.TracebackType],
     ) -> None:
+        os.chdir(self._original_cwd)
+
         if not self._upload_snapshot:
             os.environ.pop(PF_FLOW_ENTRY_IN_TMP, None)
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
index 689ce162122a..53544760f434 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
@@ -391,7 +391,7 @@ def _validate_and_load_data(target, data, evaluators, output_path, azure_ai_proj
             )
 
         output_dir = output_path if os.path.isdir(output_path) else os.path.dirname(output_path)
-        if not os.path.exists(output_dir):
+        if output_dir and not os.path.exists(output_dir):
             msg = f"The output directory '{output_dir}' does not exist. Please create the directory manually."
             raise EvaluationException(
                 message=msg,
diff --git a/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl b/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl
new file mode 100644
index 000000000000..747f52c7478a
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl
@@ -0,0 +1 @@
+{"rows": [{"inputs.query": "How do you create a run?", "inputs.context": "AML API only", "inputs.response": "To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.015384615400000001}, {"inputs.query": "How do you log a model?", "inputs.context": "Logging can be done using any OSS Sdk", "inputs.response": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = './outputs/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. 
\n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.02}, {"inputs.query": "What is the capital of France?", "inputs.context": "France is in Europe", "inputs.response": "Paris is the capital of France.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 1.0}], "metrics": {"g.f1_score": 0.3451282051333333}, "studio_url": null}
\ No newline at end of file
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
index ffd71a518c7c..b1763057722a 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
@@ -396,14 +396,17 @@ def test_evaluate_output_dir_not_exist(self, mock_model_config, questions_file):
 
         assert "The output directory './not_exist_dir' does not exist." in exc_info.value.args[0]
 
-    @pytest.mark.parametrize("use_pf_client", [True, False])
-    def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_pf_client):
-        output_path = os.path.join(tmpdir, "eval_test_results.jsonl")
+    @pytest.mark.parametrize("use_relative_path", [True, False])
+    def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_relative_path):
+        if use_relative_path:
+            output_path = os.path.join(tmpdir, "eval_test_results.jsonl")
+        else:
+            output_path = "eval_test_results.jsonl"
+
         result = evaluate(
             data=evaluate_test_data_jsonl_file,
             evaluators={"g": F1ScoreEvaluator()},
             output_path=output_path,
-            _use_pf_client=use_pf_client,
         )
 
         assert result is not None

From ac54be168707d930e3443286100903248ca3f3df Mon Sep 17 00:00:00 2001
From: Billy Hu <ninhu@microsoft.com>
Date: Thu, 31 Oct 2024 11:13:47 -0700
Subject: [PATCH 2/6] add comments

---
 .../azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py | 1 +
 .../ai/evaluation/_evaluate/_batch_run/target_run_context.py     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
index cfee8770b48f..feb3e3b03d66 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/eval_run_context.py
@@ -39,6 +39,7 @@ def __init__(self, client: Union[CodeClient, ProxyClient]) -> None:
         self._original_cwd = os.getcwd()
 
     def __enter__(self) -> None:
+        # Preserve current working directory, as PF may change it without restoring it afterward
         self._original_cwd = os.getcwd()
 
         if isinstance(self.client, CodeClient):
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
index 102704527deb..2dc843164552 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/target_run_context.py
@@ -20,6 +20,7 @@ def __init__(self, upload_snapshot: bool) -> None:
         self._original_cwd = os.getcwd()
 
     def __enter__(self) -> None:
+        # Preserve current working directory, as PF may change it without restoring it afterward
         self._original_cwd = os.getcwd()
 
         # Address "[WinError 32] The process cannot access the file" error,

From 646ace9e252ebcf51b3d23deefec14b7e451b1fd Mon Sep 17 00:00:00 2001
From: Billy Hu <ninhu@microsoft.com>
Date: Thu, 31 Oct 2024 13:07:34 -0700
Subject: [PATCH 3/6] fix the test

---
 sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl    | 1 -
 .../azure-ai-evaluation/tests/unittests/test_evaluate.py      | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)
 delete mode 100644 sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl

diff --git a/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl b/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl
deleted file mode 100644
index 747f52c7478a..000000000000
--- a/sdk/evaluation/azure-ai-evaluation/eval_test_results.jsonl
+++ /dev/null
@@ -1 +0,0 @@
-{"rows": [{"inputs.query": "How do you create a run?", "inputs.context": "AML API only", "inputs.response": "To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\n\n```\nfrom azureml.core import Experiment, Run\nfrom azureml.core.workspace import Workspace\n\n# Define workspace and experiment\nws = Workspace.from_config()\nexp = Experiment(workspace=ws, name='my_experiment')\n\n# Create a new run\nrun = exp.start_logging()\n```\n\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.015384615400000001}, {"inputs.query": "How do you log a model?", "inputs.context": "Logging can be done using any OSS Sdk", "inputs.response": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = './outputs/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. 
\n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 0.02}, {"inputs.query": "What is the capital of France?", "inputs.context": "France is in Europe", "inputs.response": "Paris is the capital of France.", "inputs.ground_truth": "Paris is the capital of France.", "outputs.g.f1_score": 1.0}], "metrics": {"g.f1_score": 0.3451282051333333}, "studio_url": null}
\ No newline at end of file
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
index b1763057722a..9e26bf9a992b 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate.py
@@ -398,6 +398,7 @@ def test_evaluate_output_dir_not_exist(self, mock_model_config, questions_file):
 
     @pytest.mark.parametrize("use_relative_path", [True, False])
     def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_relative_path):
+        # output_path is a file
         if use_relative_path:
             output_path = os.path.join(tmpdir, "eval_test_results.jsonl")
         else:
@@ -418,6 +419,9 @@ def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir, use_r
             data_from_file = json.loads(content)
             assert result["metrics"] == data_from_file["metrics"]
 
+        os.remove(output_path)
+
+        # output_path is a directory
         result = evaluate(
             data=evaluate_test_data_jsonl_file,
             evaluators={"g": F1ScoreEvaluator()},

From 7d5f3a30dd1baa0ff5eee560af1d1d0bb84fc8e9 Mon Sep 17 00:00:00 2001
From: Billy Hu <ninhu@microsoft.com>
Date: Thu, 31 Oct 2024 15:49:45 -0700
Subject: [PATCH 4/6] update

---
 .../azure/ai/evaluation/_evaluate/_evaluate.py                | 4 ++--
 .../azure/ai/evaluation/_evaluate/_utils.py                   | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
index 53544760f434..2d0878a9cd79 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
@@ -888,9 +888,9 @@ def eval_batch_run(
     result_df_dict = result_df.to_dict("records")
     result: EvaluationResult = {"rows": result_df_dict, "metrics": metrics, "studio_url": studio_url}  # type: ignore
 
+    _print_summary(per_evaluator_results)
+
     if output_path:
         _write_output(output_path, result)
 
-    _print_summary(per_evaluator_results)
-
     return result
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
index 5e95eb904343..60b0197fdcda 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
@@ -211,6 +211,8 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None:
     with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f:
         json.dump(data_dict, f)
 
+    print(f"\nEvaluation results saved to {p.resolve()}")
+
 
 def _apply_column_mapping(
     source_df: pd.DataFrame, mapping_config: Optional[Dict[str, str]], inplace: bool = False

From aea0f0cabd6e4d53a1695a109040bb8f1952d762 Mon Sep 17 00:00:00 2001
From: Billy Hu <ninhu@microsoft.com>
Date: Thu, 31 Oct 2024 16:26:29 -0700
Subject: [PATCH 5/6] minor update

---
 .../azure/ai/evaluation/_evaluate/_evaluate.py                  | 2 +-
 .../azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
index 2d0878a9cd79..5ae9ebca6548 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
@@ -698,7 +698,7 @@ def _print_summary(per_evaluator_results: Dict[str, Any]) -> None:
     if output_dict:
         print("======= Combined Run Summary (Per Evaluator) =======\n")
         print(json.dumps(output_dict, indent=4))
-        print("\n====================================================")
+        print("\n====================================================\n")
 
 
 def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
index 60b0197fdcda..b6dbe1dedf0b 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
@@ -211,7 +211,7 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None:
     with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f:
         json.dump(data_dict, f)
 
-    print(f"\nEvaluation results saved to {p.resolve()}")
+    print(f"Evaluation results saved to \"{p.resolve()}\".\n")
 
 
 def _apply_column_mapping(

From 11f076dd40b4d99b22216204c24ef1dd0e30a981 Mon Sep 17 00:00:00 2001
From: Billy Hu <ninhu@microsoft.com>
Date: Thu, 31 Oct 2024 16:29:59 -0700
Subject: [PATCH 6/6] update

---
 .../azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
index b6dbe1dedf0b..299685bf026c 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py
@@ -211,7 +211,7 @@ def _write_output(path: Union[str, os.PathLike], data_dict: Any) -> None:
     with open(p, "w", encoding=DefaultOpenEncoding.WRITE) as f:
         json.dump(data_dict, f)
 
-    print(f"Evaluation results saved to \"{p.resolve()}\".\n")
+    print(f'Evaluation results saved to "{p.resolve()}".\n')
 
 
 def _apply_column_mapping(