From d332d02d4ac76ed9fe54866c1285940f740fe011 Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Fri, 24 Mar 2023 14:43:24 -0400
Subject: [PATCH 1/8] Force version of sklearn in examples

---
 .../sklearn_examples_requirements.txt          | 18 ++++++++++++++++++
 .../sklearn_japanese_housing_regression.py     |  4 ++++
 .../inference/sklearn_mnist_classification.py  |  2 ++
 3 files changed, 24 insertions(+)
 create mode 100644 sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt b/sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt
new file mode 100644
index 000000000000..256e6be3b567
--- /dev/null
+++ b/sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+scikit-learn==1.0.2
\ No newline at end of file
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
index 0d0adb0425a8..e6a544c6a325 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
@@ -32,6 +32,7 @@
 import argparse
 from typing import Iterable
 
+import os
 import pandas
 
 import apache_beam as beam
@@ -137,6 +138,9 @@ def run(
   known_args, pipeline_args = parse_known_args(argv)
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
+  requirements_dir = os.path.dirname(os.path.realpath(__file__))
+  # Pin to the version that we trained the model on.
+  pipeline_options.view_as(SetupOptions).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
 
   pipeline = test_pipeline
   if not test_pipeline:
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index e748166e6fda..fe46b280df77 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -90,6 +90,8 @@ def run(
   known_args, pipeline_args = parse_known_args(argv)
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
+  # Pin to the version that we trained the model on.
+  pipeline_options.view_as(SetupOptions).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
 
   # In this example we pass keyed inputs to RunInference transform.
   # Therefore, we use KeyedModelHandler wrapper over SklearnModelHandlerNumpy.

From a4ccaf2940cf880d2c5ffd69eab37a269394a138 Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Fri, 24 Mar 2023 14:44:25 -0400
Subject: [PATCH 2/8] Add missing lines

---
 .../examples/inference/sklearn_mnist_classification.py          | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index fe46b280df77..0bbabd75cbe2 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -26,6 +26,7 @@
 
 import argparse
 import logging
+import os
 from typing import Iterable
 from typing import List
 from typing import Tuple
@@ -90,6 +91,7 @@ def run(
   known_args, pipeline_args = parse_known_args(argv)
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
+  requirements_dir = os.path.dirname(os.path.realpath(__file__))
   # Pin to the version that we trained the model on.
   pipeline_options.view_as(SetupOptions).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
 

From c21348e68248504b37ad131e3f9596ac54be3a9c Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Fri, 24 Mar 2023 14:45:06 -0400
Subject: [PATCH 3/8] Comments

---
 .../examples/inference/sklearn_japanese_housing_regression.py   | 2 +-
 .../examples/inference/sklearn_mnist_classification.py          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
index e6a544c6a325..8b3fbdbfb671 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
@@ -139,7 +139,7 @@ def run(
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
   requirements_dir = os.path.dirname(os.path.realpath(__file__))
-  # Pin to the version that we trained the model on.
+  # Pin to the version that we trained the model on. Sklearn doesn't guarantee compatability between versions.
   pipeline_options.view_as(SetupOptions).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
 
   pipeline = test_pipeline
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index 0bbabd75cbe2..a5522f833456 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -92,7 +92,7 @@ def run(
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
   requirements_dir = os.path.dirname(os.path.realpath(__file__))
-  # Pin to the version that we trained the model on.
+  # Pin to the version that we trained the model on. Sklearn doesn't guarantee compatability between versions.
   pipeline_options.view_as(SetupOptions).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
 
   # In this example we pass keyed inputs to RunInference transform.

From 078e9bb7bd49f78a6efc99d95debe2ed1fbfd67e Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Fri, 24 Mar 2023 14:48:36 -0400
Subject: [PATCH 4/8] Update readme

---
 sdks/python/apache_beam/examples/inference/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdks/python/apache_beam/examples/inference/README.md b/sdks/python/apache_beam/examples/inference/README.md
index f769e82bf22f..2dc1247934d0 100644
--- a/sdks/python/apache_beam/examples/inference/README.md
+++ b/sdks/python/apache_beam/examples/inference/README.md
@@ -334,6 +334,7 @@ To use this transform, you need a dataset and model for language modeling.
 ...
 ```
 2. Create a file named `MODEL_PATH` that contains the pickled file of a scikit-learn model trained on MNIST data. Please refer to this scikit-learn [model persistence documentation](https://scikit-learn.org/stable/model_persistence.html) on how to serialize models.
+3. Update sklearn_examples_requirements.txt to match the version of sklearn used to train the model. Sklearn doesn't guarantee model compatability between versions.
 
 
 ### Running `sklearn_mnist_classification.py`

From db5f7d0baf9a262fbe834f2c318525a235f365e9 Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Sat, 25 Mar 2023 08:20:46 -0400
Subject: [PATCH 5/8] Lint

---
 .../inference/sklearn_japanese_housing_regression.py      | 8 ++++++--
 .../examples/inference/sklearn_mnist_classification.py    | 8 ++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
index 8b3fbdbfb671..690f5e67075a 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
@@ -88,6 +88,7 @@ def sort_by_features(dataframe, max_size):
 
 
 class LoadDataframe(beam.DoFn):
+
   def process(self, file_name: str) -> Iterable[pandas.DataFrame]:
     """ Loads data files as a pandas dataframe."""
     file = FileSystems.open(file_name, 'rb')
@@ -139,8 +140,11 @@ def run(
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
   requirements_dir = os.path.dirname(os.path.realpath(__file__))
-  # Pin to the version that we trained the model on. Sklearn doesn't guarantee compatability between versions.
-  pipeline_options.view_as(SetupOptions).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
+  # Pin to the version that we trained the model on.
+  # Sklearn doesn't guarantee compatability between versions.
+  pipeline_options.view_as(
+      SetupOptions
+  ).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
 
   pipeline = test_pipeline
   if not test_pipeline:
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index a5522f833456..e1c1a4fe78af 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -53,6 +53,7 @@ class PostProcessor(beam.DoFn):
   """Process the PredictionResult to get the predicted label.
   Returns a comma separated string with true label and predicted label.
   """
+
   def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]:
     label, prediction_result = element
     prediction = prediction_result.inference
@@ -92,8 +93,11 @@ def run(
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
   requirements_dir = os.path.dirname(os.path.realpath(__file__))
-  # Pin to the version that we trained the model on. Sklearn doesn't guarantee compatability between versions.
-  pipeline_options.view_as(SetupOptions).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
+  # Pin to the version that we trained the model on.
+  # Sklearn doesn't guarantee compatability between versions.
+  pipeline_options.view_as(
+      SetupOptions
+  ).requirements_file = f'{requirements_dir}/sklearn_examples_requirements.txt'
 
   # In this example we pass keyed inputs to RunInference transform.
   # Therefore, we use KeyedModelHandler wrapper over SklearnModelHandlerNumpy.

From 8cd2fbf06b7c93135aec6f949a68d0228869d2ea Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Sat, 25 Mar 2023 08:22:48 -0400
Subject: [PATCH 6/8] Add info in requirements.txt

---
 .../examples/inference/sklearn_examples_requirements.txt        | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt b/sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt
index 256e6be3b567..7c41e37e01b7 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt
+++ b/sdks/python/apache_beam/examples/inference/sklearn_examples_requirements.txt
@@ -15,4 +15,6 @@
 # limitations under the License.
 #
 
+# This should match the saved version of your trained model.
+# Beam's tests use sklearn 1.0.2 for their saved models.
 scikit-learn==1.0.2
\ No newline at end of file

From 17959958ccbd5b2ba1f74929b7173fcdb596de16 Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Sat, 25 Mar 2023 08:26:56 -0400
Subject: [PATCH 7/8] Lint

---
 .../examples/inference/sklearn_japanese_housing_regression.py    | 1 -
 .../examples/inference/sklearn_mnist_classification.py           | 1 -
 2 files changed, 2 deletions(-)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
index 690f5e67075a..f23412a82e94 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
@@ -88,7 +88,6 @@ def sort_by_features(dataframe, max_size):
 
 
 class LoadDataframe(beam.DoFn):
-
   def process(self, file_name: str) -> Iterable[pandas.DataFrame]:
     """ Loads data files as a pandas dataframe."""
     file = FileSystems.open(file_name, 'rb')
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index e1c1a4fe78af..6f8ea929bbb6 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -53,7 +53,6 @@ class PostProcessor(beam.DoFn):
   """Process the PredictionResult to get the predicted label.
   Returns a comma separated string with true label and predicted label.
   """
-
   def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]:
     label, prediction_result = element
     prediction = prediction_result.inference

From 27d611a2f2f0ecf876573ee7afed563aaf0587ba Mon Sep 17 00:00:00 2001
From: Danny McCormick <dannymccormick@google.com>
Date: Sat, 25 Mar 2023 08:33:46 -0400
Subject: [PATCH 8/8] Lint

---
 .../examples/inference/sklearn_japanese_housing_regression.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
index f23412a82e94..3aa2f362fa64 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_japanese_housing_regression.py
@@ -30,9 +30,9 @@
 """
 
 import argparse
+import os
 from typing import Iterable
 
-import os
 import pandas
 
 import apache_beam as beam