From 29767cf2bc0cd16b9006b0ba9990a46c920390ec Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Mon, 23 May 2022 13:10:36 -0400
Subject: [PATCH 01/12] sklearn example and IT test

---
 .test-infra/jenkins/jira_utils/__init__.py    | 16 ++++
 .../inference/sklearn_inference_example.py    | 95 +++++++++++++++++++
 .../ml/inference/sklearn_inference_it_test.py | 51 ++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 .test-infra/jenkins/jira_utils/__init__.py
 create mode 100644 sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
 create mode 100644 sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py

diff --git a/.test-infra/jenkins/jira_utils/__init__.py b/.test-infra/jenkins/jira_utils/__init__.py
new file mode 100644
index 000000000000..cce3acad34a4
--- /dev/null
+++ b/.test-infra/jenkins/jira_utils/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py b/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
new file mode 100644
index 000000000000..caf61de41594
--- /dev/null
+++ b/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
@@ -0,0 +1,95 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""A pipeline that uses RunInference API to perform image classification."""
+
+import argparse
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Tuple
+
+import apache_beam as beam
+from apache_beam.ml.inference.api import PredictionResult
+from apache_beam.ml.inference.api import RunInference
+from apache_beam.ml.inference.sklearn_inference import ModelFileType
+from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
+
+
+def process_input(row: str) -> Tuple[int, List[int]]:
+  data = row.split(',')
+  label, pixels = int(data[0]), data[1:]
+  pixels = [int(pixel) for pixel in pixels]
+  return label, pixels
+
+
+class PostProcessor(beam.DoFn):
+  def process(self, element: Tuple[int, PredictionResult]) -> Iterable[Dict]:
+    label, prediction_result = element
+    prediction = prediction_result.inference
+    yield {label: prediction}
+
+
+def parse_known_args(argv):
+  """Parses args for the workflow."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--input_file',
+      dest='input',
+      help='CSV file with row containing label and pixel values.')
+  parser.add_argument(
+      '--output', dest='output', help='Path to save output predictions.')
+  parser.add_argument(
+      '--model_path',
+      dest='model_path',
+      help='Path to load the Sklearn model for Inference.')
+  return parser.parse_known_args(argv)
+
+
+def run(argv=None, save_main_session=True):
+  """Entry point. Defines and runs the pipeline."""
+  known_args, pipeline_args = parse_known_args(argv)
+  pipeline_options = PipelineOptions(pipeline_args)
+  pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
+
+  model_loader = SklearnModelLoader(
+      model_file_type=ModelFileType.PICKLE, model_uri=known_args.model_path)
+
+  with beam.Pipeline(options=pipeline_options) as p:
+    label_pixel_tuple = (
+        p
+        | "ReadFromInput" >> beam.io.ReadFromText(
+            known_args.input, skip_header_lines=1)
+        | "Process inputs" >> beam.Map(process_input))
+
+    predictions = (
+        label_pixel_tuple
+        | "RunInference" >> RunInference(model_loader).with_output_types(
+            Tuple[int, PredictionResult])
+        | "PostProcessor" >> beam.ParDo(PostProcessor()))
+
+    if known_args.output:
+      predictions | "WriteOutputToGCS" >> beam.io.WriteToText( # pylint: disable=expression-not-assigned
+        known_args.output,
+        shard_name_template='',
+        append_trailing_newlines=True)
+
+
+if __name__ == '__main__':
+  run()
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
new file mode 100644
index 000000000000..a864dd4414c2
--- /dev/null
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""End-to-End test for Sklearn Inference"""
+
+import logging
+import pytest
+import unittest
+import uuid
+
+from apache_beam.io.filesystems import FileSystems
+from apache_beam.examples.inference import sklearn_inference_example
+from apache_beam.testing.test_pipeline import TestPipeline
+
+
+class SklearnInference(unittest.TestCase):
+  @pytest.mark.it_postcommit
+  def test_predictions_output_file(self):
+    test_pipeline = TestPipeline(is_integration_test=False)
+    input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
+    output_file_dir = 'gs://apache-beam-ml/testing/predictions' # pylint: disable=line-too-long
+    output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt'])
+    model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle' # pylint: disable=line-too-long
+    extra_opts = {
+        'input': input_file,
+        'output': output_file,
+        'model_path': model_path,
+    }
+    sklearn_inference_example.run(
+        test_pipeline.get_full_options_as_args(**extra_opts),
+        save_main_session=False)
+    self.assertEqual(FileSystems().exists(output_file), True)
+
+
+if __name__ == '__main__':
+  logging.getLogger().setLevel(logging.DEBUG)
+  unittest.main()

From f7a36a6d5a8f56157cea1f3e4343c9a3fdc84641 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Thu, 9 Jun 2022 18:25:51 -0400
Subject: [PATCH 02/12] Change the example name

---
 ...inference_example.py => sklearn_mnist_classification.py} | 0
 .../apache_beam/ml/inference/sklearn_inference_it_test.py   | 6 +++---
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename sdks/python/apache_beam/examples/inference/{sklearn_inference_example.py => sklearn_mnist_classification.py} (100%)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
similarity index 100%
rename from sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
rename to sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index a864dd4414c2..dc13ceca85e2 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -23,14 +23,14 @@
 import uuid
 
 from apache_beam.io.filesystems import FileSystems
-from apache_beam.examples.inference import sklearn_inference_example
+from apache_beam.examples.inference import sklearn_mnist_classification
 from apache_beam.testing.test_pipeline import TestPipeline
 
 
 class SklearnInference(unittest.TestCase):
   @pytest.mark.it_postcommit
   def test_predictions_output_file(self):
-    test_pipeline = TestPipeline(is_integration_test=False)
+    test_pipeline = TestPipeline(is_integration_test=True)
     input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
     output_file_dir = 'gs://apache-beam-ml/testing/predictions' # pylint: disable=line-too-long
     output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt'])
     model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle' # pylint: disable=line-too-long
     extra_opts = {
@@ -40,7 +40,7 @@ def test_predictions_output_file(self):
         'output': output_file,
         'model_path': model_path,
     }
-    sklearn_inference_example.run(
+    sklearn_mnist_classification.run(
         test_pipeline.get_full_options_as_args(**extra_opts),
         save_main_session=False)
     self.assertEqual(FileSystems().exists(output_file), True)

From 0be2a8b2330c8b852697f9f5aaabc72220c0d4e2 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 10:17:19 -0400
Subject: [PATCH 03/12] Refactor sklearn example

---
 .test-infra/jenkins/jira_utils/__init__.py    | 16 ------
 .../inference/sklearn_mnist_classification.py | 49 +++++++++++++------
 2 files changed, 33 insertions(+), 32 deletions(-)
 delete mode 100644 .test-infra/jenkins/jira_utils/__init__.py

diff --git a/.test-infra/jenkins/jira_utils/__init__.py b/.test-infra/jenkins/jira_utils/__init__.py
deleted file mode 100644
index cce3acad34a4..000000000000
--- a/.test-infra/jenkins/jira_utils/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index caf61de41594..a48d9d00ceaf 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -15,19 +15,26 @@
 # limitations under the License.
 #
 
-"""A pipeline that uses RunInference API to perform image classification."""
+"""A pipeline that uses RunInference API to classify MNIST data.
+
+This pipeline takes a text file in which data is comma separated ints. The first
+column would be the true label and the rest would be the pixel values. The data
+is processed and then a model trained on the MNIST data would be used to perform
+the inference. The pipeline writes the prediction to an output file in which
+users can then compare against the true label.
+"""
 
 import argparse
-from typing import Dict
 from typing import Iterable
 from typing import List
 from typing import Tuple
 
 import apache_beam as beam
-from apache_beam.ml.inference.api import PredictionResult
-from apache_beam.ml.inference.api import RunInference
+from apache_beam.ml.inference.base import KeyedModelHandler
+from apache_beam.ml.inference.base import PredictionResult
+from apache_beam.ml.inference.base import RunInference
 from apache_beam.ml.inference.sklearn_inference import ModelFileType
-from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader
+from apache_beam.ml.inference.sklearn_inference import SklearnModelHandlerNumpy
 from apache_beam.options.pipeline_options import PipelineOptions
 from apache_beam.options.pipeline_options import SetupOptions
 
@@ -40,10 +47,13 @@ def process_input(row: str) -> Tuple[int, List[int]]:
 
 
 class PostProcessor(beam.DoFn):
-  def process(self, element: Tuple[int, PredictionResult]) -> Iterable[Dict]:
+  """Process the PredictionResult to get the predicted label.
+  Returns a comma separated string with true label and predicted label.
+  """
+  def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]:
     label, prediction_result = element
     prediction = prediction_result.inference
-    yield {label: prediction}
+    yield '{},{}'.format(label, prediction)
 
 
 def parse_known_args(argv):
@@ -52,12 +62,17 @@ def parse_known_args(argv):
   parser.add_argument(
       '--input_file',
      dest='input',
+      required=True,
       help='CSV file with row containing label and pixel values.')
   parser.add_argument(
-      '--output', dest='output', help='Path to save output predictions.')
+      '--output',
+      dest='output',
+      required=True,
+      help='Path to save output predictions.')
   parser.add_argument(
       '--model_path',
       dest='model_path',
+      required=True,
       help='Path to load the Sklearn model for Inference.')
   return parser.parse_known_args(argv)
 
@@ -68,24 +83,26 @@ def run(argv=None, save_main_session=True):
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
 
-  model_loader = SklearnModelLoader(
-      model_file_type=ModelFileType.PICKLE, model_uri=known_args.model_path)
+  # In this example we pass keyed inputs to RunInference transform.
+  # Therefore, we use KeyedModelHandler wrapper over SklearnModelHandlerNumpy.
+  model_loader = KeyedModelHandler(
+      SklearnModelHandlerNumpy(
+          model_file_type=ModelFileType.PICKLE,
+          model_uri=known_args.model_path))
 
   with beam.Pipeline(options=pipeline_options) as p:
     label_pixel_tuple = (
         p
         | "ReadFromInput" >> beam.io.ReadFromText(
             known_args.input, skip_header_lines=1)
-        | "Process inputs" >> beam.Map(process_input))
+        | "PreProcessInputs" >> beam.Map(process_input))
 
     predictions = (
         label_pixel_tuple
-        | "RunInference" >> RunInference(model_loader).with_output_types(
-            Tuple[int, PredictionResult])
-        | "PostProcessor" >> beam.ParDo(PostProcessor()))
+        | "RunInference" >> RunInference(model_loader)
+        | "PostProcessOutputs" >> beam.ParDo(PostProcessor()))
 
-    if known_args.output:
-      predictions | "WriteOutputToGCS" >> beam.io.WriteToText( # pylint: disable=expression-not-assigned
+    _ = predictions | "WriteOutputToGCS" >> beam.io.WriteToText(
        known_args.output,
        shard_name_template='',
        append_trailing_newlines=True)

From e4cf3f3341aa1ae2deae83208da40178c56d14cc Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 11:05:08 -0400
Subject: [PATCH 04/12] Refactor and add assertions to the sklearn test

---
 .../ml/inference/sklearn_inference_it_test.py | 32 ++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index dc13ceca85e2..0659b1246be2 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -21,20 +21,29 @@
 import pytest
 import unittest
 import uuid
+from typing import List
 
 from apache_beam.io.filesystems import FileSystems
 from apache_beam.examples.inference import sklearn_mnist_classification
 from apache_beam.testing.test_pipeline import TestPipeline
 
 
+def process_outputs(filepath: str) -> List[str]:
+  with FileSystems().open(filepath) as f:
+    lines = f.readlines()
+    lines = [l.decode('utf-8').strip('\n') for l in lines]
+    return lines
+
+
+@pytest.mark.skip
 class SklearnInference(unittest.TestCase):
   @pytest.mark.it_postcommit
-  def test_predictions_output_file(self):
-    test_pipeline = TestPipeline(is_integration_test=True)
+  def test_sklearn_mnist_classification(self):
+    test_pipeline = TestPipeline(is_integration_test=False)
     input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
-    output_file_dir = 'gs://apache-beam-ml/testing/predictions' # pylint: disable=line-too-long
+    output_file_dir = 'gs://temp-storage-for-end-to-end-tests'
     output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt'])
-    model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle' # pylint: disable=line-too-long
+    model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle'
     extra_opts = {
         'input': input_file,
         'output': output_file,
@@ -45,6 +54,21 @@ def test_predictions_output_file(self):
         save_main_session=False)
     self.assertEqual(FileSystems().exists(output_file), True)
 
+    expected_output_filepath = 'gs://apache-beam-ml/testing/expected_outputs/test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long
+    expected_outputs = process_outputs(expected_output_filepath)
+
+    predicted_outputs = process_outputs(output_file)
+    self.assertEqual(len(expected_outputs), len(predicted_outputs))
+
+    predictions_dict = {}
+    for i in range(len(predicted_outputs)):
+      true_label, prediction = predicted_outputs[i].split(',')
+      predictions_dict[true_label] = prediction
+
+    for i in range(len(expected_outputs)):
+      true_label, expected_prediction = expected_outputs[i].split(',')
+      self.assertEqual(predictions_dict[true_label], expected_prediction)
+
 
 if __name__ == '__main__':
   logging.getLogger().setLevel(logging.DEBUG)
   unittest.main()

From 6b6d6b1b83e10c97779fe71d4328407360058c42 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 11:06:14 -0400
Subject: [PATCH 05/12] Fixup import order

---
 .../apache_beam/ml/inference/sklearn_inference_it_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index 0659b1246be2..ffcae83d9ff3 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -23,8 +23,8 @@
 import uuid
 from typing import List
 
-from apache_beam.io.filesystems import FileSystems
 from apache_beam.examples.inference import sklearn_mnist_classification
+from apache_beam.io.filesystems import FileSystems
 from apache_beam.testing.test_pipeline import TestPipeline

From 0375d74935693fa1695c3fe4868b8e459b2ed745 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 11:09:27 -0400
Subject: [PATCH 06/12] fixup: help and name

---
 .../examples/inference/sklearn_mnist_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index a48d9d00ceaf..13ebabf56d45 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -63,7 +63,7 @@ def parse_known_args(argv):
       '--input_file',
       dest='input',
       required=True,
-      help='CSV file with row containing label and pixel values.')
+      help='text file with comma separated int values.')
   parser.add_argument(
       '--output',
       dest='output',
@@ -102,7 +102,7 @@ def run(argv=None, save_main_session=True):
         | "RunInference" >> RunInference(model_loader)
         | "PostProcessOutputs" >> beam.ParDo(PostProcessor()))
 
-    _ = predictions | "WriteOutputToGCS" >> beam.io.WriteToText(
+    _ = predictions | "WriteOutput" >> beam.io.WriteToText(
        known_args.output,
        shard_name_template='',
        append_trailing_newlines=True)

From 1ccdbf3018e2da52c4bfbf8eaf5804c1d8671f08 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 12:15:08 -0400
Subject: [PATCH 07/12] Add gradle task for sklearn IT tests

---
 .../ml/inference/sklearn_inference_it_test.py |  2 +-
 sdks/python/pytest.ini                        |  1 +
 sdks/python/test-suites/direct/common.gradle  | 26 +++++++++++++++++--
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index ffcae83d9ff3..d2ed97e09045 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -36,8 +36,8 @@ def process_outputs(filepath: str) -> List[str]:
 
 
 @pytest.mark.skip
+@pytest.mark.it_run_inference
 class SklearnInference(unittest.TestCase):
-  @pytest.mark.it_postcommit
   def test_sklearn_mnist_classification(self):
     test_pipeline = TestPipeline(is_integration_test=False)
     input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini
index e72ccd15d149..c3c0aaa83990 100644
--- a/sdks/python/pytest.ini
+++ b/sdks/python/pytest.ini
@@ -48,6 +48,7 @@ markers =
   # We run these tests with multiple major pyarrow versions (BEAM-11211)
   uses_pyarrow: tests that utilize pyarrow in some way
   uses_pytorch: tests that utilize pytorch in some way
+  it_run_inference: collects for RunInference integration test runs.
 
 # Default timeout intended for unit tests.
 # If certain tests need a different value, please see the docs on how to
diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index f08655e02810..c1ea1805b0e0 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -187,7 +187,7 @@ tasks.register("hdfsIntegrationTest") {
 }
 
 // Pytorch RunInference IT tests
-task torchTests {
+task torchInferenceTest {
   dependsOn 'installGcpTest'
   dependsOn ':sdks:python:sdist'
   def requirementsFile = "${rootDir}/sdks/python/apache_beam/ml/inference/torch_tests_requirements.txt"
@@ -213,8 +213,30 @@ task torchTests {
   }
 }
 
+task sklearnInferenceTest {
+  dependsOn 'installGcpTest'
+  dependsOn ':sdks:python:sdist'
+  doLast {
+    def testOpts = basicTestOpts
+    def argMap = [
+        "test_opts": testOpts,
+        "suite": "postCommitIT-direct-py${pythonVersionSuffix}",
+        "collect": "it_run_inference",
+        "runner": "TestDirectRunner"
+    ]
+    def cmdArgs = mapToArgString(argMap)
+    exec {
+      executable 'sh'
+      args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
+    }
+  }
+}
+
 // Add all the RunInference framework IT tests to this gradle task that runs on Direct Runner Post commit suite.
 // TODO(anandinguva): Add sklearn IT test here
 project.tasks.register("inferencePostCommitIT") {
-  dependsOn = ['torchTests']
+  dependsOn = [
+    'torchInferenceTest',
+    'sklearnInferenceTest'
+  ]
 }

From c472974ca067f1cd406704b770d889dadb26c8b3 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 12:30:54 -0400
Subject: [PATCH 08/12] fixup lint

---
 .../apache_beam/ml/inference/sklearn_inference_it_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index d2ed97e09045..85aff577de65 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -18,17 +18,17 @@
 """End-to-End test for Sklearn Inference"""
 
 import logging
-import pytest
 import unittest
 import uuid
-from typing import List
+
+import pytest
 
 from apache_beam.examples.inference import sklearn_mnist_classification
 from apache_beam.io.filesystems import FileSystems
 from apache_beam.testing.test_pipeline import TestPipeline
 
 
-def process_outputs(filepath: str) -> List[str]:
+def process_outputs(filepath):
   with FileSystems().open(filepath) as f:
     lines = f.readlines()
     lines = [l.decode('utf-8').strip('\n') for l in lines]
     return lines

From 2c61681f715ee54f557ef0c4e8225fe296a5cd9b Mon Sep 17 00:00:00 2001
From: Anand Inguva <34158215+AnandInguva@users.noreply.github.com>
Date: Wed, 15 Jun 2022 12:41:55 -0400
Subject: [PATCH 09/12] Update sdks/python/test-suites/direct/common.gradle

Co-authored-by: Andy Ye
---
 sdks/python/test-suites/direct/common.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index c1ea1805b0e0..3033326ab6e1 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -212,7 +212,7 @@ task torchInferenceTest {
     }
   }
 }
-
+// Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
   dependsOn 'installGcpTest'
   dependsOn ':sdks:python:sdist'

From 3f527be5049e71fb95f262a084da4e5d5172dafa Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 15:45:19 -0400
Subject: [PATCH 10/12] Change sklearn IT test marker

---
 .../apache_beam/ml/inference/sklearn_inference_it_test.py | 3 ++-
 sdks/python/pytest.ini                                     | 2 +-
 sdks/python/test-suites/direct/common.gradle               | 7 +++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index 85aff577de65..be5da3f4320b 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -36,7 +36,8 @@ def process_outputs(filepath):
 
 
 @pytest.mark.skip
-@pytest.mark.it_run_inference
+@pytest.mark.uses_sklearn
+@pytest.mark.it_postcommit
 class SklearnInference(unittest.TestCase):
   def test_sklearn_mnist_classification(self):
diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini
index c3c0aaa83990..5973c83ca4f3 100644
--- a/sdks/python/pytest.ini
+++ b/sdks/python/pytest.ini
@@ -48,7 +48,7 @@ markers =
   # We run these tests with multiple major pyarrow versions (BEAM-11211)
   uses_pyarrow: tests that utilize pyarrow in some way
   uses_pytorch: tests that utilize pytorch in some way
-  it_run_inference: collects for RunInference integration test runs.
+  uses_sklearn: collects for Sklearn RunInference integration test runs.
 
 # Default timeout intended for unit tests.
 # If certain tests need a different value, please see the docs on how to
diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index 3033326ab6e1..7c3db8bf3201 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -214,14 +214,14 @@ task torchInferenceTest {
 }
 // Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
-  dependsOn 'installGcpTest'
-  dependsOn ':sdks:python:sdist'
+//  dependsOn 'installGcpTest'
+//  dependsOn ':sdks:python:sdist'
   doLast {
     def testOpts = basicTestOpts
     def argMap = [
         "test_opts": testOpts,
         "suite": "postCommitIT-direct-py${pythonVersionSuffix}",
-        "collect": "it_run_inference",
+        "collect": "uses_sklearn and it_postcommit" ,
         "runner": "TestDirectRunner"
     ]
     def cmdArgs = mapToArgString(argMap)
@@ -233,7 +233,6 @@ task sklearnInferenceTest {
   }
 }
 
 // Add all the RunInference framework IT tests to this gradle task that runs on Direct Runner Post commit suite.
-// TODO(anandinguva): Add sklearn IT test here
 project.tasks.register("inferencePostCommitIT") {
   dependsOn = [
     'torchInferenceTest',

From d70eee6897e45142a6616a8f4719c1445f857fec Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 15:47:37 -0400
Subject: [PATCH 11/12] Uncomment

---
 sdks/python/test-suites/direct/common.gradle | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index 7c3db8bf3201..44a6e97d2c76 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -214,8 +214,8 @@ task torchInferenceTest {
 }
 // Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
-//  dependsOn 'installGcpTest'
-//  dependsOn ':sdks:python:sdist'
+  dependsOn 'installGcpTest'
+  dependsOn ':sdks:python:sdist'
   doLast {
     def testOpts = basicTestOpts
     def argMap = [

From fa42b67c904faa0abf3e064870ac51f1d95d3789 Mon Sep 17 00:00:00 2001
From: Anand Inguva <34158215+AnandInguva@users.noreply.github.com>
Date: Wed, 15 Jun 2022 15:56:02 -0400
Subject: [PATCH 12/12] Apply suggestions from code review

Co-authored-by: Andy Ye
---
 sdks/python/pytest.ini                       | 2 +-
 sdks/python/test-suites/direct/common.gradle | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini
index 5973c83ca4f3..0a9a274f1f1a 100644
--- a/sdks/python/pytest.ini
+++ b/sdks/python/pytest.ini
@@ -48,7 +48,7 @@ markers =
   # We run these tests with multiple major pyarrow versions (BEAM-11211)
   uses_pyarrow: tests that utilize pyarrow in some way
   uses_pytorch: tests that utilize pytorch in some way
-  uses_sklearn: collects for Sklearn RunInference integration test runs.
+  uses_sklearn: tests that utilize scikit-learn in some way
 
 # Default timeout intended for unit tests.
 # If certain tests need a different value, please see the docs on how to
diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index 44a6e97d2c76..67fb321c0c07 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -211,6 +211,7 @@ task torchInferenceTest {
       args '-c', ". ${envdir}/bin/activate && export FORCE_TORCH_IT=1 && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
     }
   }
+
 }
 // Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
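
Note on trying the example locally: the integration test above reads a pre-trained model from gs://apache-beam-ml/models/mnist_model_svm.pickle. The sketch below is only an illustration of how a compatible pickled scikit-learn classifier could be produced with stock scikit-learn APIs; it is an assumption, not the script used to build the hosted model, and the output filename local_mnist_model_svm.pickle is a placeholder.

# Illustrative sketch (not part of this PR): train a small SVM on MNIST and
# save it with a plain pickle.dump(), which is the form the example loads when
# it is configured with ModelFileType.PICKLE.
import pickle

from sklearn.datasets import fetch_openml
from sklearn.svm import SVC


def train_and_pickle_mnist_svm(output_path='local_mnist_model_svm.pickle'):
  # Download MNIST (70000 samples of 784 pixel values each) from OpenML.
  images, labels = fetch_openml(
      'mnist_784', version=1, return_X_y=True, as_frame=False)
  # OpenML returns string labels; cast to int to match the integer labels
  # produced by process_input() in the example pipeline.
  labels = labels.astype(int)

  # Train on a subset to keep local training time manageable.
  model = SVC(kernel='rbf', gamma='scale')
  model.fit(images[:10000], labels[:10000])

  # Dump the fitted estimator as-is so it can be loaded via pickle.load().
  with open(output_path, 'wb') as f:
    pickle.dump(model, f)


if __name__ == '__main__':
  train_and_pickle_mnist_svm()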