From 29767cf2bc0cd16b9006b0ba9990a46c920390ec Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Mon, 23 May 2022 13:10:36 -0400
Subject: [PATCH 01/12] sklearn example and IT test

---
 .test-infra/jenkins/jira_utils/__init__.py    | 16 ++++
 .../inference/sklearn_inference_example.py    | 95 +++++++++++++++++++
 .../ml/inference/sklearn_inference_it_test.py | 51 ++++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 .test-infra/jenkins/jira_utils/__init__.py
 create mode 100644 sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
 create mode 100644 sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py

diff --git a/.test-infra/jenkins/jira_utils/__init__.py b/.test-infra/jenkins/jira_utils/__init__.py
new file mode 100644
index 000000000000..cce3acad34a4
--- /dev/null
+++ b/.test-infra/jenkins/jira_utils/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py b/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
new file mode 100644
index 000000000000..caf61de41594
--- /dev/null
+++ b/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
@@ -0,0 +1,95 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""A pipeline that uses RunInference API to perform image classification."""
+
+import argparse
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Tuple
+
+import apache_beam as beam
+from apache_beam.ml.inference.api import PredictionResult
+from apache_beam.ml.inference.api import RunInference
+from apache_beam.ml.inference.sklearn_inference import ModelFileType
+from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader
+from apache_beam.options.pipeline_options import PipelineOptions
+from apache_beam.options.pipeline_options import SetupOptions
+
+
+def process_input(row: str) -> Tuple[int, List[int]]:
+  data = row.split(',')
+  label, pixels = int(data[0]), data[1:]
+  pixels = [int(pixel) for pixel in pixels]
+  return label, pixels
+
+
+class PostProcessor(beam.DoFn):
+  def process(self, element: Tuple[int, PredictionResult]) -> Iterable[Dict]:
+    label, prediction_result = element
+    prediction = prediction_result.inference
+    yield {label: prediction}
+
+
+def parse_known_args(argv):
+  """Parses args for the workflow."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--input_file',
+      dest='input',
+      help='CSV file with row containing label and pixel values.')
+  parser.add_argument(
+      '--output', dest='output', help='Path to save output predictions.')
+  parser.add_argument(
+      '--model_path',
+      dest='model_path',
+      help='Path to load the Sklearn model for Inference.')
+  return parser.parse_known_args(argv)
+
+
+def run(argv=None, save_main_session=True):
+  """Entry point. Defines and runs the pipeline."""
+  known_args, pipeline_args = parse_known_args(argv)
+  pipeline_options = PipelineOptions(pipeline_args)
+  pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
+
+  model_loader = SklearnModelLoader(
+      model_file_type=ModelFileType.PICKLE, model_uri=known_args.model_path)
+
+  with beam.Pipeline(options=pipeline_options) as p:
+    label_pixel_tuple = (
+        p
+        | "ReadFromInput" >> beam.io.ReadFromText(
+            known_args.input, skip_header_lines=1)
+        | "Process inputs" >> beam.Map(process_input))
+
+    predictions = (
+        label_pixel_tuple
+        | "RunInference" >> RunInference(model_loader).with_output_types(
+            Tuple[int, PredictionResult])
+        | "PostProcessor" >> beam.ParDo(PostProcessor()))
+
+    if known_args.output:
+      predictions | "WriteOutputToGCS" >> beam.io.WriteToText( # pylint: disable=expression-not-assigned
+        known_args.output,
+        shard_name_template='',
+        append_trailing_newlines=True)
+
+
+if __name__ == '__main__':
+  run()
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
new file mode 100644
index 000000000000..a864dd4414c2
--- /dev/null
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""End-to-End test for Sklearn Inference"""
+
+import logging
+import pytest
+import unittest
+import uuid
+
+from apache_beam.io.filesystems import FileSystems
+from apache_beam.examples.inference import sklearn_inference_example
+from apache_beam.testing.test_pipeline import TestPipeline
+
+
+class SklearnInference(unittest.TestCase):
+  @pytest.mark.it_postcommit
+  def test_predictions_output_file(self):
+    test_pipeline = TestPipeline(is_integration_test=False)
+    input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
+    output_file_dir = 'gs://apache-beam-ml/testing/predictions' # pylint: disable=line-too-long
+    output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt'])
+    model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle' # pylint: disable=line-too-long
+    extra_opts = {
+        'input': input_file,
+        'output': output_file,
+        'model_path': model_path,
+    }
+    sklearn_inference_example.run(
+        test_pipeline.get_full_options_as_args(**extra_opts),
+        save_main_session=False)
+    self.assertEqual(FileSystems().exists(output_file), True)
+
+
+if __name__ == '__main__':
+  logging.getLogger().setLevel(logging.DEBUG)
+  unittest.main()

From f7a36a6d5a8f56157cea1f3e4343c9a3fdc84641 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Thu, 9 Jun 2022 18:25:51 -0400
Subject: [PATCH 02/12] Change the example name

---
 ...inference_example.py => sklearn_mnist_classification.py} | 0
 .../apache_beam/ml/inference/sklearn_inference_it_test.py   | 6 +++---
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename sdks/python/apache_beam/examples/inference/{sklearn_inference_example.py => sklearn_mnist_classification.py} (100%)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_inference_example.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
similarity index 100%
rename from sdks/python/apache_beam/examples/inference/sklearn_inference_example.py
rename to sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index a864dd4414c2..dc13ceca85e2 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -23,14 +23,14 @@
 import uuid
 
 from apache_beam.io.filesystems import FileSystems
-from apache_beam.examples.inference import sklearn_inference_example
+from apache_beam.examples.inference import sklearn_mnist_classification
 from apache_beam.testing.test_pipeline import TestPipeline
 
 
 class SklearnInference(unittest.TestCase):
   @pytest.mark.it_postcommit
   def test_predictions_output_file(self):
-    test_pipeline = TestPipeline(is_integration_test=False)
+    test_pipeline = TestPipeline(is_integration_test=True)
     input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
     output_file_dir = 'gs://apache-beam-ml/testing/predictions' # pylint: disable=line-too-long
     output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt'])
     model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle' # pylint: disable=line-too-long
     extra_opts = {
@@ -40,7 +40,7 @@ def test_predictions_output_file(self):
         'output': output_file,
         'model_path': model_path,
     }
-    sklearn_inference_example.run(
+    sklearn_mnist_classification.run(
         test_pipeline.get_full_options_as_args(**extra_opts),
         save_main_session=False)
     self.assertEqual(FileSystems().exists(output_file), True)

From 0be2a8b2330c8b852697f9f5aaabc72220c0d4e2 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 10:17:19 -0400
Subject: [PATCH 03/12] Refactor sklearn example

---
 .test-infra/jenkins/jira_utils/__init__.py    | 16 ------
 .../inference/sklearn_mnist_classification.py | 49 +++++++++++++------
 2 files changed, 33 insertions(+), 32 deletions(-)
 delete mode 100644 .test-infra/jenkins/jira_utils/__init__.py

diff --git a/.test-infra/jenkins/jira_utils/__init__.py b/.test-infra/jenkins/jira_utils/__init__.py
deleted file mode 100644
index cce3acad34a4..000000000000
--- a/.test-infra/jenkins/jira_utils/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index caf61de41594..a48d9d00ceaf 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -15,19 +15,26 @@
 # limitations under the License.
 #
 
-"""A pipeline that uses RunInference API to perform image classification."""
+"""A pipeline that uses RunInference API to classify MNIST data.
+
+This pipeline takes a text file in which data is comma separated ints. The first
+column would be the true label and the rest would be the pixel values. The data
+is processed and then a model trained on the MNIST data would be used to perform
+the inference. The pipeline writes the prediction to an output file in which
+users can then compare against the true label.
+"""
 
 import argparse
-from typing import Dict
 from typing import Iterable
 from typing import List
 from typing import Tuple
 
 import apache_beam as beam
-from apache_beam.ml.inference.api import PredictionResult
-from apache_beam.ml.inference.api import RunInference
+from apache_beam.ml.inference.base import KeyedModelHandler
+from apache_beam.ml.inference.base import PredictionResult
+from apache_beam.ml.inference.base import RunInference
 from apache_beam.ml.inference.sklearn_inference import ModelFileType
-from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader
+from apache_beam.ml.inference.sklearn_inference import SklearnModelHandlerNumpy
 from apache_beam.options.pipeline_options import PipelineOptions
 from apache_beam.options.pipeline_options import SetupOptions
 
@@ -40,10 +47,13 @@ def process_input(row: str) -> Tuple[int, List[int]]:
 
 
 class PostProcessor(beam.DoFn):
-  def process(self, element: Tuple[int, PredictionResult]) -> Iterable[Dict]:
+  """Process the PredictionResult to get the predicted label.
+  Returns a comma separated string with true label and predicted label.
+  """
+  def process(self, element: Tuple[int, PredictionResult]) -> Iterable[str]:
     label, prediction_result = element
     prediction = prediction_result.inference
-    yield {label: prediction}
+    yield '{},{}'.format(label, prediction)
 
 
 def parse_known_args(argv):
@@ -52,12 +62,17 @@ def parse_known_args(argv):
   parser.add_argument(
       '--input_file',
      dest='input',
+      required=True,
       help='CSV file with row containing label and pixel values.')
   parser.add_argument(
-      '--output', dest='output', help='Path to save output predictions.')
+      '--output',
+      dest='output',
+      required=True,
+      help='Path to save output predictions.')
   parser.add_argument(
       '--model_path',
       dest='model_path',
+      required=True,
       help='Path to load the Sklearn model for Inference.')
   return parser.parse_known_args(argv)
 
@@ -68,24 +83,26 @@ def run(argv=None, save_main_session=True):
   pipeline_options = PipelineOptions(pipeline_args)
   pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
 
-  model_loader = SklearnModelLoader(
-      model_file_type=ModelFileType.PICKLE, model_uri=known_args.model_path)
+  # In this example we pass keyed inputs to RunInference transform.
+  # Therefore, we use KeyedModelHandler wrapper over SklearnModelHandlerNumpy.
+  model_loader = KeyedModelHandler(
+      SklearnModelHandlerNumpy(
+          model_file_type=ModelFileType.PICKLE,
+          model_uri=known_args.model_path))
 
   with beam.Pipeline(options=pipeline_options) as p:
     label_pixel_tuple = (
         p
         | "ReadFromInput" >> beam.io.ReadFromText(
             known_args.input, skip_header_lines=1)
-        | "Process inputs" >> beam.Map(process_input))
+        | "PreProcessInputs" >> beam.Map(process_input))
 
     predictions = (
         label_pixel_tuple
-        | "RunInference" >> RunInference(model_loader).with_output_types(
-            Tuple[int, PredictionResult])
-        | "PostProcessor" >> beam.ParDo(PostProcessor()))
+        | "RunInference" >> RunInference(model_loader)
+        | "PostProcessOutputs" >> beam.ParDo(PostProcessor()))
 
-    if known_args.output:
-      predictions | "WriteOutputToGCS" >> beam.io.WriteToText( # pylint: disable=expression-not-assigned
+    _ = predictions | "WriteOutputToGCS" >> beam.io.WriteToText(
        known_args.output,
        shard_name_template='',
        append_trailing_newlines=True)

From e4cf3f3341aa1ae2deae83208da40178c56d14cc Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 11:05:08 -0400
Subject: [PATCH 04/12] Refactor and add assertions to the sklearn test

---
 .../ml/inference/sklearn_inference_it_test.py | 32 ++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index dc13ceca85e2..0659b1246be2 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -21,20 +21,29 @@
 import pytest
 import unittest
 import uuid
+from typing import List
 
 from apache_beam.io.filesystems import FileSystems
 from apache_beam.examples.inference import sklearn_mnist_classification
 from apache_beam.testing.test_pipeline import TestPipeline
 
 
+def process_outputs(filepath: str) -> List[str]:
+  with FileSystems().open(filepath) as f:
+    lines = f.readlines()
+    lines = [l.decode('utf-8').strip('\n') for l in lines]
+    return lines
+
+
+@pytest.mark.skip
 class SklearnInference(unittest.TestCase):
   @pytest.mark.it_postcommit
-  def test_predictions_output_file(self):
-    test_pipeline = TestPipeline(is_integration_test=True)
+  def test_sklearn_mnist_classification(self):
+    test_pipeline = TestPipeline(is_integration_test=False)
     input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
-    output_file_dir = 'gs://apache-beam-ml/testing/predictions' # pylint: disable=line-too-long
+    output_file_dir = 'gs://temp-storage-for-end-to-end-tests'
     output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt'])
-    model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle' # pylint: disable=line-too-long
+    model_path = 'gs://apache-beam-ml/models/mnist_model_svm.pickle'
     extra_opts = {
         'input': input_file,
         'output': output_file,
@@ -45,6 +54,21 @@ def test_predictions_output_file(self):
         save_main_session=False)
     self.assertEqual(FileSystems().exists(output_file), True)
 
+    expected_output_filepath = 'gs://apache-beam-ml/testing/expected_outputs/test_sklearn_mnist_classification_actuals.txt' # pylint: disable=line-too-long
+    expected_outputs = process_outputs(expected_output_filepath)
+
+    predicted_outputs = process_outputs(output_file)
+    self.assertEqual(len(expected_outputs), len(predicted_outputs))
+
+    predictions_dict = {}
+    for i in range(len(predicted_outputs)):
+      true_label, prediction = predicted_outputs[i].split(',')
+      predictions_dict[true_label] = prediction
+
+    for i in range(len(expected_outputs)):
+      true_label, expected_prediction = expected_outputs[i].split(',')
+      self.assertEqual(predictions_dict[true_label], expected_prediction)
+
 
 if __name__ == '__main__':
   logging.getLogger().setLevel(logging.DEBUG)
   unittest.main()

From 6b6d6b1b83e10c97779fe71d4328407360058c42 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 11:06:14 -0400
Subject: [PATCH 05/12] Fixup import order

---
 .../apache_beam/ml/inference/sklearn_inference_it_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index 0659b1246be2..ffcae83d9ff3 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -23,8 +23,8 @@
 import uuid
 from typing import List
 
-from apache_beam.io.filesystems import FileSystems
 from apache_beam.examples.inference import sklearn_mnist_classification
+from apache_beam.io.filesystems import FileSystems
 from apache_beam.testing.test_pipeline import TestPipeline

From 0375d74935693fa1695c3fe4868b8e459b2ed745 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 11:09:27 -0400
Subject: [PATCH 06/12] fixup: help and name

---
 .../examples/inference/sklearn_mnist_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
index a48d9d00ceaf..13ebabf56d45 100644
--- a/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
+++ b/sdks/python/apache_beam/examples/inference/sklearn_mnist_classification.py
@@ -63,7 +63,7 @@ def parse_known_args(argv):
       '--input_file',
       dest='input',
       required=True,
-      help='CSV file with row containing label and pixel values.')
+      help='text file with comma separated int values.')
   parser.add_argument(
       '--output',
       dest='output',
@@ -102,7 +102,7 @@ def run(argv=None, save_main_session=True):
         | "RunInference" >> RunInference(model_loader)
         | "PostProcessOutputs" >> beam.ParDo(PostProcessor()))
 
-    _ = predictions | "WriteOutputToGCS" >> beam.io.WriteToText(
+    _ = predictions | "WriteOutput" >> beam.io.WriteToText(
        known_args.output,
        shard_name_template='',
        append_trailing_newlines=True)

From 1ccdbf3018e2da52c4bfbf8eaf5804c1d8671f08 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 12:15:08 -0400
Subject: [PATCH 07/12] Add gradle task for sklearn IT tests

---
 .../ml/inference/sklearn_inference_it_test.py |  2 +-
 sdks/python/pytest.ini                        |  1 +
 sdks/python/test-suites/direct/common.gradle  | 26 +++++++++++++++++--
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index ffcae83d9ff3..d2ed97e09045 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -36,8 +36,8 @@ def process_outputs(filepath: str) -> List[str]:
 
 
 @pytest.mark.skip
+@pytest.mark.it_run_inference
 class SklearnInference(unittest.TestCase):
-  @pytest.mark.it_postcommit
   def test_sklearn_mnist_classification(self):
     test_pipeline = TestPipeline(is_integration_test=False)
     input_file = 'gs://apache-beam-ml/testing/inputs/it_mnist_data.csv'
diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini
index e72ccd15d149..c3c0aaa83990 100644
--- a/sdks/python/pytest.ini
+++ b/sdks/python/pytest.ini
@@ -48,6 +48,7 @@ markers =
   # We run these tests with multiple major pyarrow versions (BEAM-11211)
   uses_pyarrow: tests that utilize pyarrow in some way
   uses_pytorch: tests that utilize pytorch in some way
+  it_run_inference: collects for RunInference integration test runs.
 
 # Default timeout intended for unit tests.
 # If certain tests need a different value, please see the docs on how to
diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index f08655e02810..c1ea1805b0e0 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -187,7 +187,7 @@ tasks.register("hdfsIntegrationTest") {
 }
 
 // Pytorch RunInference IT tests
-task torchTests {
+task torchInferenceTest {
   dependsOn 'installGcpTest'
   dependsOn ':sdks:python:sdist'
   def requirementsFile = "${rootDir}/sdks/python/apache_beam/ml/inference/torch_tests_requirements.txt"
@@ -213,8 +213,30 @@ task torchTests {
   }
 }
 
+task sklearnInferenceTest {
+  dependsOn 'installGcpTest'
+  dependsOn ':sdks:python:sdist'
+  doLast {
+    def testOpts = basicTestOpts
+    def argMap = [
+        "test_opts": testOpts,
+        "suite": "postCommitIT-direct-py${pythonVersionSuffix}",
+        "collect": "it_run_inference",
+        "runner": "TestDirectRunner"
+    ]
+    def cmdArgs = mapToArgString(argMap)
+    exec {
+      executable 'sh'
+      args '-c', ". ${envdir}/bin/activate && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
+    }
+  }
+}
+
 // Add all the RunInference framework IT tests to this gradle task that runs on Direct Runner Post commit suite.
 // TODO(anandinguva): Add sklearn IT test here
 project.tasks.register("inferencePostCommitIT") {
-  dependsOn = ['torchTests']
+  dependsOn = [
+    'torchInferenceTest',
+    'sklearnInferenceTest'
+  ]
 }

From c472974ca067f1cd406704b770d889dadb26c8b3 Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 12:30:54 -0400
Subject: [PATCH 08/12] fixup lint

---
 .../apache_beam/ml/inference/sklearn_inference_it_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index d2ed97e09045..85aff577de65 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -18,17 +18,17 @@
 """End-to-End test for Sklearn Inference"""
 
 import logging
-import pytest
 import unittest
 import uuid
-from typing import List
+
+import pytest
 
 from apache_beam.examples.inference import sklearn_mnist_classification
 from apache_beam.io.filesystems import FileSystems
 from apache_beam.testing.test_pipeline import TestPipeline
 
 
-def process_outputs(filepath: str) -> List[str]:
+def process_outputs(filepath):
   with FileSystems().open(filepath) as f:
     lines = f.readlines()
     lines = [l.decode('utf-8').strip('\n') for l in lines]
     return lines

From 2c61681f715ee54f557ef0c4e8225fe296a5cd9b Mon Sep 17 00:00:00 2001
From: Anand Inguva <34158215+AnandInguva@users.noreply.github.com>
Date: Wed, 15 Jun 2022 12:41:55 -0400
Subject: [PATCH 09/12] Update sdks/python/test-suites/direct/common.gradle

Co-authored-by: Andy Ye
---
 sdks/python/test-suites/direct/common.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index c1ea1805b0e0..3033326ab6e1 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -212,7 +212,7 @@ task torchInferenceTest {
     }
   }
 }
-
+// Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
   dependsOn 'installGcpTest'
   dependsOn ':sdks:python:sdist'

From 3f527be5049e71fb95f262a084da4e5d5172dafa Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 15:45:19 -0400
Subject: [PATCH 10/12] Change sklearn IT test marker

---
 .../apache_beam/ml/inference/sklearn_inference_it_test.py | 3 ++-
 sdks/python/pytest.ini                                     | 2 +-
 sdks/python/test-suites/direct/common.gradle               | 7 +++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
index 85aff577de65..be5da3f4320b 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_it_test.py
@@ -36,7 +36,8 @@ def process_outputs(filepath):
 
 
 @pytest.mark.skip
-@pytest.mark.it_run_inference
+@pytest.mark.uses_sklearn
+@pytest.mark.it_postcommit
 class SklearnInference(unittest.TestCase):
   def test_sklearn_mnist_classification(self):
diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini
index c3c0aaa83990..5973c83ca4f3 100644
--- a/sdks/python/pytest.ini
+++ b/sdks/python/pytest.ini
@@ -48,7 +48,7 @@ markers =
   # We run these tests with multiple major pyarrow versions (BEAM-11211)
   uses_pyarrow: tests that utilize pyarrow in some way
   uses_pytorch: tests that utilize pytorch in some way
-  it_run_inference: collects for RunInference integration test runs.
+  uses_sklearn: collects for Sklearn RunInference integration test runs.
 
 # Default timeout intended for unit tests.
 # If certain tests need a different value, please see the docs on how to
diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index 3033326ab6e1..7c3db8bf3201 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -214,14 +214,14 @@ task torchInferenceTest {
 }
 // Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
-  dependsOn 'installGcpTest'
-  dependsOn ':sdks:python:sdist'
+//  dependsOn 'installGcpTest'
+//  dependsOn ':sdks:python:sdist'
   doLast {
     def testOpts = basicTestOpts
     def argMap = [
         "test_opts": testOpts,
         "suite": "postCommitIT-direct-py${pythonVersionSuffix}",
-        "collect": "it_run_inference",
+        "collect": "uses_sklearn and it_postcommit" ,
         "runner": "TestDirectRunner"
     ]
     def cmdArgs = mapToArgString(argMap)
@@ -233,7 +233,6 @@ task sklearnInferenceTest {
   }
 }
 
 // Add all the RunInference framework IT tests to this gradle task that runs on Direct Runner Post commit suite.
-// TODO(anandinguva): Add sklearn IT test here
 project.tasks.register("inferencePostCommitIT") {
   dependsOn = [
     'torchInferenceTest',

From d70eee6897e45142a6616a8f4719c1445f857fec Mon Sep 17 00:00:00 2001
From: Anand Inguva
Date: Wed, 15 Jun 2022 15:47:37 -0400
Subject: [PATCH 11/12] Uncomment

---
 sdks/python/test-suites/direct/common.gradle | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index 7c3db8bf3201..44a6e97d2c76 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -214,8 +214,8 @@ task torchInferenceTest {
 }
 // Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
-//  dependsOn 'installGcpTest'
-//  dependsOn ':sdks:python:sdist'
+  dependsOn 'installGcpTest'
+  dependsOn ':sdks:python:sdist'
   doLast {
     def testOpts = basicTestOpts
     def argMap = [

From fa42b67c904faa0abf3e064870ac51f1d95d3789 Mon Sep 17 00:00:00 2001
From: Anand Inguva <34158215+AnandInguva@users.noreply.github.com>
Date: Wed, 15 Jun 2022 15:56:02 -0400
Subject: [PATCH 12/12] Apply suggestions from code review

Co-authored-by: Andy Ye
---
 sdks/python/pytest.ini                       | 2 +-
 sdks/python/test-suites/direct/common.gradle | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sdks/python/pytest.ini b/sdks/python/pytest.ini
index 5973c83ca4f3..0a9a274f1f1a 100644
--- a/sdks/python/pytest.ini
+++ b/sdks/python/pytest.ini
@@ -48,7 +48,7 @@ markers =
   # We run these tests with multiple major pyarrow versions (BEAM-11211)
   uses_pyarrow: tests that utilize pyarrow in some way
   uses_pytorch: tests that utilize pytorch in some way
-  uses_sklearn: collects for Sklearn RunInference integration test runs.
+  uses_sklearn: tests that utilize scikit-learn in some way
 
 # Default timeout intended for unit tests.
 # If certain tests need a different value, please see the docs on how to
diff --git a/sdks/python/test-suites/direct/common.gradle b/sdks/python/test-suites/direct/common.gradle
index 44a6e97d2c76..67fb321c0c07 100644
--- a/sdks/python/test-suites/direct/common.gradle
+++ b/sdks/python/test-suites/direct/common.gradle
@@ -211,6 +211,7 @@ task torchInferenceTest {
       args '-c', ". ${envdir}/bin/activate && export FORCE_TORCH_IT=1 && ${runScriptsDir}/run_integration_test.sh $cmdArgs"
     }
   }
+
 }
 // Scikit-learn RunInference IT tests
 task sklearnInferenceTest {
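
Note on trying the example locally: the integration test above reads a pre-trained model from gs://apache-beam-ml/models/mnist_model_svm.pickle. The sketch below is only an illustration of how a compatible pickled scikit-learn classifier could be produced with stock scikit-learn APIs; it is an assumption, not the script used to build the hosted model, and the output filename local_mnist_model_svm.pickle is a placeholder.

# Illustrative sketch (not part of this PR): train a small SVM on MNIST and
# save it with a plain pickle.dump(), which is the form the example loads when
# it is configured with ModelFileType.PICKLE.
import pickle

from sklearn.datasets import fetch_openml
from sklearn.svm import SVC


def train_and_pickle_mnist_svm(output_path='local_mnist_model_svm.pickle'):
  # Download MNIST (70000 samples of 784 pixel values each) from OpenML.
  images, labels = fetch_openml(
      'mnist_784', version=1, return_X_y=True, as_frame=False)
  # OpenML returns string labels; cast to int to match the integer labels
  # produced by process_input() in the example pipeline.
  labels = labels.astype(int)

  # Train on a subset to keep local training time manageable.
  model = SVC(kernel='rbf', gamma='scale')
  model.fit(images[:10000], labels[:10000])

  # Dump the fitted estimator as-is so it can be loaded via pickle.load().
  with open(output_path, 'wb') as f:
    pickle.dump(model, f)


if __name__ == '__main__':
  train_and_pickle_mnist_svm()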