diff --git a/sdks/python/apache_beam/ml/inference/__init__.py b/sdks/python/apache_beam/ml/inference/__init__.py
new file mode 100644
index 000000000000..cce3acad34a4
--- /dev/null
+++ b/sdks/python/apache_beam/ml/inference/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/sdks/python/apache_beam/ml/inference/api.py b/sdks/python/apache_beam/ml/inference/api.py
index 73dd87bb4e95..10a5a306704a 100644
--- a/sdks/python/apache_beam/ml/inference/api.py
+++ b/sdks/python/apache_beam/ml/inference/api.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# mypy: ignore-errors
 
 from dataclasses import dataclass
 from typing import Tuple
@@ -51,7 +52,7 @@ class RunInference(beam.PTransform):
 
   TODO(BEAM-14046): Add and link to help documentation
   """
-  def __init__(self, model_loader: base.ModelLoader) -> beam.pvalue.PCollection:
+  def __init__(self, model_loader: base.ModelLoader):
     self._model_loader = model_loader
 
   def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
diff --git a/sdks/python/apache_beam/ml/inference/base.py b/sdks/python/apache_beam/ml/inference/base.py
index 80490285edd3..3c7e6fe0e3de 100644
--- a/sdks/python/apache_beam/ml/inference/base.py
+++ b/sdks/python/apache_beam/ml/inference/base.py
@@ -44,10 +44,10 @@
 from apache_beam.utils import shared
 
 try:
-  # pylint: disable=g-import-not-at-top
+  # pylint: disable=wrong-import-order, wrong-import-position
   import resource
 except ImportError:
-  resource = None
+  resource = None  # type: ignore[assignment]
 
 _MICROSECOND_TO_MILLISECOND = 1000
 _NANOSECOND_TO_MICROSECOND = 1000
@@ -59,7 +59,8 @@
 class InferenceRunner():
   """Implements running inferences for a framework."""
   def run_inference(self, batch: List[Any], model: Any) -> Iterable[Any]:
-    """Runs inferences on a batch of examples and returns an Iterable of Predictions."""
+    """Runs inferences on a batch of examples and
+    returns an Iterable of Predictions."""
     raise NotImplementedError(type(self))
 
   def get_num_bytes(self, batch: Any) -> int:
@@ -67,7 +68,7 @@ def get_num_bytes(self, batch: Any) -> int:
     """Returns the number of bytes of data for a batch."""
     return len(pickle.dumps(batch))
 
   def get_metrics_namespace(self) -> str:
-    """Returns a namespace for metrics collected by the RunInference transform."""
+    """Returns a namespace for metrics collected by RunInference transform."""
     return 'RunInference'
 
@@ -249,7 +250,7 @@ def get_current_time_in_microseconds(self) -> int:
 
 class _FineGrainedClock(_Clock):
   def get_current_time_in_microseconds(self) -> int:
     return int(
-        time.clock_gettime_ns(time.CLOCK_REALTIME) /  # pytype: disable=module-attr
+        time.clock_gettime_ns(time.CLOCK_REALTIME) /  # type: ignore[attr-defined]
         _NANOSECOND_TO_MICROSECOND)
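
For context, here is a minimal sketch of how the RunInference transform above might be wired into a pipeline. This is not part of the diff: the SklearnModelLoader constructor arguments (model_uri, model_file_type) are assumed names, and the GCS path is hypothetical.

import numpy
import apache_beam as beam
from apache_beam.ml.inference.api import RunInference
from apache_beam.ml.inference.sklearn_inference import ModelFileType
from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader

with beam.Pipeline() as p:
  _ = (
      p
      # Each element is one example; the transform batches them internally.
      | 'CreateExamples' >> beam.Create(
          [numpy.array([0, 0]), numpy.array([1, 1])])
      # Hypothetical loader arguments; the model path is made up.
      | 'RunInference' >> RunInference(
          model_loader=SklearnModelLoader(
              model_uri='gs://my-bucket/model.pkl',
              model_file_type=ModelFileType.PICKLE)))
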
diff --git a/sdks/python/apache_beam/ml/inference/base_test.py b/sdks/python/apache_beam/ml/inference/base_test.py
index ab7bb16383ec..55936f63ed4f 100644
--- a/sdks/python/apache_beam/ml/inference/base_test.py
+++ b/sdks/python/apache_beam/ml/inference/base_test.py
@@ -23,8 +23,8 @@
 from typing import Iterable
 
 import apache_beam as beam
-import apache_beam.ml.inference.base as base
 from apache_beam.metrics.metric import MetricsFilter
+from apache_beam.ml.inference import base
 from apache_beam.testing.test_pipeline import TestPipeline
 from apache_beam.testing.util import assert_that
 from apache_beam.testing.util import equal_to
diff --git a/sdks/python/apache_beam/ml/inference/pytorch.py b/sdks/python/apache_beam/ml/inference/pytorch.py
index cb0935e3a3a7..1eac09af3a7a 100644
--- a/sdks/python/apache_beam/ml/inference/pytorch.py
+++ b/sdks/python/apache_beam/ml/inference/pytorch.py
@@ -49,10 +49,10 @@ def run_inference(self, batch: List[torch.Tensor],
       the inference call.
     """
-    batch = torch.stack(batch)
-    if batch.device != self._device:
-      batch = batch.to(self._device)
-    predictions = model(batch)
+    torch_batch = torch.stack(batch)
+    if torch_batch.device != self._device:
+      torch_batch = torch_batch.to(self._device)
+    predictions = model(torch_batch)
     return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
 
   def get_num_bytes(self, batch: List[torch.Tensor]) -> int:
@@ -75,10 +75,13 @@ def __init__(
       model_params: Dict[str, Any],
       device: str = 'CPU'):
     """
-    state_dict_path: path to the saved dictionary of the model state.
-    model_class: class of the Pytorch model that defines the model structure.
-    device: the device on which you wish to run the model. If ``device = GPU``
-    then device will be cuda if it is available. Otherwise, it will be cpu.
+    Initializes a PytorchModelLoader.
+    :param state_dict_path: path to the saved dictionary of the model state.
+    :param model_class: class of the Pytorch model that defines the model
+      structure.
+    :param device: the device on which you wish to run the model. If
+      ``device = GPU`` then a GPU device will be used if it is available.
+      Otherwise, it will be CPU.
 
     See https://pytorch.org/tutorials/beginner/saving_loading_models.html
     for details
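
A hedged construction sketch for the loader documented above. The parameter names come from the __init__ signature and docstring in this diff, but the LinearRegression module and the state-dict path are hypothetical, and the assumption that model_params is passed to model_class as keyword arguments is mine.

import torch
from apache_beam.ml.inference.pytorch import PytorchModelLoader

class LinearRegression(torch.nn.Module):
  # Hypothetical stand-in model class defining the model structure.
  def __init__(self, input_dim: int = 1, output_dim: int = 1):
    super().__init__()
    self.linear = torch.nn.Linear(input_dim, output_dim)

  def forward(self, x):
    return self.linear(x)

model_loader = PytorchModelLoader(
    state_dict_path='gs://my-bucket/linear_regression.pt',  # hypothetical
    model_class=LinearRegression,
    model_params={'input_dim': 1, 'output_dim': 1},
    device='GPU')  # per the docstring: uses a GPU if available, else CPU
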
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference.py b/sdks/python/apache_beam/ml/inference/sklearn_inference.py
index e0890a725c59..3b56eada881e 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference.py
@@ -42,14 +42,14 @@ class ModelFileType(enum.Enum):
 
 class SklearnInferenceRunner(InferenceRunner):
-  def run_inference(self, batch: List[numpy.array],
-                    model: Any) -> Iterable[numpy.array]:
+  def run_inference(self, batch: List[numpy.ndarray],
+                    model: Any) -> Iterable[PredictionResult]:
     # vectorize data for better performance
     vectorized_batch = numpy.stack(batch, axis=0)
     predictions = model.predict(vectorized_batch)
     return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
 
-  def get_num_bytes(self, batch: List[numpy.array]) -> int:
+  def get_num_bytes(self, batch: List[numpy.ndarray]) -> int:
     """Returns the number of bytes of data for a batch."""
     return sum(sys.getsizeof(element) for element in batch)
 
@@ -71,8 +71,9 @@ def load_model(self):
     elif self._model_file_type == ModelFileType.JOBLIB:
       if not joblib:
         raise ImportError(
-            'Could not import joblib in this execution'
-            ' environment. For help with managing dependencies on Python workers see https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/'
+            'Could not import joblib in this execution environment. '
+            'For help with managing dependencies on Python workers, see '
+            'https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/'  # pylint: disable=line-too-long
         )
       return joblib.load(file)
     raise AssertionError('Unsupported serialization type.')
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
index a35a6922957f..f1efe73c96cd 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
@@ -30,8 +30,8 @@
 from sklearn import svm
 
 import apache_beam as beam
-import apache_beam.ml.inference.api as api
-import apache_beam.ml.inference.base as base
+from apache_beam.ml.inference import api
+from apache_beam.ml.inference import base
 from apache_beam.ml.inference.sklearn_inference import ModelFileType
 from apache_beam.ml.inference.sklearn_inference import SklearnInferenceRunner
 from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader
@@ -49,7 +49,7 @@ class FakeModel:
   def __init__(self):
     self.total_predict_calls = 0
 
-  def predict(self, input_vector: numpy.array):
+  def predict(self, input_vector: numpy.ndarray):
     self.total_predict_calls += 1
     return numpy.sum(input_vector, axis=1)
diff --git a/sdks/python/scripts/generate_pydoc.sh b/sdks/python/scripts/generate_pydoc.sh
index 7890ecd9665d..cb86043b34a8 100755
--- a/sdks/python/scripts/generate_pydoc.sh
+++ b/sdks/python/scripts/generate_pydoc.sh
@@ -235,17 +235,6 @@ nitpick_ignore = []
 nitpick_ignore += [('py:class', iden) for iden in ignore_identifiers]
 nitpick_ignore += [('py:obj', iden) for iden in ignore_identifiers]
 nitpick_ignore += [('py:exc', iden) for iden in ignore_references]
-
-# Monkey patch functools.wraps to retain original function argument signature
-# for documentation.
-# https://github.com/sphinx-doc/sphinx/issues/1711
-import functools
-def fake_wraps(wrapped):
-  def wrapper(decorator):
-    return wrapped
-  return wrapper
-
-functools.wraps = fake_wraps
 EOF
 
 #=== index.rst ===#
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 858db8590a89..14b94291cfaf 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -144,6 +144,7 @@ deps =
   sphinx_rtd_theme==0.4.3
   docutils<0.18
   Jinja2==3.0.3 # TODO(BEAM-14172): Sphinx version is too old.
+  torch
 commands =
   time {toxinidir}/scripts/generate_pydoc.sh
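
For reference, a sketch of persisting a scikit-learn model in the two formats the loader above accepts (ModelFileType.PICKLE and ModelFileType.JOBLIB). The file names are arbitrary; joblib is an optional dependency, per the ImportError handling in the diff.

import pickle

import joblib
from sklearn import linear_model

# Train a trivial model to have something to serialize.
model = linear_model.LinearRegression()
model.fit([[0], [1], [2]], [0, 1, 2])

with open('model.pkl', 'wb') as f:
  pickle.dump(model, f)  # loadable with ModelFileType.PICKLE

joblib.dump(model, 'model.joblib')  # loadable with ModelFileType.JOBLIB
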