diff --git a/sdks/python/apache_beam/ml/inference/__init__.py b/sdks/python/apache_beam/ml/inference/__init__.py
new file mode 100644
index 000000000000..cce3acad34a4
--- /dev/null
+++ b/sdks/python/apache_beam/ml/inference/__init__.py
@@ -0,0 +1,16 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/sdks/python/apache_beam/ml/inference/api.py b/sdks/python/apache_beam/ml/inference/api.py
index 73dd87bb4e95..10a5a306704a 100644
--- a/sdks/python/apache_beam/ml/inference/api.py
+++ b/sdks/python/apache_beam/ml/inference/api.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# mypy: ignore-errors
 
 from dataclasses import dataclass
 from typing import Tuple
@@ -51,7 +52,7 @@ class RunInference(beam.PTransform):
 
   TODO(BEAM-14046): Add and link to help documentation
   """
-  def __init__(self, model_loader: base.ModelLoader) -> beam.pvalue.PCollection:
+  def __init__(self, model_loader: base.ModelLoader):
     self._model_loader = model_loader
 
   def expand(self, pcoll: beam.PCollection) -> beam.PCollection:
diff --git a/sdks/python/apache_beam/ml/inference/base.py b/sdks/python/apache_beam/ml/inference/base.py
index 80490285edd3..3c7e6fe0e3de 100644
--- a/sdks/python/apache_beam/ml/inference/base.py
+++ b/sdks/python/apache_beam/ml/inference/base.py
@@ -44,10 +44,10 @@
 from apache_beam.utils import shared
 
 try:
-  # pylint: disable=g-import-not-at-top
+  # pylint: disable=wrong-import-order, wrong-import-position
   import resource
 except ImportError:
-  resource = None
+  resource = None  # type: ignore[assignment]
 
 _MICROSECOND_TO_MILLISECOND = 1000
 _NANOSECOND_TO_MICROSECOND = 1000
@@ -59,7 +59,8 @@
 class InferenceRunner():
   """Implements running inferences for a framework."""
   def run_inference(self, batch: List[Any], model: Any) -> Iterable[Any]:
-    """Runs inferences on a batch of examples and returns an Iterable of Predictions."""
+    """Runs inferences on a batch of examples and
+    returns an Iterable of Predictions."""
     raise NotImplementedError(type(self))
 
   def get_num_bytes(self, batch: Any) -> int:
@@ -67,7 +68,7 @@ def get_num_bytes(self, batch: Any) -> int:
     """Returns the number of bytes of data for a batch."""
     return len(pickle.dumps(batch))
 
   def get_metrics_namespace(self) -> str:
-    """Returns a namespace for metrics collected by the RunInference transform."""
+    """Returns a namespace for metrics collected by RunInference transform."""
     return 'RunInference'
 
@@ -249,7 +250,7 @@ def get_current_time_in_microseconds(self) -> int:
 
 class _FineGrainedClock(_Clock):
   def get_current_time_in_microseconds(self) -> int:
     return int(
-        time.clock_gettime_ns(time.CLOCK_REALTIME) /  # pytype: disable=module-attr
+        time.clock_gettime_ns(time.CLOCK_REALTIME) /  # type: ignore[attr-defined]
         _NANOSECOND_TO_MICROSECOND)
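
For context, here is a minimal sketch of how the RunInference transform above might be wired into a pipeline. This is not part of the diff: the SklearnModelLoader constructor arguments (model_uri, model_file_type) are assumed names, and the GCS path is hypothetical.

import numpy
import apache_beam as beam
from apache_beam.ml.inference.api import RunInference
from apache_beam.ml.inference.sklearn_inference import ModelFileType
from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader

with beam.Pipeline() as p:
  _ = (
      p
      # Each element is one example; the transform batches them internally.
      | 'CreateExamples' >> beam.Create(
          [numpy.array([0, 0]), numpy.array([1, 1])])
      # Hypothetical loader arguments; the model path is made up.
      | 'RunInference' >> RunInference(
          model_loader=SklearnModelLoader(
              model_uri='gs://my-bucket/model.pkl',
              model_file_type=ModelFileType.PICKLE)))
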
diff --git a/sdks/python/apache_beam/ml/inference/base_test.py b/sdks/python/apache_beam/ml/inference/base_test.py
index ab7bb16383ec..55936f63ed4f 100644
--- a/sdks/python/apache_beam/ml/inference/base_test.py
+++ b/sdks/python/apache_beam/ml/inference/base_test.py
@@ -23,8 +23,8 @@
 from typing import Iterable
 
 import apache_beam as beam
-import apache_beam.ml.inference.base as base
 from apache_beam.metrics.metric import MetricsFilter
+from apache_beam.ml.inference import base
 from apache_beam.testing.test_pipeline import TestPipeline
 from apache_beam.testing.util import assert_that
 from apache_beam.testing.util import equal_to
diff --git a/sdks/python/apache_beam/ml/inference/pytorch.py b/sdks/python/apache_beam/ml/inference/pytorch.py
index cb0935e3a3a7..1eac09af3a7a 100644
--- a/sdks/python/apache_beam/ml/inference/pytorch.py
+++ b/sdks/python/apache_beam/ml/inference/pytorch.py
@@ -49,10 +49,10 @@ def run_inference(self, batch: List[torch.Tensor],
       the inference call.
     """
-    batch = torch.stack(batch)
-    if batch.device != self._device:
-      batch = batch.to(self._device)
-    predictions = model(batch)
+    torch_batch = torch.stack(batch)
+    if torch_batch.device != self._device:
+      torch_batch = torch_batch.to(self._device)
+    predictions = model(torch_batch)
     return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
 
   def get_num_bytes(self, batch: List[torch.Tensor]) -> int:
@@ -75,10 +75,13 @@ def __init__(
       model_params: Dict[str, Any],
       device: str = 'CPU'):
     """
-    state_dict_path: path to the saved dictionary of the model state.
-    model_class: class of the Pytorch model that defines the model structure.
-    device: the device on which you wish to run the model. If ``device = GPU``
-    then device will be cuda if it is available. Otherwise, it will be cpu.
+    Initializes a PytorchModelLoader.
+    :param state_dict_path: path to the saved dictionary of the model state.
+    :param model_class: class of the Pytorch model that defines the model
+      structure.
+    :param device: the device on which you wish to run the model. If
+      ``device = GPU`` then a GPU device will be used if it is available.
+      Otherwise, it will be CPU.
 
     See https://pytorch.org/tutorials/beginner/saving_loading_models.html
     for details
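
A hedged construction sketch for the loader documented above. The parameter names come from the __init__ signature and docstring in this diff, but the LinearRegression module and the state-dict path are hypothetical, and the assumption that model_params is passed to model_class as keyword arguments is mine.

import torch
from apache_beam.ml.inference.pytorch import PytorchModelLoader

class LinearRegression(torch.nn.Module):
  # Hypothetical stand-in model class defining the model structure.
  def __init__(self, input_dim: int = 1, output_dim: int = 1):
    super().__init__()
    self.linear = torch.nn.Linear(input_dim, output_dim)

  def forward(self, x):
    return self.linear(x)

model_loader = PytorchModelLoader(
    state_dict_path='gs://my-bucket/linear_regression.pt',  # hypothetical
    model_class=LinearRegression,
    model_params={'input_dim': 1, 'output_dim': 1},
    device='GPU')  # per the docstring: uses a GPU if available, else CPU
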
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference.py b/sdks/python/apache_beam/ml/inference/sklearn_inference.py
index e0890a725c59..3b56eada881e 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference.py
@@ -42,14 +42,14 @@ class ModelFileType(enum.Enum):
 
 class SklearnInferenceRunner(InferenceRunner):
-  def run_inference(self, batch: List[numpy.array],
-                    model: Any) -> Iterable[numpy.array]:
+  def run_inference(self, batch: List[numpy.ndarray],
+                    model: Any) -> Iterable[PredictionResult]:
     # vectorize data for better performance
     vectorized_batch = numpy.stack(batch, axis=0)
     predictions = model.predict(vectorized_batch)
     return [PredictionResult(x, y) for x, y in zip(batch, predictions)]
 
-  def get_num_bytes(self, batch: List[numpy.array]) -> int:
+  def get_num_bytes(self, batch: List[numpy.ndarray]) -> int:
     """Returns the number of bytes of data for a batch."""
     return sum(sys.getsizeof(element) for element in batch)
 
@@ -71,8 +71,9 @@ def load_model(self):
     elif self._model_file_type == ModelFileType.JOBLIB:
       if not joblib:
         raise ImportError(
-            'Could not import joblib in this execution'
-            ' environment. For help with managing dependencies on Python workers see https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/'
+            'Could not import joblib in this execution environment. '
+            'For help with managing dependencies on Python workers, see '
+            'https://beam.apache.org/documentation/sdks/python-pipeline-dependencies/'  # pylint: disable=line-too-long
         )
       return joblib.load(file)
     raise AssertionError('Unsupported serialization type.')
diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
index a35a6922957f..f1efe73c96cd 100644
--- a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
+++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py
@@ -30,8 +30,8 @@
 from sklearn import svm
 
 import apache_beam as beam
-import apache_beam.ml.inference.api as api
-import apache_beam.ml.inference.base as base
+from apache_beam.ml.inference import api
+from apache_beam.ml.inference import base
 from apache_beam.ml.inference.sklearn_inference import ModelFileType
 from apache_beam.ml.inference.sklearn_inference import SklearnInferenceRunner
 from apache_beam.ml.inference.sklearn_inference import SklearnModelLoader
@@ -49,7 +49,7 @@ class FakeModel:
   def __init__(self):
     self.total_predict_calls = 0
 
-  def predict(self, input_vector: numpy.array):
+  def predict(self, input_vector: numpy.ndarray):
     self.total_predict_calls += 1
     return numpy.sum(input_vector, axis=1)
diff --git a/sdks/python/scripts/generate_pydoc.sh b/sdks/python/scripts/generate_pydoc.sh
index 7890ecd9665d..cb86043b34a8 100755
--- a/sdks/python/scripts/generate_pydoc.sh
+++ b/sdks/python/scripts/generate_pydoc.sh
@@ -235,17 +235,6 @@ nitpick_ignore = []
 nitpick_ignore += [('py:class', iden) for iden in ignore_identifiers]
 nitpick_ignore += [('py:obj', iden) for iden in ignore_identifiers]
 nitpick_ignore += [('py:exc', iden) for iden in ignore_references]
-
-# Monkey patch functools.wraps to retain original function argument signature
-# for documentation.
-# https://github.com/sphinx-doc/sphinx/issues/1711
-import functools
-def fake_wraps(wrapped):
-  def wrapper(decorator):
-    return wrapped
-  return wrapper
-
-functools.wraps = fake_wraps
 EOF
 
 #=== index.rst ===#
diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini
index 858db8590a89..14b94291cfaf 100644
--- a/sdks/python/tox.ini
+++ b/sdks/python/tox.ini
@@ -144,6 +144,7 @@ deps =
   sphinx_rtd_theme==0.4.3
   docutils<0.18
   Jinja2==3.0.3 # TODO(BEAM-14172): Sphinx version is too old.
+  torch
 commands =
   time {toxinidir}/scripts/generate_pydoc.sh
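
For reference, a sketch of persisting a scikit-learn model in the two formats the loader above accepts (ModelFileType.PICKLE and ModelFileType.JOBLIB). The file names are arbitrary; joblib is an optional dependency, per the ImportError handling in the diff.

import pickle

import joblib
from sklearn import linear_model

# Train a trivial model to have something to serialize.
model = linear_model.LinearRegression()
model.fit([[0], [1], [2]], [0, 1, 2])

with open('model.pkl', 'wb') as f:
  pickle.dump(model, f)  # loadable with ModelFileType.PICKLE

joblib.dump(model, 'model.joblib')  # loadable with ModelFileType.JOBLIB
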