From 96c448e0dc3cb269dc49c85c64ae36c5e8df8c22 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Wed, 30 Apr 2025 15:43:41 -0400 Subject: [PATCH 1/7] stash initial yapf changes --- .pre-commit-config.yaml | 4 ++-- sdks/python/pyproject.toml | 2 +- sdks/python/setup.cfg | 1 + sdks/python/tox.ini | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a6c0faae0823..92ab38f29107 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,10 +11,10 @@ # limitations under the License. repos: - - repo: https://github.com/pre-commit/mirrors-yapf + - repo: https://github.com/google/yapf # this rev is a release tag in the repo above and corresponds with a yapf # version. make sure this matches the version of yapf in tox.ini. - rev: v0.29.0 + rev: v0.43.0 hooks: - id: yapf files: ^sdks/python/apache_beam/ diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml index 8000c24f28aa..633a8511a19a 100644 --- a/sdks/python/pyproject.toml +++ b/sdks/python/pyproject.toml @@ -34,7 +34,7 @@ requires = [ 'pyyaml>=3.12,<7.0.0', # also update Jinja2 bounds in test-suites/xlang/build.gradle (look for xlangWrapperValidation task) "jinja2>=2.7.1,<4.0.0", - 'yapf==0.29.0' + 'yapf==0.43.0' ] diff --git a/sdks/python/setup.cfg b/sdks/python/setup.cfg index 6e259cf3e219..0d1e1c3347e4 100644 --- a/sdks/python/setup.cfg +++ b/sdks/python/setup.cfg @@ -61,6 +61,7 @@ continuation_indent_width = 4 column_limit = 80 allow_split_before_dict_value = False blank_line_before_module_docstring = True +blank_line_before_nested_class_or_def = False coalesce_brackets = True each_dict_entry_on_separate_line = True split_all_top_level_comma_separated_values = True diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index afb6625b8dca..b87b5ecc6f67 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -271,7 +271,7 @@ commands = [testenv:py3-yapf] # keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml deps = - yapf==0.29.0 + yapf==0.43.0 commands = yapf --version time yapf --in-place --parallel --recursive apache_beam @@ -279,7 +279,7 @@ commands = [testenv:py3-yapf-check] # keep the version of yapf in sync with the 'rev' in .pre-commit-config.yaml and pyproject.toml deps = - yapf==0.29.0 + yapf==0.43.0 commands = yapf --version time yapf --diff --parallel --recursive apache_beam From a8391f69859a7d0e0fa0bcf55ce09331598cd3c5 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 1 May 2025 11:20:56 -0400 Subject: [PATCH 2/7] example 0.43.0 formatting migration --- sdks/python/apache_beam/coders/coder_impl.py | 10 +- sdks/python/apache_beam/coders/coders.py | 5 +- .../coders/coders_property_based_test.py | 4 +- .../apache_beam/coders/coders_test_common.py | 11 +- .../apache_beam/coders/row_coder_test.py | 3 +- .../coders/standard_coders_test.py | 91 +++----- sdks/python/apache_beam/dataframe/convert.py | 10 +- sdks/python/apache_beam/dataframe/doctests.py | 36 ++- sdks/python/apache_beam/dataframe/frames.py | 181 ++++++--------- .../apache_beam/dataframe/frames_test.py | 105 ++++----- sdks/python/apache_beam/dataframe/io.py | 20 +- .../dataframe/pandas_doctests_test.py | 149 +++++-------- .../dataframe/pandas_top_level_functions.py | 11 +- .../apache_beam/dataframe/transforms.py | 14 +- .../apache_beam/dataframe/transforms_test.py | 6 +- .../apache_beam/examples/avro_nyc_trips.py | 10 +- .../inference/pytorch_language_modeling.py | 25 ++- .../inference/xgboost_iris_classification.py | 4 
+- .../ml_transform/vocab_tfidf_processing.py | 3 +- .../examples/snippets/snippets_test.py | 17 +- .../aggregation/approximatequantiles_test.py | 3 +- .../aggregation/approximateunique_test.py | 3 +- .../combineperkey_side_inputs_singleton.py | 3 +- .../combinevalues_side_inputs_singleton.py | 3 +- .../snippets/transforms/aggregation/top_of.py | 7 +- .../transforms/aggregation/top_per_key.py | 7 +- .../elementwise/filter_side_inputs_dict.py | 3 +- .../elementwise/filter_side_inputs_iter.py | 3 +- .../filter_side_inputs_singleton.py | 3 +- .../flatmap_side_inputs_singleton.py | 3 +- .../transforms/elementwise/map_context.py | 5 +- .../elementwise/map_side_inputs_iter.py | 3 +- .../elementwise/map_side_inputs_singleton.py | 3 +- .../elementwise/withtimestamps_event_time.py | 23 +- .../snippets/transforms/other/window.py | 48 ++-- sdks/python/apache_beam/examples/sql_taxi.py | 15 +- .../internal/cloudpickle/cloudpickle.py | 12 +- .../apache_beam/internal/dill_pickler.py | 3 +- .../apache_beam/internal/metrics/metric.py | 4 +- sdks/python/apache_beam/internal/util.py | 7 +- sdks/python/apache_beam/io/avroio.py | 9 +- sdks/python/apache_beam/io/avroio_test.py | 4 +- .../io/external/xlang_debeziumio_it_test.py | 4 +- .../io/external/xlang_jdbcio_it_test.py | 8 +- sdks/python/apache_beam/io/filebasedsink.py | 8 +- sdks/python/apache_beam/io/fileio.py | 13 +- sdks/python/apache_beam/io/fileio_test.py | 8 +- sdks/python/apache_beam/io/filesystem_test.py | 7 +- sdks/python/apache_beam/io/gcp/bigquery.py | 37 ++- .../apache_beam/io/gcp/bigquery_file_loads.py | 6 +- .../io/gcp/bigquery_file_loads_test.py | 17 +- .../apache_beam/io/gcp/bigquery_test.py | 69 +++--- .../apache_beam/io/gcp/bigquery_tools.py | 4 +- .../apache_beam/io/gcp/bigquery_tools_test.py | 10 +- .../io/gcp/bigquery_write_it_test.py | 6 +- .../io/gcp/experimental/spannerio.py | 35 +-- .../apache_beam/io/gcp/gcsfilesystem_test.py | 6 +- sdks/python/apache_beam/io/gcp/pubsub_test.py | 6 +- .../io/gcp/tests/bigquery_matcher.py | 8 +- .../apache_beam/io/gcp/tests/utils_test.py | 9 +- .../apache_beam/io/localfilesystem_test.py | 4 +- sdks/python/apache_beam/io/mongodbio.py | 12 +- sdks/python/apache_beam/io/mongodbio_test.py | 7 +- sdks/python/apache_beam/io/range_trackers.py | 6 +- .../apache_beam/io/source_test_utils.py | 8 +- sdks/python/apache_beam/metrics/cells.py | 3 +- sdks/python/apache_beam/metrics/cells_test.py | 3 +- sdks/python/apache_beam/metrics/execution.py | 22 +- .../apache_beam/metrics/monitoring_infos.py | 3 +- .../apache_beam/ml/anomaly/specifiable.py | 3 +- .../ml/anomaly/specifiable_test.py | 3 +- .../apache_beam/ml/anomaly/transforms.py | 25 +-- .../apache_beam/ml/anomaly/transforms_test.py | 14 +- .../apache_beam/ml/inference/base_test.py | 10 +- .../ml/inference/huggingface_inference.py | 19 +- .../inference/huggingface_inference_test.py | 6 +- .../ml/inference/onnx_inference_test.py | 69 +++--- .../ml/inference/pytorch_inference_it_test.py | 4 +- .../ml/inference/pytorch_inference_test.py | 80 +++---- .../ml/inference/sklearn_inference_test.py | 4 +- .../ml/inference/tensorflow_inference_test.py | 33 ++- .../ml/inference/tensorrt_inference_test.py | 24 +- sdks/python/apache_beam/ml/inference/utils.py | 4 +- .../bigquery_vector_search_it_test.py | 16 +- .../ml/rag/ingestion/alloydb_it_test.py | 32 +-- .../apache_beam/ml/rag/ingestion/bigquery.py | 11 +- .../ml/rag/ingestion/bigquery_it_test.py | 3 +- sdks/python/apache_beam/ml/transforms/base.py | 4 +- .../apache_beam/ml/transforms/base_test.py | 15 +- 
.../ml/transforms/embeddings/huggingface.py | 4 +- .../apache_beam/ml/transforms/tft_test.py | 18 +- .../apache_beam/options/pipeline_options.py | 11 +- .../options/pipeline_options_validator.py | 4 +- sdks/python/apache_beam/pipeline.py | 26 +-- sdks/python/apache_beam/pipeline_test.py | 14 +- sdks/python/apache_beam/pvalue.py | 13 +- sdks/python/apache_beam/runners/common.py | 211 +++++++++--------- .../python/apache_beam/runners/common_test.py | 5 +- .../runners/dask/dask_runner_test.py | 9 +- .../dataflow_exercise_metrics_pipeline.py | 10 +- .../runners/dataflow/dataflow_job_service.py | 3 +- .../runners/dataflow/dataflow_metrics.py | 4 +- .../runners/dataflow/dataflow_metrics_test.py | 9 +- .../runners/dataflow/dataflow_runner.py | 4 +- .../runners/dataflow/dataflow_runner_test.py | 4 +- .../runners/dataflow/internal/apiclient.py | 16 +- .../cloudbuild/cloudbuild_v1_client.py | 168 +++++--------- ...consumer_tracking_pipeline_visitor_test.py | 6 +- .../runners/direct/direct_metrics.py | 20 +- .../runners/direct/evaluation_context.py | 4 +- .../runners/direct/transform_evaluator.py | 3 +- .../background_caching_job_test.py | 24 +- .../runners/interactive/cache_manager.py | 11 +- .../runners/interactive/caching/read_cache.py | 4 +- .../interactive/caching/write_cache.py | 17 +- .../interactive/interactive_environment.py | 3 +- .../interactive/pipeline_instrument.py | 14 +- .../runners/interactive/recording_manager.py | 11 +- .../interactive/sql/beam_sql_magics_test.py | 24 +- .../runners/interactive/sql/sql_chain.py | 3 +- .../runners/interactive/sql/utils.py | 4 +- .../apache_beam/runners/pipeline_utils.py | 11 +- .../portability/abstract_job_service.py | 12 +- .../runners/portability/artifact_service.py | 3 +- .../runners/portability/expansion_service.py | 15 +- .../runners/portability/flink_runner_test.py | 4 +- .../portability/fn_api_runner/execution.py | 7 +- .../portability/fn_api_runner/fn_runner.py | 28 +-- .../fn_api_runner/fn_runner_test.py | 49 ++-- .../portability/fn_api_runner/translations.py | 41 ++-- .../fn_api_runner/translations_test.py | 4 +- .../fn_api_runner/trigger_manager_test.py | 12 +- .../fn_api_runner/worker_handlers.py | 155 +++++++------ .../runners/portability/local_job_service.py | 12 +- .../runners/portability/portable_runner.py | 3 +- .../runners/portability/prism_runner.py | 4 +- .../runners/portability/prism_runner_test.py | 8 +- .../apache_beam/runners/portability/stager.py | 4 +- sdks/python/apache_beam/runners/render.py | 5 +- .../apache_beam/runners/trivial_runner.py | 8 +- .../runners/worker/bundle_processor.py | 28 +-- .../apache_beam/runners/worker/data_plane.py | 5 +- .../runners/worker/data_sampler.py | 8 +- .../apache_beam/runners/worker/log_handler.py | 4 +- .../runners/worker/log_handler_test.py | 3 +- .../apache_beam/runners/worker/opcounters.py | 15 +- .../runners/worker/operation_specs.py | 23 +- .../apache_beam/runners/worker/operations.py | 112 +++++----- .../apache_beam/runners/worker/sdk_worker.py | 16 +- .../runners/worker/sdk_worker_main.py | 3 +- .../runners/worker/sdk_worker_main_test.py | 8 +- .../runners/worker/sdk_worker_test.py | 55 ++--- .../runners/worker/statecache_test.py | 31 +-- .../runners/worker/worker_status.py | 4 +- .../testing/analyzers/perf_analysis.py | 4 +- .../benchmarks/cloudml/pipelines/workflow.py | 3 +- .../benchmarks/nexmark/queries/query1.py | 5 +- .../benchmarks/nexmark/queries/query3.py | 6 +- .../apache_beam/testing/datatype_inference.py | 13 +- .../testing/datatype_inference_test.py | 4 +- 
.../load_tests/load_test_metrics_utils.py | 7 +- .../apache_beam/testing/synthetic_pipeline.py | 3 +- .../apache_beam/transforms/batch_dofn_test.py | 3 +- .../apache_beam/transforms/combiners.py | 21 +- sdks/python/apache_beam/transforms/core.py | 58 +++-- sdks/python/apache_beam/transforms/display.py | 3 +- .../apache_beam/transforms/environments.py | 81 ++++--- .../python/apache_beam/transforms/external.py | 26 +-- .../fully_qualified_named_transform_test.py | 13 +- .../apache_beam/transforms/ptransform.py | 34 +-- .../apache_beam/transforms/ptransform_test.py | 45 ++-- .../apache_beam/transforms/sideinputs_test.py | 32 +-- .../python/apache_beam/transforms/sql_test.py | 4 +- sdks/python/apache_beam/transforms/trigger.py | 7 +- .../apache_beam/transforms/trigger_test.py | 98 ++++---- sdks/python/apache_beam/transforms/util.py | 8 +- .../apache_beam/transforms/util_test.py | 203 ++++++++--------- sdks/python/apache_beam/transforms/window.py | 5 +- .../typehints/arrow_type_compatibility.py | 19 +- .../arrow_type_compatibility_test.py | 39 ++-- sdks/python/apache_beam/typehints/batch.py | 3 +- .../apache_beam/typehints/decorators.py | 11 +- .../typehints/pandas_type_compatibility.py | 9 +- .../pandas_type_compatibility_test.py | 26 +-- .../typehints/pytorch_type_compatibility.py | 3 +- sdks/python/apache_beam/typehints/row_type.py | 3 +- .../apache_beam/typehints/schemas_test.py | 22 +- .../typehints/trivial_inference.py | 4 +- .../typehints/trivial_inference_test.py | 89 +++----- .../python/apache_beam/typehints/typehints.py | 4 +- sdks/python/apache_beam/utils/urns.py | 6 +- .../yaml/examples/testing/examples_test.py | 3 +- .../apache_beam/yaml/generate_yaml_docs.py | 3 +- sdks/python/apache_beam/yaml/json_utils.py | 6 +- sdks/python/apache_beam/yaml/readme_test.py | 8 +- sdks/python/apache_beam/yaml/yaml_io.py | 8 +- sdks/python/apache_beam/yaml/yaml_io_test.py | 7 +- .../python/apache_beam/yaml/yaml_join_test.py | 4 +- sdks/python/apache_beam/yaml/yaml_mapping.py | 7 +- sdks/python/apache_beam/yaml/yaml_provider.py | 26 +-- .../yaml/yaml_provider_unit_test.py | 18 +- sdks/python/apache_beam/yaml/yaml_testing.py | 3 +- .../python/apache_beam/yaml/yaml_transform.py | 9 +- .../apache_beam/yaml/yaml_transform_test.py | 13 +- sdks/python/apache_beam/yaml/yaml_udf_test.py | 8 +- sdks/python/apache_beam/yaml/yaml_utils.py | 4 +- sdks/python/setup.cfg | 1 + 207 files changed, 1828 insertions(+), 2202 deletions(-) diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py index 49cbbdd17e69..6c46b17b1f6d 100644 --- a/sdks/python/apache_beam/coders/coder_impl.py +++ b/sdks/python/apache_beam/coders/coder_impl.py @@ -679,8 +679,7 @@ def __init__( self, key_coder, # type: CoderImpl value_coder, # type: CoderImpl - is_deterministic = False - ): + is_deterministic=False): self._key_coder = key_coder self._value_coder = value_coder self._is_deterministic = is_deterministic @@ -1061,8 +1060,8 @@ def decode_from_stream(self, in_stream, nested): # type: (create_InputStream, bool) -> Any return self._construct_from_components([ c.decode_from_stream( - in_stream, nested or i + 1 < len(self._coder_impls)) for i, - c in enumerate(self._coder_impls) + in_stream, nested or i + 1 < len(self._coder_impls)) + for i, c in enumerate(self._coder_impls) ]) def estimate_size(self, value, nested=False): @@ -1890,8 +1889,7 @@ def _row_column_encoders(self, columns): RowColumnEncoder.create( self.schema.fields[i].type.atomic_type, self.components[i], - columns[name]) for i, - 
name in enumerate(self.field_names) + columns[name]) for i, name in enumerate(self.field_names) ] def encode_batch_to_stream(self, columns: Dict[str, np.ndarray], out): diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index cb23e3967e33..a0a34488a0fc 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -382,9 +382,8 @@ def register_structured_urn(urn, cls): """ setattr( cls, - 'to_runner_api_parameter', - lambda self, - unused_context: (urn, None, self._get_component_coders())) + 'to_runner_api_parameter', lambda self, unused_context: + (urn, None, self._get_component_coders())) # pylint: disable=unused-variable @Coder.register_urn(urn, None) diff --git a/sdks/python/apache_beam/coders/coders_property_based_test.py b/sdks/python/apache_beam/coders/coders_property_based_test.py index 9279fc31c099..d8d844975b9b 100644 --- a/sdks/python/apache_beam/coders/coders_property_based_test.py +++ b/sdks/python/apache_beam/coders/coders_property_based_test.py @@ -144,9 +144,7 @@ def test_row_coder(self, data: st.DataObject): row = RowType( **{ name: data.draw(SCHEMA_TYPES_TO_STRATEGY[type_]) - for name, - type_, - nullable in schema + for name, type_, nullable in schema }) coder = RowCoder(typing_to_runner_api(RowType).row_type.schema) diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index bed93cbc5545..10d1b9513587 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -163,7 +163,7 @@ def tearDownClass(cls): coders.BigEndianShortCoder, coders.SinglePrecisionFloatCoder, coders.ToBytesCoder, - coders.BigIntegerCoder, # tested in DecimalCoder + coders.BigIntegerCoder, # tested in DecimalCoder coders.TimestampPrefixingOpaqueWindowCoder, ]) cls.seen_nested -= set( @@ -686,9 +686,14 @@ def test_nullable_coder(self): def test_map_coder(self): values = [ - {1: "one", 300: "three hundred"}, # force yapf to be nice + { + 1: "one", 300: "three hundred" + }, # force yapf to be nice {}, - {i: str(i) for i in range(5000)} + { + i: str(i) + for i in range(5000) + } ] map_coder = coders.MapCoder(coders.VarIntCoder(), coders.StrUtf8Coder()) self.check_coder(map_coder, *values) diff --git a/sdks/python/apache_beam/coders/row_coder_test.py b/sdks/python/apache_beam/coders/row_coder_test.py index 4d47bca3e2b2..c12250d64958 100644 --- a/sdks/python/apache_beam/coders/row_coder_test.py +++ b/sdks/python/apache_beam/coders/row_coder_test.py @@ -446,8 +446,7 @@ def test_batch_encode_decode(self): for size in [len(self.PEOPLE) - 1, len(self.PEOPLE), len(self.PEOPLE) + 1]: dest = { field: np.ndarray((size, ), dtype=a.dtype) - for field, - a in columnar.items() + for field, a in columnar.items() } n = min(size, len(self.PEOPLE)) self.assertEqual( diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py index 47df0116f2c6..b6f0dbf12208 100644 --- a/sdks/python/apache_beam/coders/standard_coders_test.py +++ b/sdks/python/apache_beam/coders/standard_coders_test.py @@ -139,65 +139,48 @@ class StandardCodersTest(unittest.TestCase): 'beam:coder:bool:v1': lambda x: x, 'beam:coder:string_utf8:v1': lambda x: x, 'beam:coder:varint:v1': lambda x: x, - 'beam:coder:kv:v1': lambda x, - key_parser, - value_parser: (key_parser(x['key']), value_parser(x['value'])), + 'beam:coder:kv:v1': lambda x, key_parser, value_parser: + 
(key_parser(x['key']), value_parser(x['value'])), 'beam:coder:interval_window:v1': lambda x: IntervalWindow( - start=Timestamp(micros=(x['end'] - x['span']) * 1000), - end=Timestamp(micros=x['end'] * 1000)), - 'beam:coder:iterable:v1': lambda x, - parser: list(map(parser, x)), - 'beam:coder:state_backed_iterable:v1': lambda x, - parser: list(map(parser, x)), + start=Timestamp(micros=(x['end'] - x['span']) * 1000), end=Timestamp( + micros=x['end'] * 1000)), + 'beam:coder:iterable:v1': lambda x, parser: list(map(parser, x)), + 'beam:coder:state_backed_iterable:v1': lambda x, parser: list( + map(parser, x)), 'beam:coder:global_window:v1': lambda x: window.GlobalWindow(), - 'beam:coder:windowed_value:v1': lambda x, - value_parser, + 'beam:coder:windowed_value:v1': lambda x, value_parser, window_parser: + windowed_value.create( + value_parser(x['value']), x['timestamp'] * 1000, tuple( + window_parser(w) for w in x['windows'])), + 'beam:coder:param_windowed_value:v1': lambda x, value_parser, window_parser: windowed_value.create( - value_parser(x['value']), - x['timestamp'] * 1000, - tuple(window_parser(w) for w in x['windows'])), - 'beam:coder:param_windowed_value:v1': lambda x, - value_parser, - window_parser: windowed_value.create( - value_parser(x['value']), - x['timestamp'] * 1000, - tuple(window_parser(w) for w in x['windows']), - PaneInfo( - x['pane']['is_first'], - x['pane']['is_last'], - PaneInfoTiming.from_string(x['pane']['timing']), - x['pane']['index'], - x['pane']['on_time_index'])), - 'beam:coder:timer:v1': lambda x, - value_parser, - window_parser: userstate.Timer( - user_key=value_parser(x['userKey']), - dynamic_timer_tag=x['dynamicTimerTag'], - clear_bit=x['clearBit'], - windows=tuple(window_parser(w) for w in x['windows']), - fire_timestamp=None, - hold_timestamp=None, - paneinfo=None) if x['clearBit'] else userstate.Timer( - user_key=value_parser(x['userKey']), - dynamic_timer_tag=x['dynamicTimerTag'], - clear_bit=x['clearBit'], - fire_timestamp=Timestamp(micros=x['fireTimestamp'] * 1000), - hold_timestamp=Timestamp(micros=x['holdTimestamp'] * 1000), - windows=tuple(window_parser(w) for w in x['windows']), - paneinfo=PaneInfo( - x['pane']['is_first'], - x['pane']['is_last'], - PaneInfoTiming.from_string(x['pane']['timing']), - x['pane']['index'], - x['pane']['on_time_index'])), + value_parser(x['value']), x['timestamp'] * 1000, tuple( + window_parser(w) for w in x['windows']), PaneInfo( + x['pane']['is_first'], x['pane']['is_last'], PaneInfoTiming. + from_string(x['pane']['timing']), x['pane']['index'], x[ + 'pane']['on_time_index'])), + 'beam:coder:timer:v1': lambda x, value_parser, window_parser: userstate. 
+ Timer( + user_key=value_parser(x['userKey']), dynamic_timer_tag=x[ + 'dynamicTimerTag'], clear_bit=x['clearBit'], windows=tuple( + window_parser(w) for w in x['windows']), fire_timestamp=None, + hold_timestamp=None, paneinfo=None) + if x['clearBit'] else userstate.Timer( + user_key=value_parser(x['userKey']), dynamic_timer_tag=x[ + 'dynamicTimerTag'], clear_bit=x['clearBit'], fire_timestamp= + Timestamp(micros=x['fireTimestamp'] * 1000), hold_timestamp=Timestamp( + micros=x['holdTimestamp'] * 1000), windows=tuple( + window_parser(w) for w in x['windows']), paneinfo=PaneInfo( + x['pane']['is_first'], x['pane']['is_last'], + PaneInfoTiming.from_string(x['pane']['timing']), x[ + 'pane']['index'], x['pane']['on_time_index'])), 'beam:coder:double:v1': parse_float, - 'beam:coder:sharded_key:v1': lambda x, - value_parser: ShardedKey( + 'beam:coder:sharded_key:v1': lambda x, value_parser: ShardedKey( key=value_parser(x['key']), shard_id=x['shardId'].encode('utf-8')), - 'beam:coder:custom_window:v1': lambda x, - window_parser: window_parser(x['window']), - 'beam:coder:nullable:v1': lambda x, - value_parser: x.encode('utf-8') if x else None + 'beam:coder:custom_window:v1': lambda x, window_parser: window_parser( + x['window']), + 'beam:coder:nullable:v1': lambda x, value_parser: x.encode('utf-8') + if x else None } def test_standard_coders(self): diff --git a/sdks/python/apache_beam/dataframe/convert.py b/sdks/python/apache_beam/dataframe/convert.py index c5a0d1025c6d..4753054a99f0 100644 --- a/sdks/python/apache_beam/dataframe/convert.py +++ b/sdks/python/apache_beam/dataframe/convert.py @@ -257,14 +257,14 @@ def extract_input(placeholder): {ix: df._expr for (ix, df) in enumerate(new_dataframes)}) - TO_PCOLLECTION_CACHE.update( - {new_dataframes[ix]._expr._id: pc - for ix, pc in new_results.items()}) + TO_PCOLLECTION_CACHE.update({ + new_dataframes[ix]._expr._id: pc + for ix, pc in new_results.items() + }) raw_results = { ix: TO_PCOLLECTION_CACHE[df._expr._id] - for ix, - df in enumerate(dataframes) + for ix, df in enumerate(dataframes) } if yield_elements == "schemas": diff --git a/sdks/python/apache_beam/dataframe/doctests.py b/sdks/python/apache_beam/dataframe/doctests.py index 7d2e98bfe14c..19f78338310a 100644 --- a/sdks/python/apache_beam/dataframe/doctests.py +++ b/sdks/python/apache_beam/dataframe/doctests.py @@ -188,8 +188,7 @@ def compute_using_session(self, to_compute): session = expressions.PartitioningSession(self._env._inputs) return { name: session.evaluate(frame._expr) - for name, - frame in to_compute.items() + for name, frame in to_compute.items() } def compute_using_beam(self, to_compute): @@ -198,13 +197,13 @@ def compute_using_beam(self, to_compute): input_pcolls = { placeholder: p | 'Create%s' % placeholder >> beam.Create([input[::2], input[1::2]]) - for placeholder, - input in self._env._inputs.items() + for placeholder, input in self._env._inputs.items() } output_pcolls = ( - input_pcolls | transforms._DataframeExpressionsTransform( - {name: frame._expr - for name, frame in to_compute.items()})) + input_pcolls | transforms._DataframeExpressionsTransform({ + name: frame._expr + for name, frame in to_compute.items() + })) for name, output_pcoll in output_pcolls.items(): _ = output_pcoll | 'Record%s' % name >> beam.FlatMap( recorder.record_fn(name)) @@ -365,18 +364,15 @@ def to_callable(cond): self._wont_implement_ok = { test: [to_callable(cond) for cond in examples] - for test, - examples in (wont_implement_ok or {}).items() + for test, examples in (wont_implement_ok or 
{}).items() } self._not_implemented_ok = { test: [to_callable(cond) for cond in examples] - for test, - examples in (not_implemented_ok or {}).items() + for test, examples in (not_implemented_ok or {}).items() } self._skip = { test: [to_callable(cond) for cond in examples] - for test, - examples in (skip or {}).items() + for test, examples in (skip or {}).items() } super().__init__( checker=_DeferrredDataframeOutputChecker(self._test_env, use_beam), @@ -541,9 +537,9 @@ def is_example_line(line): IMPORT_PANDAS = 'import pandas as pd' example_srcs = [] - lines = iter([(lineno, line.rstrip()) for lineno, - line in enumerate(rst.split('\n')) if is_example_line(line)] + - [(None, 'END')]) + lines = iter([(lineno, line.rstrip()) + for lineno, line in enumerate(rst.split('\n')) + if is_example_line(line)] + [(None, 'END')]) # https://ipython.readthedocs.io/en/stable/sphinxext.html lineno, line = next(lines) @@ -696,12 +692,8 @@ def _run_patched(func, *args, **kwargs): # Unfortunately the runner is not injectable. original_doc_test_runner = doctest.DocTestRunner doctest.DocTestRunner = lambda **kwargs: BeamDataframeDoctestRunner( - env, - use_beam=use_beam, - wont_implement_ok=wont_implement_ok, - not_implemented_ok=not_implemented_ok, - skip=skip, - **kwargs) + env, use_beam=use_beam, wont_implement_ok=wont_implement_ok, + not_implemented_ok=not_implemented_ok, skip=skip, **kwargs) with expressions.allow_non_parallel_operations(): return func( *args, extraglobs=extraglobs, optionflags=optionflags, **kwargs) diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py index ce6db1a12057..b839d5504cea 100644 --- a/sdks/python/apache_beam/dataframe/frames.py +++ b/sdks/python/apache_beam/dataframe/frames.py @@ -232,13 +232,9 @@ def drop(self, labels, axis, index, columns, errors, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'drop', - lambda df: df.drop( - axis=axis, - index=index, - columns=columns, - errors=errors, - **kwargs), [self._expr], + 'drop', lambda df: df.drop( + axis=axis, index=index, columns=columns, errors=errors, **kwargs + ), [self._expr], proxy=proxy, requires_partition_by=requires)) @@ -248,8 +244,8 @@ def drop(self, labels, axis, index, columns, errors, **kwargs): def droplevel(self, level, axis): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'droplevel', - lambda df: df.droplevel(level, axis=axis), [self._expr], + 'droplevel', lambda df: df.droplevel(level, axis=axis), + [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary() if axis in (1, 'column') else partitionings.Singleton())) @@ -259,8 +255,7 @@ def droplevel(self, level, axis): def swaplevel(self, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'swaplevel', - lambda df: df.swaplevel(**kwargs), [self._expr], + 'swaplevel', lambda df: df.swaplevel(**kwargs), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) @@ -300,8 +295,7 @@ def __init__(self, value): with expressions.allow_non_parallel_operations(): value_expr = expressions.ComputedExpression( - 'as_scalar', - lambda df: AsScalar(df), [value._expr], + 'as_scalar', lambda df: AsScalar(df), [value._expr], requires_partition_by=partitionings.Singleton()) get_value = lambda x: x.value @@ -322,14 +316,9 @@ def __init__(self, value): return frame_base.DeferredFrame.wrap( # yapf: disable expressions.ComputedExpression( - 
'fillna', - lambda df, - value: df.fillna( - get_value(value), - method=method, - axis=axis, - limit=limit, - **kwargs), [self._expr, value_expr], + 'fillna', lambda df, value: df.fillna( + get_value(value), method=method, axis=axis, limit=limit, ** + kwargs), [self._expr, value_expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=requires)) @@ -345,30 +334,30 @@ def __init__(self, value): @frame_base.with_docs_from(pd.DataFrame) def first(self, offset): per_partition = expressions.ComputedExpression( - 'first-per-partition', - lambda df: df.sort_index().first(offset=offset), [self._expr], + 'first-per-partition', lambda df: df.sort_index().first(offset=offset), + [self._expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Arbitrary()) with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'first', - lambda df: df.sort_index().first(offset=offset), [per_partition], + 'first', lambda df: df.sort_index().first(offset=offset), + [per_partition], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Singleton())) @frame_base.with_docs_from(pd.DataFrame) def last(self, offset): per_partition = expressions.ComputedExpression( - 'last-per-partition', - lambda df: df.sort_index().last(offset=offset), [self._expr], + 'last-per-partition', lambda df: df.sort_index().last(offset=offset), + [self._expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Arbitrary()) with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'last', - lambda df: df.sort_index().last(offset=offset), [per_partition], + 'last', lambda df: df.sort_index().last(offset=offset), + [per_partition], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Singleton())) @@ -388,8 +377,7 @@ def groupby(self, by, level, axis, as_index, group_keys, **kwargs): if axis in (1, 'columns'): return _DeferredGroupByCols( expressions.ComputedExpression( - 'groupbycols', - lambda df: df.groupby( + 'groupbycols', lambda df: df.groupby( by, axis=axis, group_keys=group_keys, **kwargs), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary()), @@ -561,11 +549,9 @@ def prepend_index(df, by): # type: ignore return DeferredGroupBy( expressions.ComputedExpression( - 'groupbyindex', - lambda df: df.groupby( - level=list(range(df.index.nlevels)), - group_keys=group_keys, - **kwargs), [to_group], + 'groupbyindex', lambda df: df.groupby( + level=list(range(df.index.nlevels)), group_keys=group_keys, ** + kwargs), [to_group], requires_partition_by=partitionings.Index(), preserves_partition_by=partitionings.Arbitrary()), kwargs, @@ -612,8 +598,8 @@ def reset_index(self, level=None, **kwargs): requires_partition_by = partitionings.Arbitrary() return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'reset_index', - lambda df: df.reset_index(level=level, **kwargs), [self._expr], + 'reset_index', lambda df: df.reset_index(level=level, **kwargs), + [self._expr], preserves_partition_by=partitionings.Singleton(), requires_partition_by=requires_partition_by)) @@ -705,12 +691,8 @@ def replace(self, to_replace, value, limit, method, **kwargs): "requires collecting all data on a single node.")) return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'replace', - lambda df: df.replace( 
- to_replace=to_replace, - value=value, - limit=limit, - method=method, + 'replace', lambda df: df.replace( + to_replace=to_replace, value=value, limit=limit, method=method, **kwargs), [self._expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=requires_partition_by)) @@ -732,10 +714,8 @@ def tz_localize(self, ambiguous, **kwargs): elif isinstance(ambiguous, frame_base.DeferredFrame): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'tz_localize', - lambda df, - ambiguous: df.tz_localize(ambiguous=ambiguous, **kwargs), - [self._expr, ambiguous._expr], + 'tz_localize', lambda df, ambiguous: df.tz_localize( + ambiguous=ambiguous, **kwargs), [self._expr, ambiguous._expr], requires_partition_by=partitionings.Index(), preserves_partition_by=partitionings.Singleton())) elif ambiguous == 'infer': @@ -767,8 +747,7 @@ def size(self): with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'sum_sizes', - lambda sizes: sizes.sum(), [sizes], + 'sum_sizes', lambda sizes: sizes.sum(), [sizes], requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -787,8 +766,7 @@ def length(self): with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'sum_lengths', - lambda lengths: lengths.sum(), [lengths], + 'sum_lengths', lambda lengths: lengths.sum(), [lengths], requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -812,8 +790,7 @@ def empty(self): with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'check_all_empty', - lambda empties: empties.all(), [empties], + 'check_all_empty', lambda empties: empties.all(), [empties], requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -834,8 +811,7 @@ def bool(self): # Will throw if overall dataset has != 1 element return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'combine_all_bools', - lambda bools: bools.bool(), [bools], + 'combine_all_bools', lambda bools: bools.bool(), [bools], proxy=bool(), requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -845,8 +821,7 @@ def equals(self, other): intermediate = expressions.ComputedExpression( 'equals_partitioned', # Wrap scalar results in a Series for easier concatenation later - lambda df, - other: pd.Series(df.equals(other)), + lambda df, other: pd.Series(df.equals(other)), [self._expr, other._expr], requires_partition_by=partitionings.Index(), preserves_partition_by=partitionings.Singleton()) @@ -854,8 +829,7 @@ def equals(self, other): with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'aggregate_equals', - lambda df: df.all(), [intermediate], + 'aggregate_equals', lambda df: df.all(), [intermediate], requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -1010,8 +984,7 @@ def unstack(self, **kwargs): "Please upgrade to pandas 1.2.0 or higher to use this operation.") return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'unstack', - lambda s: s.unstack(**kwargs), [self._expr], + 'unstack', lambda s: s.unstack(**kwargs), [self._expr], requires_partition_by=partitionings.Index())) else: # Unstacking MultiIndex objects 
@@ -1062,8 +1035,7 @@ def unstack(self, **kwargs): with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'unstack', - lambda s: pd.concat([proxy, s.unstack(**kwargs)]), + 'unstack', lambda s: pd.concat([proxy, s.unstack(**kwargs)]), [self._expr], proxy=proxy, requires_partition_by=partitionings.Singleton())) @@ -1080,8 +1052,7 @@ def xs(self, key, axis, level, **kwargs): # KeyError at construction time for missing columns. return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'xs', - lambda df: df.xs(key, axis=axis, **kwargs), [self._expr], + 'xs', lambda df: df.xs(key, axis=axis, **kwargs), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) elif axis not in ('index', 0): @@ -1266,16 +1237,14 @@ def fn(s): @frame_base.with_docs_from(pd.Series) def hasnans(self): has_nans = expressions.ComputedExpression( - 'hasnans', - lambda s: pd.Series(s.hasnans), [self._expr], + 'hasnans', lambda s: pd.Series(s.hasnans), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Singleton()) with expressions.allow_non_parallel_operations(): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'combine_hasnans', - lambda s: s.any(), [has_nans], + 'combine_hasnans', lambda s: s.any(), [has_nans], requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -1312,8 +1281,7 @@ def __getitem__(self, key): expressions.ComputedExpression( # yapf: disable 'getitem', - lambda df, - indexer: df[indexer], + lambda df, indexer: df[indexer], [self._expr, key._expr], requires_partition_by=partitionings.Index(), preserves_partition_by=partitionings.Arbitrary())) @@ -1369,9 +1337,7 @@ def append(self, to_append, ignore_index, verify_integrity, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'append', - lambda s, - to_append: s.append( + 'append', lambda s, to_append: s.append( to_append, verify_integrity=verify_integrity, **kwargs), [self._expr, to_append._expr], requires_partition_by=requires, @@ -1399,9 +1365,7 @@ def align(self, other, join, axis, level, method, **kwargs): # multiple return values. 
aligned = frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'align', - lambda x, - y: pd.concat([x, y], axis=1, join='inner'), + 'align', lambda x, y: pd.concat([x, y], axis=1, join='inner'), [self._expr, other._expr], requires_partition_by=partitionings.Index(), preserves_partition_by=partitionings.Arbitrary())) @@ -1488,8 +1452,7 @@ def compute_idx(s): with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'idx_combine', - lambda s: func(s, **kwargs), [idx_func], + 'idx_combine', lambda s: func(s, **kwargs), [idx_func], requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -1514,8 +1477,7 @@ def explode(self, ignore_index): partitionings.Singleton() if ignore_index else partitionings.Index()) return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'explode', - lambda s: s.explode(ignore_index), [self._expr], + 'explode', lambda s: s.explode(ignore_index), [self._expr], preserves_partition_by=preserves, requires_partition_by=partitionings.Arbitrary())) @@ -1558,8 +1520,7 @@ def dot(self, other): if right_is_series: result = expressions.ComputedExpression( - 'extract', - lambda df: df[0], [sums], + 'extract', lambda df: df[0], [sums], requires_partition_by=partitionings.Singleton()) else: result = sums @@ -1592,8 +1553,7 @@ def quantile(self, q, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'quantile', - lambda df: df.quantile(q=q, **kwargs), [self._expr], + 'quantile', lambda df: df.quantile(q=q, **kwargs), [self._expr], requires_partition_by=requires, preserves_partition_by=partitionings.Singleton())) @@ -1679,9 +1639,8 @@ def corr(self, other, method, min_periods): # and custom partitioning. return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'corr', - lambda df, - other: df.corr(other, method=method, min_periods=min_periods), + 'corr', lambda df, other: df.corr( + other, method=method, min_periods=min_periods), [self._expr, other._expr], requires_partition_by=partitionings.Singleton(reason=reason))) @@ -1896,8 +1855,7 @@ def combine_co_moments(data): def dropna(self, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'dropna', - lambda df: df.dropna(**kwargs), [self._expr], + 'dropna', lambda df: df.dropna(**kwargs), [self._expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Arbitrary())) @@ -2006,8 +1964,8 @@ def aggregate(self, func, axis, *args, **kwargs): rows = [self.agg([f], *args, **kwargs) for f in func] return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'join_aggregate', - lambda *rows: pd.concat(rows), [row._expr for row in rows])) + 'join_aggregate', lambda *rows: pd.concat(rows), + [row._expr for row in rows])) else: # We're only handling a single column. It could be 'func' or ['func'], # which produce different results. 
'func' produces a scalar, ['func'] @@ -2190,15 +2148,14 @@ def nlargest(self, keep, **kwargs): reason="order-sensitive") kwargs['keep'] = keep per_partition = expressions.ComputedExpression( - 'nlargest-per-partition', - lambda df: df.nlargest(**kwargs), [self._expr], + 'nlargest-per-partition', lambda df: df.nlargest(**kwargs), + [self._expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Arbitrary()) with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'nlargest', - lambda df: df.nlargest(**kwargs), [per_partition], + 'nlargest', lambda df: df.nlargest(**kwargs), [per_partition], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Singleton())) @@ -2220,15 +2177,14 @@ def nsmallest(self, keep, **kwargs): reason="order-sensitive") kwargs['keep'] = keep per_partition = expressions.ComputedExpression( - 'nsmallest-per-partition', - lambda df: df.nsmallest(**kwargs), [self._expr], + 'nsmallest-per-partition', lambda df: df.nsmallest(**kwargs), + [self._expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Arbitrary()) with expressions.allow_non_parallel_operations(True): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'nsmallest', - lambda df: df.nsmallest(**kwargs), [per_partition], + 'nsmallest', lambda df: df.nsmallest(**kwargs), [per_partition], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Singleton())) @@ -2247,16 +2203,14 @@ def set_index(s): preserves_partition_by=partitionings.Singleton()) is_unique_distributed = expressions.ComputedExpression( - 'is_unique_distributed', - lambda s: pd.Series(s.is_unique), [self_index], + 'is_unique_distributed', lambda s: pd.Series(s.is_unique), [self_index], requires_partition_by=partitionings.Index(), preserves_partition_by=partitionings.Singleton()) with expressions.allow_non_parallel_operations(): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'combine', - lambda s: s.all(), [is_unique_distributed], + 'combine', lambda s: s.all(), [is_unique_distributed], requires_partition_by=partitionings.Singleton(), preserves_partition_by=partitionings.Singleton())) @@ -2293,8 +2247,7 @@ def unique(self, as_series=False): reason="non-deferred-result") return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'unique', - lambda df: pd.Series(df.unique()), [self._expr], + 'unique', lambda df: pd.Series(df.unique()), [self._expr], preserves_partition_by=partitionings.Singleton(), requires_partition_by=partitionings.Singleton( reason="unique() cannot currently be parallelized."))) @@ -2302,9 +2255,8 @@ def unique(self, as_series=False): @frame_base.with_docs_from(pd.Series) def update(self, other): self._expr = expressions.ComputedExpression( - 'update', - lambda df, - other: df.update(other) or df, [self._expr, other._expr], + 'update', lambda df, other: df.update(other) or df, + [self._expr, other._expr], preserves_partition_by=partitionings.Arbitrary(), requires_partition_by=partitionings.Index()) @@ -2424,16 +2376,14 @@ def repeat(self, repeats, axis): if isinstance(repeats, int): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( - 'repeat', - lambda series: series.repeat(repeats), [self._expr], + 'repeat', lambda series: series.repeat(repeats), [self._expr], requires_partition_by=partitionings.Arbitrary(), 
preserves_partition_by=partitionings.Arbitrary())) elif isinstance(repeats, frame_base.DeferredBase): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( 'repeat', - lambda series, - repeats_series: series.repeat(repeats_series), + lambda series, repeats_series: series.repeat(repeats_series), [self._expr, repeats._expr], requires_partition_by=partitionings.Index(), preserves_partition_by=partitionings.Arbitrary())) @@ -2467,8 +2417,7 @@ def compare(self, other, align_axis, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( 'compare', - lambda s, - other: s.compare(other, align_axis, **kwargs), + lambda s, other: s.compare(other, align_axis, **kwargs), [self._expr, other._expr], requires_partition_by=partitionings.Index(), preserves_partition_by=preserves_partition)) diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py index dbed10a4c8f9..2e560c013417 100644 --- a/sdks/python/apache_beam/dataframe/frames_test.py +++ b/sdks/python/apache_beam/dataframe/frames_test.py @@ -390,8 +390,7 @@ def test_tz_localize_ambiguous_series(self): ambiguous = pd.Series([True, True, False], index=s.index) self._run_test( - lambda s, - ambiguous: s.tz_localize('CET', ambiguous=ambiguous), + lambda s, ambiguous: s.tz_localize('CET', ambiguous=ambiguous), s, ambiguous) @@ -444,8 +443,7 @@ def test_combine_dataframe(self): df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 self._run_test( - lambda df, - df2: df.combine(df2, take_smaller), + lambda df, df2: df.combine(df2, take_smaller), df, df2, nonparallel=True) @@ -455,8 +453,7 @@ def test_combine_dataframe_fill(self): df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 self._run_test( - lambda df1, - df2: df1.combine(df2, take_smaller, fill_value=-5), + lambda df1, df2: df1.combine(df2, take_smaller, fill_value=-5), df1, df2, nonparallel=True) @@ -465,8 +462,7 @@ def test_combine_Series(self): s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0}) s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0}) self._run_test( - lambda s1, - s2: s1.combine(s2, max), + lambda s1, s2: s1.combine(s2, max), s1, s2, nonparallel=True, @@ -576,16 +572,14 @@ def test_merge(self): 'rkey': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8] }) self._run_test( - lambda df1, - df2: df1.merge(df2, left_on='lkey', right_on='rkey').rename( + lambda df1, df2: df1.merge(df2, left_on='lkey', right_on='rkey').rename( index=lambda x: '*'), df1, df2, nonparallel=True, check_proxy=False) self._run_test( - lambda df1, - df2: df1.merge( + lambda df1, df2: df1.merge( df2, left_on='lkey', right_on='rkey', suffixes=('_left', '_right')). 
rename(index=lambda x: '*'), df1, @@ -600,8 +594,8 @@ def test_merge_left_join(self): df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) self._run_test( - lambda df1, - df2: df1.merge(df2, how='left', on='a').rename(index=lambda x: '*'), + lambda df1, df2: df1.merge(df2, how='left', on='a').rename( + index=lambda x: '*'), df1, df2, nonparallel=True, @@ -618,8 +612,7 @@ def test_merge_on_index(self): }).set_index('rkey') self._run_test( - lambda df1, - df2: df1.merge(df2, left_index=True, right_index=True), + lambda df1, df2: df1.merge(df2, left_index=True, right_index=True), df1, df2, check_proxy=False) @@ -632,16 +625,14 @@ def test_merge_same_key(self): 'key': ['foo', 'bar', 'baz', 'foo'], 'value': [5, 6, 7, 8] }) self._run_test( - lambda df1, - df2: df1.merge(df2, on='key').rename(index=lambda x: '*'), + lambda df1, df2: df1.merge(df2, on='key').rename(index=lambda x: '*'), df1, df2, nonparallel=True, check_proxy=False) self._run_test( - lambda df1, - df2: df1.merge(df2, on='key', suffixes=('_left', '_right')).rename( - index=lambda x: '*'), + lambda df1, df2: df1.merge(df2, on='key', suffixes=('_left', '_right')). + rename(index=lambda x: '*'), df1, df2, nonparallel=True, @@ -652,16 +643,15 @@ def test_merge_same_key_doctest(self): df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]}) self._run_test( - lambda df1, - df2: df1.merge(df2, how='left', on='a').rename(index=lambda x: '*'), + lambda df1, df2: df1.merge(df2, how='left', on='a').rename( + index=lambda x: '*'), df1, df2, nonparallel=True, check_proxy=False) # Test without specifying 'on' self._run_test( - lambda df1, - df2: df1.merge(df2, how='left').rename(index=lambda x: '*'), + lambda df1, df2: df1.merge(df2, how='left').rename(index=lambda x: '*'), df1, df2, nonparallel=True, @@ -672,8 +662,7 @@ def test_merge_same_key_suffix_collision(self): df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4], 'a_rsuffix': [7, 8]}) self._run_test( - lambda df1, - df2: df1.merge( + lambda df1, df2: df1.merge( df2, how='left', on='a', suffixes=('_lsuffix', '_rsuffix')).rename( index=lambda x: '*'), df1, @@ -682,9 +671,9 @@ def test_merge_same_key_suffix_collision(self): check_proxy=False) # Test without specifying 'on' self._run_test( - lambda df1, - df2: df1.merge(df2, how='left', suffixes=('_lsuffix', '_rsuffix')). - rename(index=lambda x: '*'), + lambda df1, df2: df1.merge( + df2, how='left', suffixes=('_lsuffix', '_rsuffix')).rename( + index=lambda x: '*'), df1, df2, nonparallel=True, @@ -731,10 +720,8 @@ def test_value_counts_with_nans(self): for normalize in (True, False): for dropna in (True, False): self._run_test( - lambda df, - dropna=dropna, - normalize=normalize: df.num_wings.value_counts( - dropna=dropna, normalize=normalize), + lambda df, dropna=dropna, normalize=normalize: df.num_wings. 
+ value_counts(dropna=dropna, normalize=normalize), df) def test_value_counts_does_not_support_sort(self): @@ -962,11 +949,8 @@ def test_dataframe_melt(self): df) self._run_test( lambda df: df.melt( - id_vars=['A'], - value_vars=['B'], - var_name='myVarname', - value_name='myValname', - ignore_index=False), + id_vars=['A'], value_vars=['B'], var_name='myVarname', value_name= + 'myValname', ignore_index=False), df) self._run_test( lambda df: df.melt( @@ -1041,14 +1025,12 @@ def test_append_verify_integrity(self): df2 = pd.DataFrame({'A': range(10), 'B': range(10)}, index=range(9, 19)) self._run_error_test( - lambda s1, - s2: s1.append(s2, verify_integrity=True), + lambda s1, s2: s1.append(s2, verify_integrity=True), df1['A'], df2['A'], construction_time=False) self._run_error_test( - lambda df1, - df2: df1.append(df2, verify_integrity=True), + lambda df1, df2: df1.append(df2, verify_integrity=True), df1, df2, construction_time=False) @@ -1144,12 +1126,12 @@ def test_drop_duplicates(self): ( lambda base: base.from_dict({ 'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd'] - }, - orient='index'), ), + }, orient='index'), ), ( lambda base: base.from_records( - np.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')], - dtype=[('col_1', 'i4'), ('col_2', 'U1')])), ), + np.array([(3, 'a'), (2, 'b'), (1, 'c'), + (0, 'd')], dtype=[('col_1', 'i4'), + ('col_2', 'U1')])), ), ]) def test_create_methods(self, func): expected = func(pd.DataFrame) @@ -1242,8 +1224,7 @@ def test_dt_tz_localize_ambiguous_series(self): ambiguous = pd.Series([True, True, False], index=s.index) self._run_test( - lambda s, - ambiguous: s.dt.tz_localize('CET', ambiguous=ambiguous), + lambda s, ambiguous: s.dt.tz_localize('CET', ambiguous=ambiguous), s, ambiguous) @@ -1297,25 +1278,22 @@ def test_compare_dataframe(self): self._run_test(lambda df1, df2: df1.compare(df2), df1, df2) self._run_test( - lambda df1, - df2: df1.compare(df2, align_axis=0), + lambda df1, df2: df1.compare(df2, align_axis=0), df1, df2, check_proxy=False) self._run_test(lambda df1, df2: df1.compare(df2, keep_shape=True), df1, df2) self._run_test( - lambda df1, - df2: df1.compare(df2, align_axis=0, keep_shape=True), + lambda df1, df2: df1.compare(df2, align_axis=0, keep_shape=True), df1, df2) self._run_test( - lambda df1, - df2: df1.compare(df2, keep_shape=True, keep_equal=True), + lambda df1, df2: df1.compare(df2, keep_shape=True, keep_equal=True), df1, df2) self._run_test( - lambda df1, - df2: df1.compare(df2, align_axis=0, keep_shape=True, keep_equal=True), + lambda df1, df2: df1.compare( + df2, align_axis=0, keep_shape=True, keep_equal=True), df1, df2) @@ -1956,8 +1934,7 @@ def test_groupby_aggregate_grouped_column(self): [2, 1], ['foo', 0], [1, 'str'], - [3, 0, 2, 1], - ]) + [3, 0, 2, 1], ]) def test_groupby_level_agg(self, level): df = GROUPBY_DF.set_index(['group', 'foo', 'bar', 'str'], drop=False) self._run_test(lambda df: df.groupby(level=level).bar.max(), df) @@ -2212,8 +2189,8 @@ def test_dataframe_agg_level(self): def test_series_agg_multifunc_level(self): # level= is ignored for multiple agg fns self._run_test( - lambda df: df.set_index(['group', 'foo']).bar.agg(['min', 'max'], - level=0), + lambda df: df.set_index(['group', 'foo']).bar.agg(['min', 'max'], level= + 0), GROUPBY_DF) def test_series_mean_skipna(self): @@ -2370,8 +2347,7 @@ def test_df_agg_operations_on_columns(self): mean_foo=('foo', lambda x: np.mean(x)), median_bar=('bar', lambda x: np.median(x)), sum_baz=('baz', 'sum'), - count_bool=('bool', 'count'), - ), + count_bool=('bool', 
'count'), ), GROUPBY_DF) def test_std_mostly_na_with_ddof(self): @@ -2817,8 +2793,7 @@ def test_sample_with_weights_distribution(self): self.assertTrue(target_weight > other_weight * 10, "weights too close") result = self._evaluate( - lambda s, - weights: s.sample(n=num_samples, weights=weights).sum(), + lambda s, weights: s.sample(n=num_samples, weights=weights).sum(), # The first elements are 1, the rest are all 0. This means that when # we sum all the sampled elements (above), the result should be the # number of times the first elements (aka targets) were sampled. diff --git a/sdks/python/apache_beam/dataframe/io.py b/sdks/python/apache_beam/dataframe/io.py index 5fcb7326a026..a804e4b4f2d2 100644 --- a/sdks/python/apache_beam/dataframe/io.py +++ b/sdks/python/apache_beam/dataframe/io.py @@ -171,8 +171,7 @@ def to_json(df, path, orient=None, *args, **kwargs): @frame_base.with_docs_from(pd) def read_html(path, *args, **kwargs): return _ReadFromPandas( - lambda *args, - **kwargs: pd.read_html(*args, **kwargs)[0], + lambda *args, **kwargs: pd.read_html(*args, **kwargs)[0], path, args, kwargs) @@ -193,8 +192,8 @@ def to_html(df, path, *args, **kwargs): def _binary_reader(format): func = getattr(pd, 'read_%s' % format) - result = lambda path, *args, **kwargs: _ReadFromPandas(func, path, args, - kwargs) + result = lambda path, *args, **kwargs: _ReadFromPandas( + func, path, args, kwargs) result.__name__ = f'read_{format}' return result @@ -202,10 +201,8 @@ def _binary_reader(format): def _binary_writer(format): result = ( - lambda df, - path, - *args, - **kwargs: _as_pc(df) | _WriteToPandas(f'to_{format}', path, args, kwargs)) + lambda df, path, *args, **kwargs: _as_pc(df) | _WriteToPandas( + f'to_{format}', path, args, kwargs)) result.__name__ = f'to_{format}' return result @@ -294,9 +291,10 @@ def expand(self, root): matches_pcoll.pipeline | 'DoOnce' >> beam.Create([None]) | beam.Map( - lambda _, - paths: {path: ix - for ix, path in enumerate(sorted(paths))}, + lambda _, paths: { + path: ix + for ix, path in enumerate(sorted(paths)) + }, paths=beam.pvalue.AsList( matches_pcoll | beam.Map(lambda match: match.path)))) diff --git a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py index c7ea908a9336..aeafc4911ed7 100644 --- a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py +++ b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py @@ -70,8 +70,7 @@ def test_ndframe_tests(self): "df.loc['2016-01-05':'2016-01-10', :].tail()" ], 'pandas.core.generic.NDFrame.replace': [ - "s.replace([1, 2], method='bfill')", - # Relies on method='pad' + "s.replace([1, 2], method='bfill')", # Relies on method='pad' "s.replace('a')", # Relies on method='pad' # value=None is not valid for pandas < 1.4 @@ -142,8 +141,7 @@ def test_ndframe_tests(self): # some kind, 2 was passed # pandas doctests only verify the type of exception 'df.rename(2)' - ], - # For pandas >= 1.4, rename is changed to _rename + ], # For pandas >= 1.4, rename is changed to _rename 'pandas.core.generic.NDFrame._rename': [ # Seems to be an upstream bug. The actual error has a different # message: @@ -151,8 +149,7 @@ def test_ndframe_tests(self): # some kind, 2 was passed # pandas doctests only verify the type of exception 'df.rename(2)' - ], - # Tests rely on setting index + ], # Tests rely on setting index 'pandas.core.generic.NDFrame.rename_axis': ['*'], # Raises right exception, but testing framework has matching issues. 
'pandas.core.generic.NDFrame.replace': [ @@ -160,7 +157,7 @@ def test_ndframe_tests(self): ], 'pandas.core.generic.NDFrame.squeeze': ['*'], - # NameError + # NameError 'pandas.core.generic.NDFrame.resample': ['df'], # Skipped so we don't need to install natsort @@ -211,8 +208,7 @@ def test_dataframe_tests(self): "df.nsmallest(3, 'population', keep='last')", ], 'pandas.core.frame.DataFrame.replace': [ - "s.replace([1, 2], method='bfill')", - # Relies on method='pad' + "s.replace([1, 2], method='bfill')", # Relies on method='pad' "s.replace('a')", # Relies on method='pad' # value=None is not valid for pandas < 1.4 @@ -256,8 +252,7 @@ def test_dataframe_tests(self): "df.melt(id_vars=[('A', 'D')], value_vars=[('B', 'E')])", "df.melt(id_vars=['A'], value_vars=['B'],\n" + " var_name='myVarname', value_name='myValname')" - ], - # Most keep= options are order-sensitive + ], # Most keep= options are order-sensitive 'pandas.core.frame.DataFrame.drop_duplicates': ['*'], 'pandas.core.frame.DataFrame.duplicated': [ 'df.duplicated()', @@ -294,20 +289,18 @@ def test_dataframe_tests(self): "df1.merge(df2, how='cross')" ], - # TODO(https://github.com/apache/beam/issues/20759) + # TODO(https://github.com/apache/beam/issues/20759) 'pandas.core.frame.DataFrame.set_index': [ "df.set_index([s, s**2])", ], - 'pandas.core.frame.DataFrame.set_axis': [ "df.set_axis(range(0,2), axis='index')", ], - # TODO(https://github.com/apache/beam/issues/21014) + # TODO(https://github.com/apache/beam/issues/21014) 'pandas.core.frame.DataFrame.value_counts': [ - 'df.value_counts(dropna=False)' + 'df.value_counts(dropna=False)' ], - 'pandas.core.frame.DataFrame.to_timestamp': ['*'] }, skip={ @@ -315,14 +308,12 @@ def test_dataframe_tests(self): '*': [ # mul doesn't work in Beam with axis='index'. "df.mul({'circle': 0, 'triangle': 2, 'rectangle': 3}, " - "axis='index')", - # eq doesn't work with axis='index'. + "axis='index')", # eq doesn't work with axis='index'. "df.eq([250, 250, 100], axis='index')", # New test in Pandas 2.1 that uses indexes. 'df != pd.Series([100, 250], index=["cost", "revenue"])', # New test in Pandas 2.1 that uses indexes. 'df.le(df_multindex, level=1)' - ], # DeferredDataFrame doesn't implement the DF interchange protocol. 'pandas.core.frame.DataFrame.__dataframe__': ['*'], @@ -335,20 +326,17 @@ def test_dataframe_tests(self): 'df', 'df2 = pd.DataFrame(data=df1, index=["a", "c"])', 'df2', - ], - # s2 created with reindex + ], # s2 created with reindex 'pandas.core.frame.DataFrame.dot': [ 'df.dot(s2)', ], - 'pandas.core.frame.DataFrame.resample': ['df'], 'pandas.core.frame.DataFrame.asfreq': ['*'], # Throws NotImplementedError when modifying df 'pandas.core.frame.DataFrame.axes': [ # Returns deferred index. 'df.axes', - ], - # Skipped because the relies on loc to set cells in df2 + ], # Skipped because the relies on loc to set cells in df2 'pandas.core.frame.DataFrame.compare': ['*'], 'pandas.core.frame.DataFrame.cov': [ # Relies on setting entries ahead of time. @@ -371,8 +359,7 @@ def test_dataframe_tests(self): # This should pass as set_axis(axis='columns') # and fail with set_axis(axis='index') "df.set_axis(['a', 'b', 'c'], axis='index')" - ], - # Beam's implementation takes a filepath as an argument. + ], # Beam's implementation takes a filepath as an argument. 
'pandas.core.frame.DataFrame.to_html': ['*'], 'pandas.core.frame.DataFrame.to_markdown': ['*'], 'pandas.core.frame.DataFrame.to_parquet': ['*'], @@ -384,11 +371,10 @@ def test_dataframe_tests(self): 'df.insert(1, "newcol", [99, 99])', 'df.insert(0, "col1", [100, 100], allow_duplicates=True)' ], - 'pandas.core.frame.DataFrame.to_records': [ 'df.index = df.index.rename("I")', - 'index_dtypes = f" 100).mean()', ], - 'pandas.core.series.Series.asfreq': ['*'], - # error formatting + 'pandas.core.series.Series.asfreq': ['*'], # error formatting 'pandas.core.series.Series.append': [ 's1.append(s2, verify_integrity=True)', ], 'pandas.core.series.Series.cov': [ # Differs in LSB on jenkins. "s1.cov(s2)", - ], - # Test framework doesn't materialze DeferredIndex. + ], # Test framework doesn't materialze DeferredIndex. 'pandas.core.series.Series.keys': ['s.keys()'], # Skipped idxmax/idxmin due an issue with the test framework 'pandas.core.series.Series.idxmin': ['s.idxmin()'], @@ -620,14 +599,12 @@ def test_series_tests(self): # Fails when result is a singleton: # https://github.com/apache/beam/issues/28559 'pandas.core.series.Series.kurt': [ - 'df.kurt(axis=None).round(6)', - 's.kurt()' + 'df.kurt(axis=None).round(6)', 's.kurt()' ], # Fails when result is a singleton: # https://github.com/apache/beam/issues/28559 'pandas.core.series.Series.sem': [ - 'df.sem().round(6)', - 's.sem().round(6)' + 'df.sem().round(6)', 's.sem().round(6)' ], }) self.assertEqual(result.failed, 0) @@ -675,13 +652,13 @@ def test_string_tests(self): "pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr)" ], - # output has incorrect formatting in 1.2.x + # output has incorrect formatting in 1.2.x f'{module_name}.StringMethods.extractall': ['*'], # For split and rsplit, if expand=True, then the series # must be of CategoricalDtype, which pandas doesn't convert to f'{module_name}.StringMethods.rsplit': [ - 's.str.split(r"\\+|=", expand=True)', # for pandas<1.4 + 's.str.split(r"\\+|=", expand=True)', # for pandas<1.4 's.str.split(expand=True)', 's.str.rsplit("/", n=1, expand=True)', 's.str.split(r"and|plus", expand=True)', @@ -692,7 +669,7 @@ def test_string_tests(self): 's.str.split(r"\\.jpg", regex=False, expand=True)' ], f'{module_name}.StringMethods.split': [ - 's.str.split(r"\\+|=", expand=True)', # for pandas<1.4 + 's.str.split(r"\\+|=", expand=True)', # for pandas<1.4 's.str.split(expand=True)', 's.str.rsplit("/", n=1, expand=True)', 's.str.split(r"and|plus", expand=True)', @@ -741,16 +718,16 @@ def test_datetime_tests(self): ], 'pandas.core.indexes.accessors.TimedeltaProperties.to_pytimedelta': [ '*' - ], - # pylint: enable=line-too-long - # Test uses to_datetime. Beam calls to_datetime element-wise, and - # therefore the .tz attribute is not evaluated on entire Series. - # Hence, .tz becomes None, unless explicitly set. - # See: see test_tz_with_utc_zone_set_explicitly + ], # pylint: enable=line-too-long + # Test uses to_datetime. Beam calls to_datetime element-wise, and + # therefore the .tz attribute is not evaluated on entire Series. + # Hence, .tz becomes None, unless explicitly set. + # See: see test_tz_with_utc_zone_set_explicitly 'pandas.core.indexes.accessors.DatetimeProperties.tz': ['*'], }) datetimelike_result = doctests.testmod( - pd.core.arrays.datetimelike, use_beam=False, + pd.core.arrays.datetimelike, + use_beam=False, not_implemented_ok={ # Beam Dataframes don't implement a deferred to_timedelta operation. 
# Top-level issue: https://github.com/apache/beam/issues/20318 @@ -758,14 +735,12 @@ def test_datetime_tests(self): "ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='d'))", "tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')", 'tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])', # pylint: disable=line-too-long - "tdelta_idx", "tdelta_idx.inferred_freq", "tdelta_idx.mean()", ], }) - datetime_result = doctests.testmod( pd.core.arrays.datetimes, use_beam=False, @@ -782,7 +757,8 @@ def test_datetime_tests(self): '*': [ "ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='d'))", "tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')", - 'tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])'], # pylint: disable=line-too-long + 'tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])' + ], # pylint: disable=line-too-long # Verifies index version of this method 'pandas.core.arrays.datetimes.DatetimeArray.to_period': [ 'df.index.to_period("M")' @@ -872,8 +848,7 @@ def test_groupby_tests(self): 'pandas.core.groupby.groupby.GroupBy.resample': [ 'df.iloc[2, 0] = 5', 'df', - ], - # df is reassigned + ], # df is reassigned 'pandas.core.groupby.groupby.GroupBy.rank': ['df'], # TODO: Raise wont implement for list passed as a grouping column # Currently raises unhashable type: list @@ -887,11 +862,10 @@ def test_groupby_tests(self): pd.core.groupby.generic, use_beam=False, wont_implement_ok={ - '*' : [ + '*': [ # resample is WontImpl. "ser.resample('MS').nunique()", - ], - # TODO: Is take actually deprecated? + ], # TODO: Is take actually deprecated? 'pandas.core.groupby.generic.DataFrameGroupBy.take': ['*'], 'pandas.core.groupby.generic.SeriesGroupBy.take': ['*'], 'pandas.core.groupby.generic.SeriesGroupBy.nsmallest': [ @@ -945,23 +919,24 @@ def test_groupby_tests(self): "df.loc[df.index[:5], 'a'] = np.nan", "df.loc[df.index[5:10], 'b'] = np.nan", "df.cov(min_periods=12)", - ], - # These examples rely on grouping by a list + ], # These examples rely on grouping by a list 'pandas.core.groupby.generic.SeriesGroupBy.aggregate': ['*'], 'pandas.core.groupby.generic.DataFrameGroupBy.aggregate': ['*'], # Skipped idxmax/idxmin due an issue with the test framework 'pandas.core.groupby.generic.SeriesGroupBy.idxmin': ['s.idxmin()'], 'pandas.core.groupby.generic.SeriesGroupBy.idxmax': ['s.idxmax()'], # Order-sensitive operations. TODO: Return a better error message. 
- 'pandas.core.groupby.generic.SeriesGroupBy.is_monotonic_increasing': ['*'], # pylint: disable=line-too-long - 'pandas.core.groupby.generic.SeriesGroupBy.is_monotonic_decreasing': ['*'], # pylint: disable=line-too-long + 'pandas.core.groupby.generic.SeriesGroupBy.is_monotonic_increasing': [ + '*' + ], # pylint: disable=line-too-long + 'pandas.core.groupby.generic.SeriesGroupBy.is_monotonic_decreasing': [ + '*' + ], # pylint: disable=line-too-long # Uses as_index, which is currently not_implemented 'pandas.core.groupby.generic.DataFrameGroupBy.value_counts': [ - "df.groupby('gender', as_index=False).value_counts()", - # pylint: disable=line-too-long + "df.groupby('gender', as_index=False).value_counts()", # pylint: disable=line-too-long "df.groupby('gender', as_index=False).value_counts(normalize=True)", - ], - # These examples rely on grouping by a list + ], # These examples rely on grouping by a list 'pandas.core.groupby.generic.SeriesGroupBy.fillna': ['*'], # These examples rely on grouping by a list 'pandas.core.groupby.generic.DataFrameGroupBy.fillna': ['*'], @@ -972,8 +947,7 @@ def test_groupby_tests(self): # Named aggregation not supported yet. 'pandas.core.groupby.generic.NamedAgg': [ 'df.groupby("key").agg(result_a=agg_a, result_1=agg_1)' - ], - # These examples rely on grouping by a list + ], # These examples rely on grouping by a list 'pandas.core.groupby.generic.DataFrameGroupBy.transform': ['*'], # These examples rely on grouping by a list 'pandas.core.groupby.generic.SeriesGroupBy.transform': ['*'], @@ -1024,7 +998,9 @@ def test_top_level(self): 'pivot': ['*'], 'to_datetime': ['s.head()'], 'to_pickle': ['*'], - 'unique': ['pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)'], # pylint: disable=line-too-long + 'unique': [ + 'pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)' + ], # pylint: disable=line-too-long 'melt': [ "pd.melt(df, id_vars=['A'], value_vars=['B'])", "pd.melt(df, id_vars=['A'], value_vars=['B', 'C'])", @@ -1039,34 +1015,27 @@ def test_top_level(self): 'concat': [ 'pd.concat([df5, df6], verify_integrity=True)', 'pd.concat([df7, new_row.to_frame().T], ignore_index=True)' - ], - # doctest DeprecationWarning - 'melt': ['df'], - # Order-sensitive re-indexing. + ], # doctest DeprecationWarning + 'melt': ['df'], # Order-sensitive re-indexing. 'merge': [ "df1.merge(df2, left_on='lkey', right_on='rkey')", "df1.merge(df2, left_on='lkey', right_on='rkey',\n" " suffixes=('_left', '_right'))", "df1.merge(df2, how='left', on='a')", - ], - # Not an actual test. + ], # Not an actual test. 'option_context': ['*'], 'factorize': ['codes', 'uniques'], # Bad top-level use of un-imported function. 'merge_ordered': [ 'merge_ordered(df1, df2, fill_method="ffill", left_by="group")' - ], - # Expected error. + ], # Expected error. 'pivot': [ "df.pivot(index='foo', columns='bar', values='baz')", "df.pivot(index='foo', columns='bar')['baz']", - "df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])", - # pylint: disable=line-too-long - 'df.pivot(index="lev1", columns=["lev2", "lev3"],values="values")', - # pylint: disable=line-too-long + "df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])", # pylint: disable=line-too-long + 'df.pivot(index="lev1", columns=["lev2", "lev3"],values="values")', # pylint: disable=line-too-long 'df.pivot(index=["lev1", "lev2"], columns=["lev3"],values="values")' - ], - # Never written. + ], # Never written. 
'to_pickle': ['os.remove("./dummy.pkl")'], **skip_reads }) diff --git a/sdks/python/apache_beam/dataframe/pandas_top_level_functions.py b/sdks/python/apache_beam/dataframe/pandas_top_level_functions.py index a8139675ad39..6c17ad8a8272 100644 --- a/sdks/python/apache_beam/dataframe/pandas_top_level_functions.py +++ b/sdks/python/apache_beam/dataframe/pandas_top_level_functions.py @@ -131,14 +131,9 @@ def concat( expressions.ComputedExpression( 'concat', lambda *objs: pd.concat( - objs, - axis=axis, - join=join, - ignore_index=ignore_index, - keys=keys, - levels=levels, - names=names, - verify_integrity=verify_integrity), # yapf break + objs, axis=axis, join=join, ignore_index=ignore_index, keys= + keys, levels=levels, names=names, verify_integrity= + verify_integrity), # yapf break exprs, requires_partition_by=required_partitioning, preserves_partition_by=preserves_partitioning)) diff --git a/sdks/python/apache_beam/dataframe/transforms.py b/sdks/python/apache_beam/dataframe/transforms.py index 59e5eec05d2f..7128726f5eb1 100644 --- a/sdks/python/apache_beam/dataframe/transforms.py +++ b/sdks/python/apache_beam/dataframe/transforms.py @@ -208,8 +208,8 @@ def expand(self, pcolls): | 'SumSizes' >> beam.CombineGlobally(sum) | 'NumPartitions' >> beam.Map( lambda size: max( - MIN_PARTITIONS, - min(MAX_PARTITIONS, size // TARGET_PARTITION_SIZE)))) + MIN_PARTITIONS, min( + MAX_PARTITIONS, size // TARGET_PARTITION_SIZE)))) partition_fn = self.stage.partitioning.partition_fn @@ -247,8 +247,8 @@ def expand(self, pcoll): def evaluate(partition, stage=self.stage, **side_inputs): def lookup(expr): # Use proxy if there's no data in this partition - return expr.proxy( - ).iloc[:0] if partition[expr._id] is None else partition[expr._id] + return expr.proxy().iloc[:0] if partition[ + expr._id] is None else partition[expr._id] session = expressions.Session( dict([(expr, lookup(expr)) for expr in tabular_inputs] + @@ -420,8 +420,10 @@ def expr_to_stage(expr): @_memoize def stage_to_result(stage): - return {expr._id: expr_to_pcoll(expr) - for expr in stage.inputs} | ComputeStage(stage) + return { + expr._id: expr_to_pcoll(expr) + for expr in stage.inputs + } | ComputeStage(stage) @_memoize def expr_to_pcoll(expr): diff --git a/sdks/python/apache_beam/dataframe/transforms_test.py b/sdks/python/apache_beam/dataframe/transforms_test.py index a143606cc913..6b070090c624 100644 --- a/sdks/python/apache_beam/dataframe/transforms_test.py +++ b/sdks/python/apache_beam/dataframe/transforms_test.py @@ -302,8 +302,7 @@ def check(actual): assert_that( dict(x=one, y=two) | 'DictIn' >> transforms.DataframeTransform( - lambda x, - y: (x + y), + lambda x, y: (x + y), proxy=dict(x=proxy, y=proxy), yield_elements='pandas'), equal_to_series(three_series), @@ -348,8 +347,7 @@ def test_rename(self): with expressions.allow_non_parallel_operations(): self.run_scenario( - df, - lambda df: df.rename( + df, lambda df: df.rename( columns={'B': 'C'}, index={ 0: 2, 2: 0 }, errors='raise')) diff --git a/sdks/python/apache_beam/examples/avro_nyc_trips.py b/sdks/python/apache_beam/examples/avro_nyc_trips.py index 23d25649dad5..bc36c8c2ff17 100644 --- a/sdks/python/apache_beam/examples/avro_nyc_trips.py +++ b/sdks/python/apache_beam/examples/avro_nyc_trips.py @@ -180,13 +180,9 @@ def add_input(self, accumulator, record): return ( total_price + sum( record[name] for name in ( - 'base_passenger_fare', - 'tolls', - 'bcf', - 'sales_tax', - 'congestion_surcharge', - 'airport_fee', - 'tips') if record[name] is not None), + 'base_passenger_fare', 
'tolls', 'bcf', 'sales_tax', + 'congestion_surcharge', 'airport_fee', 'tips') + if record[name] is not None), total_driver_pay + record['driver_pay'] + record['tips'], total_trip_miles + record['trip_miles'], total_trip_time + record['trip_time'], diff --git a/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py b/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py index a616998d2c73..559cd6913925 100644 --- a/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py +++ b/sdks/python/apache_beam/examples/inference/pytorch_language_modeling.py @@ -181,18 +181,19 @@ def batch_elements_kwargs(self): bert_tokenizer = BertTokenizer.from_pretrained(known_args.bert_tokenizer) if not known_args.input: - text = (pipeline | 'CreateSentences' >> beam.Create([ - 'The capital of France is Paris .', - 'It is raining cats and dogs .', - 'He looked up and saw the sun and stars .', - 'Today is Monday and tomorrow is Tuesday .', - 'There are 5 coconuts on this palm tree .', - 'The richest person in the world is not here .', - 'Malls are amazing places to shop because you can find everything you need under one roof .', # pylint: disable=line-too-long - 'This audiobook is sure to liquefy your brain .', - 'The secret ingredient to his wonderful life was gratitude .', - 'The biggest animal in the world is the whale .', - ])) + text = ( + pipeline | 'CreateSentences' >> beam.Create([ + 'The capital of France is Paris .', + 'It is raining cats and dogs .', + 'He looked up and saw the sun and stars .', + 'Today is Monday and tomorrow is Tuesday .', + 'There are 5 coconuts on this palm tree .', + 'The richest person in the world is not here .', + 'Malls are amazing places to shop because you can find everything you need under one roof .', # pylint: disable=line-too-long + 'This audiobook is sure to liquefy your brain .', + 'The secret ingredient to his wonderful life was gratitude .', + 'The biggest animal in the world is the whale .', + ])) else: text = ( pipeline | 'ReadSentences' >> beam.io.ReadFromText(known_args.input)) diff --git a/sdks/python/apache_beam/examples/inference/xgboost_iris_classification.py b/sdks/python/apache_beam/examples/inference/xgboost_iris_classification.py index 498511a5a2cf..fa0582905e94 100644 --- a/sdks/python/apache_beam/examples/inference/xgboost_iris_classification.py +++ b/sdks/python/apache_beam/examples/inference/xgboost_iris_classification.py @@ -101,8 +101,8 @@ def load_sklearn_iris_test_data( dataset['data'], dataset['target'], test_size=.2, random_state=seed) if split: - return [(index, data_type(sample.reshape(1, -1))) for index, - sample in enumerate(x_test)] + return [(index, data_type(sample.reshape(1, -1))) + for index, sample in enumerate(x_test)] return [(0, data_type(x_test))] diff --git a/sdks/python/apache_beam/examples/ml_transform/vocab_tfidf_processing.py b/sdks/python/apache_beam/examples/ml_transform/vocab_tfidf_processing.py index b8ae61ce51e5..ff9c03f864b7 100644 --- a/sdks/python/apache_beam/examples/ml_transform/vocab_tfidf_processing.py +++ b/sdks/python/apache_beam/examples/ml_transform/vocab_tfidf_processing.py @@ -94,8 +94,7 @@ def expand(self, pcoll): shuffled_examples | beam.Map( lambda label_review: { - REVIEW_COLUMN: label_review[0], - LABEL_COLUMN: label_review[1], + REVIEW_COLUMN: label_review[0], LABEL_COLUMN: label_review[1], RAW_DATA_KEY: label_review[0] })) diff --git a/sdks/python/apache_beam/examples/snippets/snippets_test.py b/sdks/python/apache_beam/examples/snippets/snippets_test.py 
index 552a32c81929..3714c0574e05 100644 --- a/sdks/python/apache_beam/examples/snippets/snippets_test.py +++ b/sdks/python/apache_beam/examples/snippets/snippets_test.py @@ -998,8 +998,8 @@ def test_model_co_group_by_key_tuple(self): ] # [END model_group_by_key_cogroupbykey_tuple_formatted_outputs] expected_results = [ - '%s; %s; %s' % (name, info['emails'], info['phones']) for name, - info in results + '%s; %s; %s' % (name, info['emails'], info['phones']) + for name, info in results ] self.assertEqual(expected_results, formatted_results) self.assertEqual(formatted_results, self.get_output(result_path)) @@ -1441,12 +1441,13 @@ def test_side_input_slow_update(self): for j in range(count): f.write('f' + idstr + 'a' + str(j) + '\n') - sample_main_input_elements = ([first_ts - 2, # no output due to no SI - first_ts + 1, # First window - first_ts + 8, # Second window - first_ts + 15, # Third window - first_ts + 22, # Fourth window - ]) + sample_main_input_elements = ([ + first_ts - 2, # no output due to no SI + first_ts + 1, # First window + first_ts + 8, # Second window + first_ts + 15, # Third window + first_ts + 22, # Fourth window + ]) pipeline, pipeline_result = snippets.side_input_slow_update( src_file_pattern, first_ts, last_ts, interval, diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximatequantiles_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximatequantiles_test.py index 2adfcd05b99a..0b8f2b5c0f6c 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximatequantiles_test.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximatequantiles_test.py @@ -32,8 +32,7 @@ @mock.patch('apache_beam.Pipeline', TestPipeline) @mock.patch( 'apache_beam.examples.snippets.transforms.aggregation.' - 'approximatequantiles.print', - lambda x: x) + 'approximatequantiles.print', lambda x: x) class ApproximateQuantilesTest(unittest.TestCase): def test_approximatequantiles(self): def check_result(quantiles): diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximateunique_test.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximateunique_test.py index c945cec534b8..c1e005a1a2f6 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximateunique_test.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/approximateunique_test.py @@ -33,8 +33,7 @@ @mock.patch('apache_beam.Pipeline', TestPipeline) @mock.patch( 'apache_beam.examples.snippets.transforms.aggregation.' 
- 'approximateunique.print', - lambda x: x) + 'approximateunique.print', lambda x: x) class ApproximateUniqueTest(unittest.TestCase): def test_approximateunique(self): def check_result(approx_count): diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_side_inputs_singleton.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_side_inputs_singleton.py index b20571bde62d..eb6182667173 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_side_inputs_singleton.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combineperkey_side_inputs_singleton.py @@ -37,8 +37,7 @@ def combineperkey_side_inputs_singleton(test=None): ('🍅', 3), ]) | 'Saturated sum' >> beam.CombinePerKey( - lambda values, - max_value: min(sum(values), max_value), + lambda values, max_value: min(sum(values), max_value), max_value=beam.pvalue.AsSingleton(max_value)) | beam.Map(print)) # [END combineperkey_side_inputs_singleton] diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_side_inputs_singleton.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_side_inputs_singleton.py index efec1635c18e..c07697b2c88b 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_side_inputs_singleton.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/combinevalues_side_inputs_singleton.py @@ -49,8 +49,7 @@ def combinevalues_side_inputs_singleton(test=None): ('🍅', [4, 5, 3]), ]) | 'Saturated sum' >> beam.CombineValues( - lambda values, - max_value: min(sum(values), max_value), + lambda values, max_value: min(sum(values), max_value), max_value=beam.pvalue.AsSingleton(max_value)) | beam.Map(print)) # [END combinevalues_side_inputs_singleton] diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_of.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_of.py index 51094a993de9..8596e9fcfec8 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_of.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_of.py @@ -48,12 +48,11 @@ def top_of(test=None): '🌽 Corn', ]) | 'Shortest names' >> beam.combiners.Top.Of( - 2, # number of elements - key=len, # optional, defaults to the element itself + 2, # number of elements + key=len, # optional, defaults to the element itself reverse=True, # optional, defaults to False (largest/descending) ) - | beam.Map(print) - ) + | beam.Map(print)) # [END top_of] if test: test(shortest_elements) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_per_key.py b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_per_key.py index 676f10ffc310..246de87f55ea 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_per_key.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/aggregation/top_per_key.py @@ -51,12 +51,11 @@ def top_per_key(test=None): ('winter', '🍆 Eggplant'), ]) | 'Shortest names per key' >> beam.combiners.Top.PerKey( - 2, # number of elements - key=len, # optional, defaults to the value itself + 2, # number of elements + key=len, # optional, defaults to the value itself reverse=True, # optional, defaults to False (largest/descending) ) - | beam.Map(print) - ) + | beam.Map(print)) # [END top_per_key] if test: test(shortest_elements_per_key) diff --git 
a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_dict.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_dict.py index 64a4b0aa97c5..a969765e4d12 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_dict.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_dict.py @@ -65,8 +65,7 @@ def filter_side_inputs_dict(test=None): }, ]) | 'Filter plants by duration' >> beam.Filter( - lambda plant, - keep_duration: keep_duration[plant['duration']], + lambda plant, keep_duration: keep_duration[plant['duration']], keep_duration=beam.pvalue.AsDict(keep_duration), ) | beam.Map(print)) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_iter.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_iter.py index 42043a38c35b..9db1c61443e4 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_iter.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_iter.py @@ -65,8 +65,7 @@ def filter_side_inputs_iter(test=None): }, ]) | 'Filter valid plants' >> beam.Filter( - lambda plant, - valid_durations: plant['duration'] in valid_durations, + lambda plant, valid_durations: plant['duration'] in valid_durations, valid_durations=beam.pvalue.AsIter(valid_durations), ) | beam.Map(print)) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_singleton.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_singleton.py index 5971082becd3..34662b4e257b 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_singleton.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/filter_side_inputs_singleton.py @@ -61,8 +61,7 @@ def filter_side_inputs_singleton(test=None): }, ]) | 'Filter perennials' >> beam.Filter( - lambda plant, - duration: plant['duration'] == duration, + lambda plant, duration: plant['duration'] == duration, duration=beam.pvalue.AsSingleton(perennial), ) | beam.Map(print)) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_side_inputs_singleton.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_side_inputs_singleton.py index 84aed00b1046..6abf3f485a18 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_side_inputs_singleton.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/flatmap_side_inputs_singleton.py @@ -48,8 +48,7 @@ def flatmap_side_inputs_singleton(test=None): '🍅Tomato,🥔Potato', ]) | 'Split words' >> beam.FlatMap( - lambda text, - delimiter: text.split(delimiter), + lambda text, delimiter: text.split(delimiter), delimiter=beam.pvalue.AsSingleton(delimiter), ) | beam.Map(print)) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_context.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_context.py index 26b5558928cb..a827e72b3f3a 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_context.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_context.py @@ -71,9 +71,8 @@ def random_nonce(): ], reshuffle=False) | 'Strip header' >> beam.Map( - lambda text, - a=beam.DoFn.SetupContextParam(random_nonce), - 
b=beam.DoFn.BundleContextParam(random_nonce): f"{text} {a} {b}") + lambda text, a=beam.DoFn.SetupContextParam(random_nonce), b=beam. + DoFn.BundleContextParam(random_nonce): f"{text} {a} {b}") | beam.Map(print)) # [END map_context] if test: diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_iter.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_iter.py index c155764a0cd4..f0be086e918c 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_iter.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_iter.py @@ -51,8 +51,7 @@ def map_side_inputs_iter(test=None): '# 🥔Potato\n', ]) | 'Strip header' >> beam.Map( - lambda text, - chars: text.strip(''.join(chars)), + lambda text, chars: text.strip(''.join(chars)), chars=beam.pvalue.AsIter(chars), ) | beam.Map(print)) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_singleton.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_singleton.py index 323134e315ea..8dc6ebb354a9 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_singleton.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/map_side_inputs_singleton.py @@ -51,8 +51,7 @@ def map_side_inputs_singleton(test=None): '# 🥔Potato\n', ]) | 'Strip header' >> beam.Map( - lambda text, - chars: text.strip(chars), + lambda text, chars: text.strip(chars), chars=beam.pvalue.AsSingleton(chars), ) | beam.Map(print)) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_event_time.py b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_event_time.py index c5e4013e0300..1dea30b3ce7c 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_event_time.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/elementwise/withtimestamps_event_time.py @@ -45,17 +45,26 @@ def process(self, plant, timestamp=beam.DoFn.TimestampParam): plant_timestamps = ( pipeline | 'Garden plants' >> beam.Create([ - {'name': 'Strawberry', 'season': 1585699200}, # April, 2020 - {'name': 'Carrot', 'season': 1590969600}, # June, 2020 - {'name': 'Artichoke', 'season': 1583020800}, # March, 2020 - {'name': 'Tomato', 'season': 1588291200}, # May, 2020 - {'name': 'Potato', 'season': 1598918400}, # September, 2020 + { + 'name': 'Strawberry', 'season': 1585699200 + }, # April, 2020 + { + 'name': 'Carrot', 'season': 1590969600 + }, # June, 2020 + { + 'name': 'Artichoke', 'season': 1583020800 + }, # March, 2020 + { + 'name': 'Tomato', 'season': 1588291200 + }, # May, 2020 + { + 'name': 'Potato', 'season': 1598918400 + }, # September, 2020 ]) | 'With timestamps' >> beam.Map( lambda plant: beam.window.TimestampedValue(plant, plant['season'])) | 'Get timestamp' >> beam.ParDo(GetTimestamp()) - | beam.Map(print) - ) + | beam.Map(print)) # [END withtimestamps_event_time] if test: test(plant_timestamps) diff --git a/sdks/python/apache_beam/examples/snippets/transforms/other/window.py b/sdks/python/apache_beam/examples/snippets/transforms/other/window.py index 484917e77658..8f7a1047792c 100644 --- a/sdks/python/apache_beam/examples/snippets/transforms/other/window.py +++ b/sdks/python/apache_beam/examples/snippets/transforms/other/window.py @@ -41,22 +41,38 @@ def window(test=None): import apache_beam as beam with beam.Pipeline() as pipeline: - produce = 
(pipeline - | 'Garden plants' >> beam.Create([ - {'name': 'Strawberry', 'season': 1585699200}, # April, 2020 - {'name': 'Strawberry', 'season': 1588291200}, # May, 2020 - {'name': 'Carrot', 'season': 1590969600}, # June, 2020 - {'name': 'Artichoke', 'season': 1583020800}, # March, 2020 - {'name': 'Artichoke', 'season': 1585699200}, # April, 2020 - {'name': 'Tomato', 'season': 1588291200}, # May, 2020 - {'name': 'Potato', 'season': 1598918400}, # September, 2020 - ]) - | 'With timestamps' >> beam.Map(lambda plant: beam.window.TimestampedValue(plant['name'], plant['season'])) - | 'Window into fixed 2-month windows' >> beam.WindowInto( - beam.window.FixedWindows(2 * 30 * 24 * 60 * 60)) - | 'Count per window' >> beam.combiners.Count.PerElement() - | 'Print results' >> beam.Map(print) - ) + produce = ( + pipeline + | 'Garden plants' >> beam.Create([ + { + 'name': 'Strawberry', 'season': 1585699200 + }, # April, 2020 + { + 'name': 'Strawberry', 'season': 1588291200 + }, # May, 2020 + { + 'name': 'Carrot', 'season': 1590969600 + }, # June, 2020 + { + 'name': 'Artichoke', 'season': 1583020800 + }, # March, 2020 + { + 'name': 'Artichoke', 'season': 1585699200 + }, # April, 2020 + { + 'name': 'Tomato', 'season': 1588291200 + }, # May, 2020 + { + 'name': 'Potato', 'season': 1598918400 + }, # September, 2020 + ]) + | 'With timestamps' >> beam.Map( + lambda plant: beam.window.TimestampedValue( + plant['name'], plant['season'])) + | 'Window into fixed 2-month windows' >> beam.WindowInto( + beam.window.FixedWindows(2 * 30 * 24 * 60 * 60)) + | 'Count per window' >> beam.combiners.Count.PerElement() + | 'Print results' >> beam.Map(print)) # [END window] if test: diff --git a/sdks/python/apache_beam/examples/sql_taxi.py b/sdks/python/apache_beam/examples/sql_taxi.py index e8a29806d72a..9cee37305f68 100644 --- a/sdks/python/apache_beam/examples/sql_taxi.py +++ b/sdks/python/apache_beam/examples/sql_taxi.py @@ -50,8 +50,8 @@ def run(output_topic, pipeline_args): # Use beam.Row to create a schema-aware PCollection | "Create beam Row" >> beam.Map( lambda x: beam.Row( - ride_status=str(x['ride_status']), - passenger_count=int(x['passenger_count']))) + ride_status=str(x['ride_status']), passenger_count=int( + x['passenger_count']))) # SqlTransform will computes result within an existing window | "15s fixed windows" >> beam.WindowInto(beam.window.FixedWindows(15)) # Aggregate drop offs and pick ups that occur within each 15s window @@ -68,13 +68,10 @@ def run(output_topic, pipeline_args): # the outputs of the query. 
# Collect those attributes, as well as window information, into a dict | "Assemble Dictionary" >> beam.Map( - lambda row, - window=beam.DoFn.WindowParam: { - "ride_status": row.ride_status, - "num_rides": row.num_rides, - "total_passengers": row.total_passengers, - "window_start": window.start.to_rfc3339(), - "window_end": window.end.to_rfc3339() + lambda row, window=beam.DoFn.WindowParam: { + "ride_status": row.ride_status, "num_rides": row.num_rides, + "total_passengers": row.total_passengers, "window_start": window + .start.to_rfc3339(), "window_end": window.end.to_rfc3339() }) | "Convert to JSON" >> beam.Map(json.dumps) | "UTF-8 encode" >> beam.Map(lambda s: s.encode("utf-8")) diff --git a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py index 48980526cf18..ce4240d96591 100644 --- a/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py +++ b/sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py @@ -545,8 +545,7 @@ class id will also reuse this class definition. skeleton_class = types.new_class( name, - bases, {"metaclass": type_constructor}, - lambda ns: ns.update(type_kwargs)) + bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs)) return _lookup_class_or_track(class_tracker_id, skeleton_class) @@ -1432,11 +1431,14 @@ def save_global(self, obj, name=None, pack=struct.pack): dispatched here. """ if obj is type(None): # noqa - return self.save_reduce(type, (None, ), obj=obj) + return self.save_reduce( + type, (None, ), obj=obj) elif obj is type(Ellipsis): - return self.save_reduce(type, (Ellipsis, ), obj=obj) + return self.save_reduce( + type, (Ellipsis, ), obj=obj) elif obj is type(NotImplemented): - return self.save_reduce(type, (NotImplemented, ), obj=obj) + return self.save_reduce( + type, (NotImplemented, ), obj=obj) elif obj in _BUILTIN_TYPE_NAMES: return self.save_reduce( _builtin_type, (_BUILTIN_TYPE_NAMES[obj], ), obj=obj) diff --git a/sdks/python/apache_beam/internal/dill_pickler.py b/sdks/python/apache_beam/internal/dill_pickler.py index 35953f438576..3c0e9f6c08ad 100644 --- a/sdks/python/apache_beam/internal/dill_pickler.py +++ b/sdks/python/apache_beam/internal/dill_pickler.py @@ -312,7 +312,8 @@ def save_module(pickler, obj): else: dill_log.info('M2: %s' % obj) # pylint: disable=protected-access - pickler.save_reduce(dill.dill._import_module, (obj.__name__, ), obj=obj) + pickler.save_reduce( + dill.dill._import_module, (obj.__name__, ), obj=obj) # pylint: enable=protected-access dill_log.info('# M2') diff --git a/sdks/python/apache_beam/internal/metrics/metric.py b/sdks/python/apache_beam/internal/metrics/metric.py index 8acf800ff8c6..19e2694acc8d 100644 --- a/sdks/python/apache_beam/internal/metrics/metric.py +++ b/sdks/python/apache_beam/internal/metrics/metric.py @@ -152,8 +152,8 @@ def log_metrics(self, reset_after_logging: bool = False) -> None: if self._lock.acquire(False): try: current_millis = int(time.time() * 1000) - if ((current_millis - self._last_logging_millis) > - self.minimum_logging_frequency_msec): + if ((current_millis - self._last_logging_millis) + > self.minimum_logging_frequency_msec): logging_metric_info = [ '[Locally aggregated metrics since %s]' % datetime.datetime.fromtimestamp( diff --git a/sdks/python/apache_beam/internal/util.py b/sdks/python/apache_beam/internal/util.py index 85a6e4c43b83..cf2b5fdbb6b3 100644 --- a/sdks/python/apache_beam/internal/util.py +++ b/sdks/python/apache_beam/internal/util.py @@ -97,8 +97,7 @@ def swapper(value): 
# by sorting the entries first. This will be important when putting back # PValues. new_kwargs = dict((k, swapper(v)) if isinstance(v, pvalue_class) else (k, v) - for k, - v in sorted(kwargs.items())) + for k, v in sorted(kwargs.items())) return (new_args, new_kwargs, pvals) @@ -123,8 +122,8 @@ def insert_values_in_args(args, kwargs, values): for arg in args ] new_kwargs = dict( - (k, next(v_iter)) if isinstance(v, ArgumentPlaceholder) else (k, v) for k, - v in sorted(kwargs.items())) + (k, next(v_iter)) if isinstance(v, ArgumentPlaceholder) else (k, v) + for k, v in sorted(kwargs.items())) return (new_args, new_kwargs) diff --git a/sdks/python/apache_beam/io/avroio.py b/sdks/python/apache_beam/io/avroio.py index d22ac84fea36..3438cb5d61fe 100644 --- a/sdks/python/apache_beam/io/avroio.py +++ b/sdks/python/apache_beam/io/avroio.py @@ -410,13 +410,8 @@ def __init__( """ self._schema = schema self._sink_provider = lambda avro_schema: _create_avro_sink( - file_path_prefix, - avro_schema, - codec, - file_name_suffix, - num_shards, - shard_name_template, - mime_type) + file_path_prefix, avro_schema, codec, file_name_suffix, num_shards, + shard_name_template, mime_type) def expand(self, pcoll): if self._schema: diff --git a/sdks/python/apache_beam/io/avroio_test.py b/sdks/python/apache_beam/io/avroio_test.py index 7bf256d34ec1..633b1307eb45 100644 --- a/sdks/python/apache_beam/io/avroio_test.py +++ b/sdks/python/apache_beam/io/avroio_test.py @@ -172,8 +172,8 @@ def test_schema_read_write(self): @pytest.mark.xlang_sql_expansion_service @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - None, + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is None, "Must be run with a runner that supports staging java artifacts.") def test_avro_schema_to_beam_schema_with_nullable_atomic_fields(self): records = [] diff --git a/sdks/python/apache_beam/io/external/xlang_debeziumio_it_test.py b/sdks/python/apache_beam/io/external/xlang_debeziumio_it_test.py index abe9530787e8..f343f88ec802 100644 --- a/sdks/python/apache_beam/io/external/xlang_debeziumio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_debeziumio_it_test.py @@ -37,8 +37,8 @@ @unittest.skipIf( PostgresContainer is None, 'testcontainers package is not installed') @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - None, + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is None, 'Do not run this test on precommit suites.') class CrossLanguageDebeziumIOTest(unittest.TestCase): def setUp(self): diff --git a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py index ca6fa3d711ca..9aed0d5f11d5 100644 --- a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py @@ -100,12 +100,12 @@ @unittest.skipIf( PostgresContainer is None, 'testcontainers package is not installed') @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - None, + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is None, 'Do not run this test on precommit suites.') @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - not None and + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is not None and "dataflowrunner" in TestPipeline().get_pipeline_options().view_as( 
StandardOptions).runner.lower(), 'Do not run this test on dataflow runner.') diff --git a/sdks/python/apache_beam/io/filebasedsink.py b/sdks/python/apache_beam/io/filebasedsink.py index eb433bd60583..8bb0f7e2171e 100644 --- a/sdks/python/apache_beam/io/filebasedsink.py +++ b/sdks/python/apache_beam/io/filebasedsink.py @@ -440,10 +440,10 @@ def write(self, value): def at_capacity(self): return ( self.sink.max_records_per_shard and - self.num_records_written >= self.sink.max_records_per_shard - ) or ( - self.sink.max_bytes_per_shard and - self.sink.byte_counter.bytes_written >= self.sink.max_bytes_per_shard) + self.num_records_written >= self.sink.max_records_per_shard) or ( + self.sink.max_bytes_per_shard and + self.sink.byte_counter.bytes_written + >= self.sink.max_bytes_per_shard) def close(self): self.sink.close(self.temp_handle) diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py index 111206a18a28..3251e567763f 100644 --- a/sdks/python/apache_beam/io/fileio.py +++ b/sdks/python/apache_beam/io/fileio.py @@ -879,12 +879,13 @@ def finish_bundle(self): sink.flush() writer.close() - file_result = FileResult(self._file_names[key], - shard_index=-1, - total_shards=0, - window=key[1], - pane=None, # TODO(pabloem): get the pane info - destination=key[0]) + file_result = FileResult( + self._file_names[key], + shard_index=-1, + total_shards=0, + window=key[1], + pane=None, # TODO(pabloem): get the pane info + destination=key[0]) yield beam.pvalue.TaggedOutput( self.WRITTEN_FILES, diff --git a/sdks/python/apache_beam/io/fileio_test.py b/sdks/python/apache_beam/io/fileio_test.py index ff4be9d3d7cc..6733df8c70bf 100644 --- a/sdks/python/apache_beam/io/fileio_test.py +++ b/sdks/python/apache_beam/io/fileio_test.py @@ -497,8 +497,8 @@ def test_write_to_single_file_batch(self): def test_write_to_dynamic_destination(self): sink_params = [ - fileio.TextSink, # pass a type signature - fileio.TextSink() # pass a FileSink object + fileio.TextSink, # pass a type signature + fileio.TextSink() # pass a FileSink object ] for sink in sink_params: @@ -522,8 +522,8 @@ def test_write_to_dynamic_destination(self): | fileio.ReadMatches() | beam.Map( lambda f: ( - os.path.basename(f.metadata.path).split('-')[0], - sorted(map(int, f.read_utf8().strip().split('\n')))))) + os.path.basename(f.metadata.path).split('-')[0], sorted( + map(int, f.read_utf8().strip().split('\n')))))) assert_that( result, diff --git a/sdks/python/apache_beam/io/filesystem_test.py b/sdks/python/apache_beam/io/filesystem_test.py index ff53648d692a..ff701132bf75 100644 --- a/sdks/python/apache_beam/io/filesystem_test.py +++ b/sdks/python/apache_beam/io/filesystem_test.py @@ -210,8 +210,8 @@ def test_match_glob(self, file_pattern, expected_object_names): # It's a filter function of type (str, int) -> bool # that returns true for expected objects filter_func = expected_object_names - expected_object_names = [(short_path, size) for short_path, - size in objects + expected_object_names = [(short_path, size) + for short_path, size in objects if filter_func(short_path, size)] for object_name, size in objects: @@ -219,8 +219,7 @@ def test_match_glob(self, file_pattern, expected_object_names): self.fs._insert_random_file(file_name, size) expected_file_names = [('gs://%s/%s' % (bucket_name, object_name), size) - for object_name, - size in expected_object_names] + for object_name, size in expected_object_names] actual_file_names = [ (file_metadata.path, file_metadata.size_in_bytes) for file_metadata in 
self._flatten_match(self.fs.match([file_pattern])) diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py index 581859698c67..08014511538c 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery.py +++ b/sdks/python/apache_beam/io/gcp/bigquery.py @@ -1618,8 +1618,8 @@ def process( # Flush current batch first if adding this row will exceed our limits # limits: byte size; number of rows - if ((self._destination_buffer_byte_size[destination] + row_byte_size > - self._max_insert_payload_size) or + if ((self._destination_buffer_byte_size[destination] + row_byte_size + > self._max_insert_payload_size) or len(self._rows_buffer[destination]) >= self._max_batch_size): flushed_batch = self._flush_batch(destination) # After flushing our existing batch, we now buffer the current row @@ -1713,9 +1713,8 @@ def _flush_batch(self, destination): # - WARNING when we are continuing to retry, and have a deadline. # - ERROR when we will no longer retry, or MAY retry forever. log_level = ( - logging.WARN if should_retry or - self._retry_strategy != RetryStrategy.RETRY_ALWAYS else - logging.ERROR) + logging.WARN if should_retry or self._retry_strategy + != RetryStrategy.RETRY_ALWAYS else logging.ERROR) _LOGGER.log(log_level, message) @@ -1741,16 +1740,13 @@ def _flush_batch(self, destination): [ pvalue.TaggedOutput( BigQueryWriteFn.FAILED_ROWS_WITH_ERRORS, - w.with_value((destination, row, err))) for row, - err, - w in failed_rows + w.with_value((destination, row, err))) + for row, err, w in failed_rows ], [ pvalue.TaggedOutput( BigQueryWriteFn.FAILED_ROWS, w.with_value((destination, row))) - for row, - unused_err, - w in failed_rows + for row, unused_err, w in failed_rows ]) @@ -2332,10 +2328,9 @@ def to_runner_api_parameter(self, context): # remove_objects_from_args and insert_values_in_args # are currently implemented. def serialize(side_inputs): - return {(SIDE_INPUT_PREFIX + '%s') % ix: - si.to_runner_api(context).SerializeToString() - for ix, - si in enumerate(side_inputs)} + return {(SIDE_INPUT_PREFIX + '%s') % ix: si.to_runner_api( + context).SerializeToString() + for ix, si in enumerate(side_inputs)} table_side_inputs = serialize(self.table_side_inputs) schema_side_inputs = serialize(self.schema_side_inputs) @@ -2383,8 +2378,8 @@ def deserialize(side_inputs): # to_runner_api_parameter above). indexed_side_inputs = [( get_sideinput_index(tag), - pvalue.AsSideInput.from_runner_api(si, context)) for tag, - si in deserialized_side_inputs.items()] + pvalue.AsSideInput.from_runner_api(si, context)) + for tag, si in deserialized_side_inputs.items()] return [si for _, si in sorted(indexed_side_inputs)] config['table_side_inputs'] = deserialize(config['table_side_inputs']) @@ -2662,8 +2657,8 @@ def expand(self, input): failed_rows = failed_rows | beam.Map(lambda row: row.as_dict()) failed_rows_with_errors = failed_rows_with_errors | beam.Map( lambda row: { - "error_message": row.error_message, - "failed_row": row.failed_row.as_dict() + "error_message": row.error_message, "failed_row": row.failed_row. + as_dict() }) return WriteResult( @@ -2683,8 +2678,8 @@ def expand(self, input_dicts): | "Convert dict to Beam Row" >> beam.Map( lambda row: beam.Row( **{ - StorageWriteToBigQuery.DESTINATION: row[0], - StorageWriteToBigQuery.RECORD: bigquery_tools. + StorageWriteToBigQuery.DESTINATION: row[ + 0], StorageWriteToBigQuery.RECORD: bigquery_tools. 
beam_row_from_dict(row[1], self.schema) }))) else: diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py index 2f0375859235..62cf7a2c52c9 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py @@ -1157,8 +1157,7 @@ def _load_data( # https://github.com/apache/beam/issues/24535. finished_temp_tables_load_job_ids_list_pc = ( finished_temp_tables_load_job_ids_pc | beam.MapTuple( - lambda destination, - job_reference: ( + lambda destination, job_reference: ( bigquery_tools.parse_table_reference(destination).tableId, (destination, job_reference))) | beam.GroupByKey() @@ -1246,8 +1245,7 @@ def expand(self, pcoll): singleton_pc | "SchemaModJobNamePrefix" >> beam.Map( lambda _: _generate_job_name( - job_name, - bigquery_tools.BigQueryJobTypes.LOAD, + job_name, bigquery_tools.BigQueryJobTypes.LOAD, 'SCHEMA_MOD_STEP'))) copy_job_name_pcv = pvalue.AsSingleton( diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py index 6908a0fb0392..84e8ecfc486e 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py @@ -872,12 +872,12 @@ def __call__(self): if is_streaming: _SIZE = len(_ELEMENTS) fisrt_batch = [ - TimestampedValue(value, start_time + i + 1) for i, - value in enumerate(_ELEMENTS[:_SIZE // 2]) + TimestampedValue(value, start_time + i + 1) + for i, value in enumerate(_ELEMENTS[:_SIZE // 2]) ] second_batch = [ - TimestampedValue(value, start_time + _SIZE // 2 + i + 1) for i, - value in enumerate(_ELEMENTS[_SIZE // 2:]) + TimestampedValue(value, start_time + _SIZE // 2 + i + 1) + for i, value in enumerate(_ELEMENTS[_SIZE // 2:]) ] # Advance processing time between batches of input elements to fire the # user triggers. 
Intentionally advance the processing time twice for the @@ -1076,12 +1076,10 @@ def test_multiple_destinations_transform(self): _ = ( input | "WriteWithMultipleDestsFreely" >> bigquery.WriteToBigQuery( - table=lambda x, - tables: + table=lambda x, tables: (tables['table1'] if 'language' in x else tables['table2']), table_side_inputs=(table_record_pcv, ), - schema=lambda dest, - schema_map: schema_map.get(dest, None), + schema=lambda dest, schema_map: schema_map.get(dest, None), schema_side_inputs=(schema_map_pcv, ), create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED, write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)) @@ -1090,8 +1088,7 @@ def test_multiple_destinations_transform(self): input | "WriteWithMultipleDests" >> bigquery.WriteToBigQuery( table=lambda x: (output_table_3 if 'language' in x else output_table_4), - schema=lambda dest, - schema_map: schema_map.get(dest, None), + schema=lambda dest, schema_map: schema_map.get(dest, None), schema_side_inputs=(schema_map_pcv, ), create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED, write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY, diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py index 435fe67d02fc..05bb1cdac4a8 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py @@ -172,8 +172,8 @@ def test_row_as_table_row(self): '"g": "LINESTRING(1 2, 3 4, 5 6, 7 8)"}') schema = bigquery.TableSchema( fields=[ - bigquery.TableFieldSchema(name=k, type=v) for k, - v in schema_definition + bigquery.TableFieldSchema(name=k, type=v) + for k, v in schema_definition ]) coder = TableRowJsonCoder(table_schema=schema) @@ -211,8 +211,8 @@ def json_compliance_exception(self, value): schema_definition = [('f', 'FLOAT')] schema = bigquery.TableSchema( fields=[ - bigquery.TableFieldSchema(name=k, type=v) for k, - v in schema_definition + bigquery.TableFieldSchema(name=k, type=v) + for k, v in schema_definition ]) coder = TableRowJsonCoder(table_schema=schema) test_row = bigquery.TableRow( @@ -962,8 +962,7 @@ def test_to_from_runner_api(self): schema = value_provider.StaticValueProvider(str, '"a:str"') original = WriteToBigQuery( - table=lambda _, - side_input: side_input['table'], + table=lambda _, side_input: side_input['table'], table_side_inputs=(table_record_pcv, ), schema=schema) @@ -978,8 +977,7 @@ def test_to_from_runner_api(self): # Find the transform from the context. 
write_to_bq_id = [ - k for k, - v in pipeline_proto.components.transforms.items() + k for k, v in pipeline_proto.components.transforms.items() if v.unique_name == 'MyWriteToBigQuery' ][0] deserialized_node = context.transforms.get_by_id(write_to_bq_id) @@ -1219,25 +1217,27 @@ class BigQueryStreamingInsertsErrorHandling(unittest.TestCase): # failed rows param( insert_response=[ - exceptions.TooManyRequests if exceptions else None, - None], - error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS + exceptions.TooManyRequests if exceptions else None, None + ], + error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS failed_rows=[]), # reason not in _NON_TRANSIENT_ERRORS for row 1 on both attempts, sent to # failed rows after hitting max_retries param( insert_response=[ - exceptions.InternalServerError if exceptions else None, - exceptions.InternalServerError if exceptions else None], - error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS + exceptions.InternalServerError if exceptions else None, + exceptions.InternalServerError if exceptions else None + ], + error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS failed_rows=['value1', 'value3', 'value5']), # reason in _NON_TRANSIENT_ERRORS for row 1 on both attempts, sent to # failed_rows after hitting max_retries param( insert_response=[ - exceptions.Forbidden if exceptions else None, - exceptions.Forbidden if exceptions else None], - error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS + exceptions.Forbidden if exceptions else None, + exceptions.Forbidden if exceptions else None + ], + error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS failed_rows=['value1', 'value3', 'value5']), ]) def test_insert_rows_json_exception_retry_always( @@ -1363,63 +1363,63 @@ def test_insert_rows_json_exception_retry_never( @parameterized.expand([ param( exception_type=exceptions.DeadlineExceeded if exceptions else None, - error_reason='Deadline Exceeded', # not in _NON_TRANSIENT_ERRORS + error_reason='Deadline Exceeded', # not in _NON_TRANSIENT_ERRORS failed_values=[], expected_call_count=2), param( exception_type=exceptions.Conflict if exceptions else None, - error_reason='Conflict', # not in _NON_TRANSIENT_ERRORS + error_reason='Conflict', # not in _NON_TRANSIENT_ERRORS failed_values=[], expected_call_count=2), param( exception_type=exceptions.TooManyRequests if exceptions else None, - error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS + error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS failed_values=[], expected_call_count=2), param( exception_type=exceptions.InternalServerError if exceptions else None, - error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS + error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS failed_values=[], expected_call_count=2), param( exception_type=exceptions.BadGateway if exceptions else None, - error_reason='Bad Gateway', # not in _NON_TRANSIENT_ERRORS + error_reason='Bad Gateway', # not in _NON_TRANSIENT_ERRORS failed_values=[], expected_call_count=2), param( exception_type=exceptions.ServiceUnavailable if exceptions else None, - error_reason='Service Unavailable', # not in _NON_TRANSIENT_ERRORS + error_reason='Service Unavailable', # not in _NON_TRANSIENT_ERRORS failed_values=[], expected_call_count=2), param( exception_type=exceptions.GatewayTimeout if exceptions else None, - error_reason='Gateway Timeout', # not in _NON_TRANSIENT_ERRORS + error_reason='Gateway Timeout', # not in _NON_TRANSIENT_ERRORS 
failed_values=[], expected_call_count=2), param( exception_type=exceptions.BadRequest if exceptions else None, - error_reason='Bad Request', # in _NON_TRANSIENT_ERRORS + error_reason='Bad Request', # in _NON_TRANSIENT_ERRORS failed_values=['value1', 'value2'], expected_call_count=1), param( exception_type=exceptions.Unauthorized if exceptions else None, - error_reason='Unauthorized', # in _NON_TRANSIENT_ERRORS + error_reason='Unauthorized', # in _NON_TRANSIENT_ERRORS failed_values=['value1', 'value2'], expected_call_count=1), param( exception_type=exceptions.Forbidden if exceptions else None, - error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS + error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS failed_values=['value1', 'value2'], expected_call_count=1), param( exception_type=exceptions.NotFound if exceptions else None, - error_reason='Not Found', # in _NON_TRANSIENT_ERRORS + error_reason='Not Found', # in _NON_TRANSIENT_ERRORS failed_values=['value1', 'value2'], expected_call_count=1), param( exception_type=exceptions.MethodNotImplemented - if exceptions else None, - error_reason='Not Implemented', # in _NON_TRANSIENT_ERRORS + if exceptions else None, + error_reason='Not Implemented', # in _NON_TRANSIENT_ERRORS failed_values=['value1', 'value2'], expected_call_count=1), ]) @@ -2460,12 +2460,10 @@ def test_multiple_destinations_transform(self): r = ( input | "WriteWithMultipleDests" >> beam.io.gcp.bigquery.WriteToBigQuery( - table=lambda x, - tables: + table=lambda x, tables: (tables['table1'] if 'language' in x else tables['table2']), table_side_inputs=(table_record_pcv, ), - schema=lambda dest, - table_map: table_map.get(dest, None), + schema=lambda dest, table_map: table_map.get(dest, None), schema_side_inputs=(schema_table_pcv, ), insert_retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR, method='STREAMING_INSERTS')) @@ -2665,8 +2663,7 @@ def test_avro_file_load(self): input | 'WriteToBigQuery' >> beam.io.gcp.bigquery.WriteToBigQuery( table='%s:%s' % (self.project, self.output_table), - schema=lambda _, - schema: schema, + schema=lambda _, schema: schema, schema_side_inputs=(beam.pvalue.AsSingleton(schema_pc), ), method='FILE_LOADS', temp_file_format=bigquery_tools.FileFormat.AVRO, diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py index 28935aa5cdd9..081571bfef99 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py @@ -1282,8 +1282,8 @@ def insert_rows( # can happen during retries on failures. # TODO(silviuc): Must add support to writing TableRow's instead of dicts. 
insert_ids = [ - str(self.unique_row_id) if not insert_ids else insert_ids[i] for i, - _ in enumerate(rows) + str(self.unique_row_id) if not insert_ids else insert_ids[i] + for i, _ in enumerate(rows) ] rows = [ fast_json_loads(fast_json_dumps(r, default=default_encoder)) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py index 5b54e7076e23..522c8667f183 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py @@ -994,9 +994,8 @@ def test_typehints_from_repeated_schema(self): schema = {"fields": self.get_schema_fields_with_mode("repeated")} typehints = get_beam_typehints_from_tableschema(schema) - expected_repeated_typehints = [ - (name, Sequence[type]) for name, type in self.EXPECTED_TYPEHINTS - ] + expected_repeated_typehints = [(name, Sequence[type]) + for name, type in self.EXPECTED_TYPEHINTS] self.assertEqual(typehints, expected_repeated_typehints) @@ -1004,9 +1003,8 @@ def test_typehints_from_nullable_schema(self): schema = {"fields": self.get_schema_fields_with_mode("nullable")} typehints = get_beam_typehints_from_tableschema(schema) - expected_nullable_typehints = [ - (name, Optional[type]) for name, type in self.EXPECTED_TYPEHINTS - ] + expected_nullable_typehints = [(name, Optional[type]) + for name, type in self.EXPECTED_TYPEHINTS] self.assertEqual(typehints, expected_nullable_typehints) diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py index 88ba5984d9b0..c694383dcf9b 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py @@ -595,8 +595,10 @@ def test_big_query_write_temp_table_append_schema_update(self, file_format): max_file_size=1, # bytes method=beam.io.WriteToBigQuery.Method.FILE_LOADS, additional_bq_parameters={ - 'schemaUpdateOptions': ['ALLOW_FIELD_ADDITION', - 'ALLOW_FIELD_RELAXATION']}, + 'schemaUpdateOptions': [ + 'ALLOW_FIELD_ADDITION', 'ALLOW_FIELD_RELAXATION' + ] + }, temp_file_format=file_format)) diff --git a/sdks/python/apache_beam/io/gcp/experimental/spannerio.py b/sdks/python/apache_beam/io/gcp/experimental/spannerio.py index 3b616a2452a8..7b615e223cfc 100644 --- a/sdks/python/apache_beam/io/gcp/experimental/spannerio.py +++ b/sdks/python/apache_beam/io/gcp/experimental/spannerio.py @@ -680,15 +680,25 @@ class ReadFromSpanner(PTransform): A PTransform to perform reads from cloud spanner. ReadFromSpanner uses BatchAPI to perform all read operations. """ - - def __init__(self, project_id, instance_id, database_id, pool=None, - read_timestamp=None, exact_staleness=None, credentials=None, - sql=None, params=None, param_types=None, # with_query - table=None, query_name=None, columns=None, index="", - keyset=None, # with_table - read_operations=None, # for read all - transaction=None - ): + def __init__( + self, + project_id, + instance_id, + database_id, + pool=None, + read_timestamp=None, + exact_staleness=None, + credentials=None, + sql=None, + params=None, + param_types=None, # with_query + table=None, + query_name=None, + columns=None, + index="", + keyset=None, # with_table + read_operations=None, # for read all + transaction=None): """ A PTransform that uses Spanner Batch API to perform reads. 
@@ -986,11 +996,8 @@ def __init__( self._replace = replace self._delete = delete - if sum([1 for x in [self._insert, - self._update, - self._insert_or_update, - self._replace, - self._delete] if x is not None]) != 1: + if sum([1 for x in [self._insert, self._update, self._insert_or_update, + self._replace, self._delete] if x is not None]) != 1: raise ValueError( "No or more than one write mutation operation " "provided: <%s: %s>" % (self.__class__.__name__, str(self.__dict__))) diff --git a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py index ade8529dcac8..08fdd6302887 100644 --- a/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py +++ b/sdks/python/apache_beam/io/gcp/gcsfilesystem_test.py @@ -189,10 +189,8 @@ def test_copy_file_error(self, mock_gcsio): gcsio_mock.copy.side_effect = exception # Issue batch rename. - expected_results = { - (s, d): exception - for s, d in zip(sources, destinations) - } + expected_results = {(s, d): exception + for s, d in zip(sources, destinations)} # Issue batch copy. with self.assertRaisesRegex(BeamIOError, diff --git a/sdks/python/apache_beam/io/gcp/pubsub_test.py b/sdks/python/apache_beam/io/gcp/pubsub_test.py index 73ba8d6abdb6..feee9dc0082b 100644 --- a/sdks/python/apache_beam/io/gcp/pubsub_test.py +++ b/sdks/python/apache_beam/io/gcp/pubsub_test.py @@ -329,9 +329,9 @@ def test_expand_with_multiple_sources_and_other_options(self): PubSubSourceDescriptor( source=source, id_label=id_label, - timestamp_attribute=timestamp_attribute) for source, - id_label, - timestamp_attribute in zip(sources, id_labels, timestamp_attributes) + timestamp_attribute=timestamp_attribute) + for source, id_label, timestamp_attribute in zip( + sources, id_labels, timestamp_attributes) ] pcoll = (p | MultipleReadFromPubSub(pubsub_sources) | beam.Map(lambda x: x)) diff --git a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py index 4504ba43b2c1..ff48a5644916 100644 --- a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py +++ b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py @@ -231,8 +231,8 @@ def _matches(self, _): _LOGGER.info('Table proto is %s', self.actual_table) return all( - self._match_property(v, self._get_or_none(self.actual_table, k)) for k, - v in self.expected_properties.items()) + self._match_property(v, self._get_or_none(self.actual_table, k)) + for k, v in self.expected_properties.items()) @staticmethod def _get_or_none(obj, attr): @@ -250,8 +250,8 @@ def _match_property(expected, actual): if isinstance(expected, dict): return all( BigQueryTableMatcher._match_property( - v, BigQueryTableMatcher._get_or_none(actual, k)) for k, - v in expected.items()) + v, BigQueryTableMatcher._get_or_none(actual, k)) + for k, v in expected.items()) else: return expected == actual diff --git a/sdks/python/apache_beam/io/gcp/tests/utils_test.py b/sdks/python/apache_beam/io/gcp/tests/utils_test.py index 5ac41df1d3e5..a6b91ca14965 100644 --- a/sdks/python/apache_beam/io/gcp/tests/utils_test.py +++ b/sdks/python/apache_beam/io/gcp/tests/utils_test.py @@ -210,14 +210,13 @@ def test_read_from_pubsub_many(self): } for i in range(number_of_elements)] ack_ids = ['ack_id_{}'.format(i) for i in range(number_of_elements)] messages = [ - PubsubMessage(data, attributes) for data, - attributes in zip(data_list, attributes_list) + PubsubMessage(data, attributes) + for data, attributes in zip(data_list, attributes_list) ] response_messages = [ 
test_utils.PullResponseMessage(data, attributes, ack_id=ack_id) - for data, - attributes, - ack_id in zip(data_list, attributes_list, ack_ids) + for data, attributes, ack_id in zip( + data_list, attributes_list, ack_ids) ] class SequentialPullResponse(object): diff --git a/sdks/python/apache_beam/io/localfilesystem_test.py b/sdks/python/apache_beam/io/localfilesystem_test.py index 1370790970e9..df528170c082 100644 --- a/sdks/python/apache_beam/io/localfilesystem_test.py +++ b/sdks/python/apache_beam/io/localfilesystem_test.py @@ -357,8 +357,8 @@ def check_tree(self, path, value, expected_leaf_count=None): elif isinstance(value, dict): # recurse to check subdirectory tree actual_leaf_count = sum([ - self.check_tree(os.path.join(path, basename), v) for basename, - v in value.items() + self.check_tree(os.path.join(path, basename), v) + for basename, v in value.items() ]) else: raise Exception('Unexpected value in tempdir tree: %s' % value) diff --git a/sdks/python/apache_beam/io/mongodbio.py b/sdks/python/apache_beam/io/mongodbio.py index 6ffc82f59676..834c051aca5c 100644 --- a/sdks/python/apache_beam/io/mongodbio.py +++ b/sdks/python/apache_beam/io/mongodbio.py @@ -458,12 +458,12 @@ def _get_split_keys( with MongoClient(self.uri, **self.spec) as client: name_space = "%s.%s" % (self.db, self.coll) return client[self.db].command( - "splitVector", - name_space, - keyPattern={"_id": 1}, # Ascending index - min={"_id": start_pos}, - max={"_id": end_pos}, - maxChunkSize=desired_chunk_size_in_mb, + "splitVector", + name_space, + keyPattern={"_id": 1}, # Ascending index + min={"_id": start_pos}, + max={"_id": end_pos}, + maxChunkSize=desired_chunk_size_in_mb, )["splitKeys"] def _get_auto_buckets( diff --git a/sdks/python/apache_beam/io/mongodbio_test.py b/sdks/python/apache_beam/io/mongodbio_test.py index 150eac2d5437..d9f630d01861 100644 --- a/sdks/python/apache_beam/io/mongodbio_test.py +++ b/sdks/python/apache_beam/io/mongodbio_test.py @@ -104,9 +104,10 @@ def _filter(self, filter): @staticmethod def _projection(docs, projection=None): if projection: - return [{k: v - for k, v in doc.items() if k in projection or k == '_id'} - for doc in docs] + return [{ + k: v + for k, v in doc.items() if k in projection or k == '_id' + } for doc in docs] return docs def find(self, filter=None, projection=None, **kwargs): diff --git a/sdks/python/apache_beam/io/range_trackers.py b/sdks/python/apache_beam/io/range_trackers.py index ba56fd3f3559..626564b00948 100644 --- a/sdks/python/apache_beam/io/range_trackers.py +++ b/sdks/python/apache_beam/io/range_trackers.py @@ -207,9 +207,9 @@ def split_points(self): if self._split_points_unclaimed_callback else iobase.RangeTracker.SPLIT_POINTS_UNKNOWN) split_points_remaining = ( - iobase.RangeTracker.SPLIT_POINTS_UNKNOWN - if split_points_unclaimed == iobase.RangeTracker.SPLIT_POINTS_UNKNOWN - else (split_points_unclaimed + 1)) + iobase.RangeTracker.SPLIT_POINTS_UNKNOWN if split_points_unclaimed + == iobase.RangeTracker.SPLIT_POINTS_UNKNOWN else + (split_points_unclaimed + 1)) return (split_points_consumed, split_points_remaining) diff --git a/sdks/python/apache_beam/io/source_test_utils.py b/sdks/python/apache_beam/io/source_test_utils.py index b40f70604c42..1b3aeb474462 100644 --- a/sdks/python/apache_beam/io/source_test_utils.py +++ b/sdks/python/apache_beam/io/source_test_utils.py @@ -156,8 +156,8 @@ def assert_sources_equal_reference_source(reference_source_info, sources_info): 'source_info must a three tuple where first' 'item of the tuple gives a ' 
'iobase.BoundedSource. Received: %r' % source_info) - if (type(reference_source_info[0].default_output_coder()) != type( - source_info[0].default_output_coder())): + if (type(reference_source_info[0].default_output_coder()) + != type(source_info[0].default_output_coder())): raise ValueError( 'Reference source %r and the source %r must use the same coder. ' 'They are using %r and %r respectively instead.' % ( @@ -341,8 +341,8 @@ def _assert_split_at_fraction_behavior( num_items_to_read_before_split, split_result)) - elif ( - expected_outcome != ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS): + elif (expected_outcome + != ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS): raise ValueError('Unknown type of expected outcome: %r' % expected_outcome) current_items.extend([value for value in reader_iter]) diff --git a/sdks/python/apache_beam/metrics/cells.py b/sdks/python/apache_beam/metrics/cells.py index ac6b278abc25..b4703c5b5b96 100644 --- a/sdks/python/apache_beam/metrics/cells.py +++ b/sdks/python/apache_beam/metrics/cells.py @@ -689,8 +689,7 @@ def from_proto(proto: metrics_pb2.BoundedTrieNode) -> '_BoundedTrieNode': else: node._children = { name: _BoundedTrieNode.from_proto(child) - for name, - child in proto.children.items() + for name, child in proto.children.items() } node._size = max(1, sum(child._size for child in node._children.values())) return node diff --git a/sdks/python/apache_beam/metrics/cells_test.py b/sdks/python/apache_beam/metrics/cells_test.py index 1cd15fced86c..106f7542b230 100644 --- a/sdks/python/apache_beam/metrics/cells_test.py +++ b/sdks/python/apache_beam/metrics/cells_test.py @@ -49,7 +49,8 @@ def test_parallel_access(self): threads = [] c = CounterCell() for _ in range(TestCounterCell.NUM_THREADS): - t = threading.Thread(target=TestCounterCell._modify_counter, args=(c, )) + t = threading.Thread( + target=TestCounterCell._modify_counter, args=(c, )) threads.append(t) t.start() diff --git a/sdks/python/apache_beam/metrics/execution.py b/sdks/python/apache_beam/metrics/execution.py index c28c8340a505..a3414447c48f 100644 --- a/sdks/python/apache_beam/metrics/execution.py +++ b/sdks/python/apache_beam/metrics/execution.py @@ -287,32 +287,27 @@ def get_cumulative(self): """ counters = { MetricKey(self.step_name, k.metric_name): v.get_cumulative() - for k, - v in self.metrics.items() if k.cell_type == CounterCell + for k, v in self.metrics.items() if k.cell_type == CounterCell } distributions = { MetricKey(self.step_name, k.metric_name): v.get_cumulative() - for k, - v in self.metrics.items() if k.cell_type == DistributionCell + for k, v in self.metrics.items() if k.cell_type == DistributionCell } gauges = { MetricKey(self.step_name, k.metric_name): v.get_cumulative() - for k, - v in self.metrics.items() if k.cell_type == GaugeCell + for k, v in self.metrics.items() if k.cell_type == GaugeCell } string_sets = { MetricKey(self.step_name, k.metric_name): v.get_cumulative() - for k, - v in self.metrics.items() if k.cell_type == StringSetCell + for k, v in self.metrics.items() if k.cell_type == StringSetCell } bounded_tries = { MetricKey(self.step_name, k.metric_name): v.get_cumulative() - for k, - v in self.metrics.items() if k.cell_type == BoundedTrieCell + for k, v in self.metrics.items() if k.cell_type == BoundedTrieCell } return MetricUpdates( @@ -320,8 +315,8 @@ def get_cumulative(self): def to_runner_api(self): return [ - cell.to_runner_api_user_metric(key.metric_name) for key, - cell in self.metrics.items() + 
cell.to_runner_api_user_metric(key.metric_name) + for key, cell in self.metrics.items() ] def to_runner_api_monitoring_infos(self, transform_id): @@ -332,8 +327,7 @@ def to_runner_api_monitoring_infos(self, transform_id): items = list(self.metrics.items()) all_metrics = [ cell.to_runner_api_monitoring_info(key.metric_name, transform_id) - for key, - cell in items + for key, cell in items ] return { monitoring_infos.to_key(mi): mi diff --git a/sdks/python/apache_beam/metrics/monitoring_infos.py b/sdks/python/apache_beam/metrics/monitoring_infos.py index cb4e60e218f6..6dc4b7ef9c57 100644 --- a/sdks/python/apache_beam/metrics/monitoring_infos.py +++ b/sdks/python/apache_beam/metrics/monitoring_infos.py @@ -495,8 +495,7 @@ def merge(a, b): return metrics_pb2.MonitoringInfo( urn=a.urn, type=a.type, - labels=dict((label, value) for label, - value in a.labels.items() + labels=dict((label, value) for label, value in a.labels.items() if b.labels.get(label) == value), payload=combiner(a.payload, b.payload)) diff --git a/sdks/python/apache_beam/ml/anomaly/specifiable.py b/sdks/python/apache_beam/ml/anomaly/specifiable.py index 9cefcbfeabee..010fb4da29c2 100644 --- a/sdks/python/apache_beam/ml/anomaly/specifiable.py +++ b/sdks/python/apache_beam/ml/anomaly/specifiable.py @@ -181,8 +181,7 @@ def from_spec(cls, kwargs = { k: _specifiable_from_spec_helper(v, _run_init) - for k, - v in spec.config.items() + for k, v in spec.config.items() } if _run_init: diff --git a/sdks/python/apache_beam/ml/anomaly/specifiable_test.py b/sdks/python/apache_beam/ml/anomaly/specifiable_test.py index 4312b5b1fbf6..ccd8efd286cb 100644 --- a/sdks/python/apache_beam/ml/anomaly/specifiable_test.py +++ b/sdks/python/apache_beam/ml/anomaly/specifiable_test.py @@ -542,8 +542,7 @@ def test_lambda_function(self): type= f"", # pylint: disable=line-too-long config=None) - } - )) + })) w_2 = Specifiable.from_spec(w_spec) self.assertEqual(w_2.run_func(5, 3), 2) diff --git a/sdks/python/apache_beam/ml/anomaly/transforms.py b/sdks/python/apache_beam/ml/anomaly/transforms.py index d704f93ed618..b2ea0733353d 100644 --- a/sdks/python/apache_beam/ml/anomaly/transforms.py +++ b/sdks/python/apache_beam/ml/anomaly/transforms.py @@ -330,14 +330,10 @@ def expand( ret = ( post_gbk | beam.MapTuple( - lambda k, - v: ( - k[0], - ( - k[1], - AnomalyResult( - example=v[0].example, - predictions=[ + lambda k, v: ( + k[0], ( + k[1], AnomalyResult( + example=v[0].example, predictions=[ prediction for result in v for prediction in result.predictions ]))))) @@ -359,15 +355,10 @@ def expand( ret = ( post_gbk | beam.MapTuple( - lambda k, - v, - agg=aggregation_fn: ( - k[0], - ( - k[1], - AnomalyResult( - example=v[0].example, - predictions=[ + lambda k, v, agg=aggregation_fn: ( + k[0], ( + k[1], AnomalyResult( + example=v[0].example, predictions=[ agg.apply([ prediction for result in v for prediction in result.predictions diff --git a/sdks/python/apache_beam/ml/anomaly/transforms_test.py b/sdks/python/apache_beam/ml/anomaly/transforms_test.py index f743b1629ee6..c1c7a4f47f90 100644 --- a/sdks/python/apache_beam/ml/anomaly/transforms_test.py +++ b/sdks/python/apache_beam/ml/anomaly/transforms_test.py @@ -155,8 +155,8 @@ def test_one_detector(self): result, equal_to([( input[0], AnomalyResult(example=input[1], predictions=[decision])) - for input, - decision in zip(self._input, zscore_x1_expected)], + for input, decision in zip(self._input, zscore_x1_expected) + ], _keyed_result_is_equal_to)) def test_multiple_detectors_without_aggregation(self): @@ -228,9 
+228,7 @@ def test_multiple_detectors_without_aggregation(self): input[0], AnomalyResult( example=input[1], predictions=[decision1, decision2])) - for input, - decision1, - decision2 in zip( + for input, decision1, decision2 in zip( self._input, zscore_x1_expected, zscore_x2_expected)], _keyed_result_is_equal_to)) @@ -267,8 +265,7 @@ def test_multiple_sub_detectors_with_aggregation(self): equal_to([( input[0], AnomalyResult(example=input[1], predictions=[prediction])) - for input, - prediction in zip(self._input, aggregated)])) + for input, prediction in zip(self._input, aggregated)])) class FakeNumpyModel(): @@ -391,8 +388,7 @@ def test_default_inference_fn(self): equal_to([( input[0], AnomalyResult(example=input[1], predictions=[prediction])) - for input, - prediction in zip(input, expected_predictions)])) + for input, prediction in zip(input, expected_predictions)])) def test_run_inference_args(self): model_handler = SklearnModelHandlerNumpy(model_uri="unused") diff --git a/sdks/python/apache_beam/ml/inference/base_test.py b/sdks/python/apache_beam/ml/inference/base_test.py index 7f38ec074c3a..6497de3fe9d5 100644 --- a/sdks/python/apache_beam/ml/inference/base_test.py +++ b/sdks/python/apache_beam/ml/inference/base_test.py @@ -772,9 +772,8 @@ def mult_two(element): with TestPipeline() as pipeline: examples = [1, 5, 3, 10] keyed_examples = [(i, example) for i, example in enumerate(examples)] - expected = [ - (i, ((example * 2) + 1) * 2) for i, example in enumerate(examples) - ] + expected = [(i, ((example * 2) + 1) * 2) + for i, example in enumerate(examples)] pcoll = pipeline | 'start' >> beam.Create(keyed_examples) actual = pcoll | base.RunInference( base.KeyedModelHandler(FakeModelHandler()).with_preprocess_fn( @@ -792,9 +791,8 @@ def mult_two_keyed(element): examples = [1, 5, 3, 10] keyed_examples = [(i, example) for i, example in enumerate(examples)] expected = [((2 * example) + 1) * 2 for example in examples] - keyed_expected = [ - (i, ((2 * example) + 1) * 2) for i, example in enumerate(examples) - ] + keyed_expected = [(i, ((2 * example) + 1) * 2) + for i, example in enumerate(examples)] model_handler = base.MaybeKeyedModelHandler(FakeModelHandler()) pcoll = pipeline | 'Unkeyed' >> beam.Create(examples) diff --git a/sdks/python/apache_beam/ml/inference/huggingface_inference.py b/sdks/python/apache_beam/ml/inference/huggingface_inference.py index 5190551dcba0..181fa1b95afe 100644 --- a/sdks/python/apache_beam/ml/inference/huggingface_inference.py +++ b/sdks/python/apache_beam/ml/inference/huggingface_inference.py @@ -47,15 +47,16 @@ "HuggingFacePipelineModelHandler", ] -TensorInferenceFn = Callable[[ - Sequence[Union[torch.Tensor, tf.Tensor]], - Union[AutoModel, TFAutoModel], - str, - Optional[dict[str, Any]], - Optional[str], -], - Iterable[PredictionResult], - ] +TensorInferenceFn = Callable[ + [ + Sequence[Union[torch.Tensor, tf.Tensor]], + Union[AutoModel, TFAutoModel], + str, + Optional[dict[str, Any]], + Optional[str], + ], + Iterable[PredictionResult], +] KeyedTensorInferenceFn = Callable[[ Sequence[dict[str, Union[torch.Tensor, tf.Tensor]]], diff --git a/sdks/python/apache_beam/ml/inference/huggingface_inference_test.py b/sdks/python/apache_beam/ml/inference/huggingface_inference_test.py index b26927781cce..336d5f6512aa 100644 --- a/sdks/python/apache_beam/ml/inference/huggingface_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/huggingface_inference_test.py @@ -75,8 +75,7 @@ def test_predict_tensor(self): inference_fn=fake_inference_fn_tensor) batched_examples 
= [tf.constant([1]), tf.constant([10]), tf.constant([100])] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( + PredictionResult(ex, pred) for ex, pred in zip( batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) ] @@ -94,8 +93,7 @@ def test_predict_tensor_with_inference_args(self): inference_args={"add": True}) batched_examples = [tf.constant([1]), tf.constant([10]), tf.constant([100])] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( + PredictionResult(ex, pred) for ex, pred in zip( batched_examples, [ tf.math.add(tf.math.multiply(n, 10), 10) for n in batched_examples diff --git a/sdks/python/apache_beam/ml/inference/onnx_inference_test.py b/sdks/python/apache_beam/ml/inference/onnx_inference_test.py index ab87c4cceef2..e9e017661d41 100644 --- a/sdks/python/apache_beam/ml/inference/onnx_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/onnx_inference_test.py @@ -85,8 +85,7 @@ def get_one_feature_samples(self): def get_one_feature_predictions(self): return [ - PredictionResult(ex, pred) for ex, - pred in zip( + PredictionResult(ex, pred) for ex, pred in zip( self.get_one_feature_samples(), [example * 2.0 + 0.5 for example in self.get_one_feature_samples()]) ] @@ -101,12 +100,10 @@ def get_two_feature_examples(self): def get_two_feature_predictions(self): return [ - PredictionResult(ex, pred) for ex, - pred in zip( - self.get_two_feature_examples(), - [ - f1 * 2.0 + f2 * 3 + 0.5 for f1, - f2 in self.get_two_feature_examples() + PredictionResult(ex, pred) for ex, pred in zip( + self.get_two_feature_examples(), [ + f1 * 2.0 + f2 * 3 + 0.5 + for f1, f2 in self.get_two_feature_examples() ]) ] @@ -206,17 +203,23 @@ def test_onnx_pytorch_run_inference(self): model = self.test_data_and_model.get_torch_one_feature_model() path = os.path.join(self.tmpdir, 'my_onnx_pytorch_path') dummy_input = torch.randn(4, 1, requires_grad=True) - torch.onnx.export(model, - dummy_input, # model input - path, # where to save the model - export_params=True, # store the trained parameter weights - opset_version=10, # the ONNX version - do_constant_folding=True, # whether to execute constant- - # folding for optimization - input_names = ['input'], # model's input names - output_names = ['output'], # model's output names - dynamic_axes={'input' : {0 : 'batch_size'}, - 'output' : {0 : 'batch_size'}}) + torch.onnx.export( + model, + dummy_input, # model input + path, # where to save the model + export_params=True, # store the trained parameter weights + opset_version=10, # the ONNX version + do_constant_folding=True, # whether to execute constant- + # folding for optimization + input_names=['input'], # model's input names + output_names=['output'], # model's output names + dynamic_axes={ + 'input': { + 0: 'batch_size' + }, 'output': { + 0: 'batch_size' + } + }) inference_runner = TestOnnxModelHandler(path) inference_session = ort.InferenceSession( @@ -305,17 +308,23 @@ def test_onnx_sklearn_run_inference(self): class OnnxPytorchRunInferencePipelineTest(OnnxTestBase): def exportModelToOnnx(self, model, path): dummy_input = torch.randn(4, 2, requires_grad=True) - torch.onnx.export(model, - dummy_input, # model input - path, # where to save the model - export_params=True, # store the trained parameter weights - opset_version=10, # the ONNX version - do_constant_folding=True, # whether to execute constant - # folding for optimization - input_names = ['input'], # odel's input names - output_names = ['output'], # model's output names - 
dynamic_axes={'input' : {0 : 'batch_size'}, - 'output' : {0 : 'batch_size'}}) + torch.onnx.export( + model, + dummy_input, # model input + path, # where to save the model + export_params=True, # store the trained parameter weights + opset_version=10, # the ONNX version + do_constant_folding=True, # whether to execute constant + # folding for optimization + input_names=['input'], # odel's input names + output_names=['output'], # model's output names + dynamic_axes={ + 'input': { + 0: 'batch_size' + }, 'output': { + 0: 'batch_size' + } + }) def test_pipeline_local_model_simple(self): with TestPipeline() as pipeline: diff --git a/sdks/python/apache_beam/ml/inference/pytorch_inference_it_test.py b/sdks/python/apache_beam/ml/inference/pytorch_inference_it_test.py index 2cc49be54599..035047547a77 100644 --- a/sdks/python/apache_beam/ml/inference/pytorch_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/pytorch_inference_it_test.py @@ -139,8 +139,8 @@ def test_torch_run_inference_coco_maskrcnn_resnet50_fpn_v1_and_v2(self): output_file = '/'.join([output_file_dir, str(uuid.uuid4()), 'result.txt']) model_state_dict_paths = [ - 'gs://apache-beam-ml/models/torchvision.models.detection.maskrcnn_resnet50_fpn.pth', # pylint: disable=line-too-long - 'gs://apache-beam-ml/models/torchvision.models.detection.maskrcnn_resnet50_fpn_v2.pth' # pylint: disable=line-too-long + 'gs://apache-beam-ml/models/torchvision.models.detection.maskrcnn_resnet50_fpn.pth', # pylint: disable=line-too-long + 'gs://apache-beam-ml/models/torchvision.models.detection.maskrcnn_resnet50_fpn_v2.pth' # pylint: disable=line-too-long ] images_dir = 'gs://apache-beam-ml/datasets/coco/raw-data/val2017' extra_opts = { diff --git a/sdks/python/apache_beam/ml/inference/pytorch_inference_test.py b/sdks/python/apache_beam/ml/inference/pytorch_inference_test.py index dd5793af2dd1..91556f05801f 100644 --- a/sdks/python/apache_beam/ml/inference/pytorch_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/pytorch_inference_test.py @@ -60,10 +60,8 @@ ] TWO_FEATURES_PREDICTIONS = [ - PredictionResult(ex, pred) for ex, - pred in zip( - TWO_FEATURES_EXAMPLES, - torch.Tensor( + PredictionResult(ex, pred) for ex, pred in zip( + TWO_FEATURES_EXAMPLES, torch.Tensor( [f1 * 2.0 + f2 * 3 + 0.5 for f1, f2 in TWO_FEATURES_EXAMPLES]).reshape(-1, 1)) ] @@ -95,20 +93,17 @@ ] KEYED_TORCH_PREDICTIONS = [ - PredictionResult(ex, pred) for ex, - pred in zip( - KEYED_TORCH_EXAMPLES, - torch.Tensor([(example['k1'] * 2.0 + 0.5) + (example['k2'] * 2.0 + 0.5) - for example in KEYED_TORCH_EXAMPLES]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + KEYED_TORCH_EXAMPLES, torch.Tensor( + [(example['k1'] * 2.0 + 0.5) + (example['k2'] * 2.0 + 0.5) + for example in KEYED_TORCH_EXAMPLES]).reshape(-1, 1)) ] KEYED_TORCH_HELPER_PREDICTIONS = [ - PredictionResult(ex, pred) for ex, - pred in zip( - KEYED_TORCH_EXAMPLES, - torch.Tensor([(example['k1'] * 2.0 + 0.5) + - (example['k2'] * 2.0 + 0.5) + 0.5 - for example in KEYED_TORCH_EXAMPLES]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + KEYED_TORCH_EXAMPLES, torch.Tensor( + [(example['k1'] * 2.0 + 0.5) + (example['k2'] * 2.0 + 0.5) + 0.5 + for example in KEYED_TORCH_EXAMPLES]).reshape(-1, 1)) ] KEYED_TORCH_DICT_OUT_PREDICTIONS = [ @@ -139,8 +134,8 @@ def __init__(self, device, *, inference_fn=default_keyed_tensor_inference_fn): def _compare_prediction_result(x, y): if isinstance(x.example, dict): example_equals = all( - torch.equal(x, y) for x, - y in zip(x.example.values(), 
y.example.values())) + torch.equal(x, y) + for x, y in zip(x.example.values(), y.example.values())) else: example_equals = torch.equal(x.example, y.example) if not example_equals: @@ -148,8 +143,8 @@ def _compare_prediction_result(x, y): if isinstance(x.inference, dict): return all( - torch.equal(x, y) for x, - y in zip(x.inference.values(), y.inference.values())) + torch.equal(x, y) + for x, y in zip(x.inference.values(), y.inference.values())) return torch.equal(x.inference, y.inference) @@ -157,10 +152,9 @@ def _compare_prediction_result(x, y): def custom_tensor_inference_fn( batch, model, device, inference_args, model_id=None): predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( - batch, - torch.Tensor([item * 2.0 + 1.5 for item in batch]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + batch, torch.Tensor([item * 2.0 + 1.5 + for item in batch]).reshape(-1, 1)) ] return predictions @@ -221,11 +215,9 @@ def test_run_inference_single_tensor_feature(self): torch.from_numpy(np.array([10.0], dtype="float32")), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( - examples, - torch.Tensor([example * 2.0 + 0.5 - for example in examples]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + examples, torch.Tensor( + [example * 2.0 + 0.5 for example in examples]).reshape(-1, 1)) ] model = PytorchLinearRegression(input_dim=1, output_dim=1) @@ -274,11 +266,9 @@ def test_run_inference_custom(self): torch.from_numpy(np.array([10.0], dtype="float32")), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( - examples, - torch.Tensor([example * 2.0 + 1.5 - for example in examples]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + examples, torch.Tensor( + [example * 2.0 + 1.5 for example in examples]).reshape(-1, 1)) ] model = PytorchLinearRegression(input_dim=1, output_dim=1) @@ -385,11 +375,9 @@ def test_run_inference_helper(self): torch.from_numpy(np.array([10.0], dtype="float32")), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( - examples, - torch.Tensor([example * 2.0 + 1.0 - for example in examples]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + examples, torch.Tensor( + [example * 2.0 + 1.0 for example in examples]).reshape(-1, 1)) ] gen_fn = make_tensor_model_fn('generate') @@ -663,11 +651,9 @@ def test_pipeline_gcs_model(self): examples = torch.from_numpy( np.array([1, 5, 3, 10], dtype="float32").reshape(-1, 1)) expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( - examples, - torch.Tensor([example * 2.0 + 0.5 - for example in examples]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + examples, torch.Tensor( + [example * 2.0 + 0.5 for example in examples]).reshape(-1, 1)) ] gs_pth = 'gs://apache-beam-ml/models/' \ @@ -691,11 +677,9 @@ def test_pipeline_gcs_model_control_batching(self): examples = torch.from_numpy( np.array([1, 5, 3, 10], dtype="float32").reshape(-1, 1)) expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( - examples, - torch.Tensor([example * 2.0 + 0.5 - for example in examples]).reshape(-1, 1)) + PredictionResult(ex, pred) for ex, pred in zip( + examples, torch.Tensor( + [example * 2.0 + 0.5 for example in examples]).reshape(-1, 1)) ] def batch_validator_tensor_inference_fn( diff --git a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py index 
9259118c73d2..400ac77cf498 100644 --- a/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/sklearn_inference_test.py @@ -75,8 +75,8 @@ def _compare_dataframe_predictions(a_in, b_in): example_equal = pandas.DataFrame.equals(a.example, b.example) if isinstance(a.inference, dict): return all( - math.floor(a) == math.floor(b) for a, - b in zip(a.inference.values(), b.inference.values())) and example_equal + math.floor(a) == math.floor(b) for a, b in zip( + a.inference.values(), b.inference.values())) and example_equal inference_equal = math.floor(a.inference) == math.floor(b.inference) return inference_equal and example_equal and keys_equal diff --git a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py index 1556eed4fb8c..9b23963723d1 100644 --- a/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorflow_inference_test.py @@ -115,8 +115,7 @@ def test_predict_tensor(self): tf.convert_to_tensor(numpy.array([100])), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( + PredictionResult(ex, pred) for ex, pred in zip( batched_examples, [tf.math.multiply(n, 10) for n in batched_examples]) ] @@ -159,8 +158,8 @@ def fake_batching_inference_fn( numpy.array([200.1, 300.2, 400.3], dtype='float32')), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip(examples, [tf.math.multiply(n, 2) for n in examples]) + PredictionResult(ex, pred) for ex, pred in zip( + examples, [tf.math.multiply(n, 2) for n in examples]) ] pcoll = pipeline | 'start' >> beam.Create(examples) @@ -206,8 +205,8 @@ def fake_batching_inference_fn( numpy.array([200.1, 300.2, 400.3], dtype='float32')), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip(examples, [tf.math.multiply(n, 2) for n in examples]) + PredictionResult(ex, pred) for ex, pred in zip( + examples, [tf.math.multiply(n, 2) for n in examples]) ] pcoll = pipeline | 'start' >> beam.Create(examples) @@ -249,8 +248,8 @@ def fake_batching_inference_fn( numpy.array([200.1, 300.2, 400.3], dtype='float32'), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip(examples, [numpy.multiply(n, 2) for n in examples]) + PredictionResult(ex, pred) for ex, pred in zip( + examples, [numpy.multiply(n, 2) for n in examples]) ] pcoll = pipeline | 'start' >> beam.Create(examples) @@ -293,8 +292,8 @@ def fake_inference_fn( numpy.array([200.1, 300.2, 400.3], dtype='float32'), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip(examples, [numpy.multiply(n, 2) for n in examples]) + PredictionResult(ex, pred) for ex, pred in zip( + examples, [numpy.multiply(n, 2) for n in examples]) ] pcoll = pipeline | 'start' >> beam.Create(examples) @@ -315,8 +314,7 @@ def test_predict_tensor_with_args(self): tf.convert_to_tensor(numpy.array([100])), ] expected_predictions = [ - PredictionResult(ex, pred) for ex, - pred in zip( + PredictionResult(ex, pred) for ex, pred in zip( batched_examples, [ tf.math.add(tf.math.multiply(n, 10), 10) for n in batched_examples @@ -338,8 +336,7 @@ def test_predict_keyed_numpy(self): ('k3', numpy.array([100], dtype=numpy.int64)), ] expected_predictions = [ - (ex[0], PredictionResult(ex[1], pred)) for ex, - pred in zip( + (ex[0], PredictionResult(ex[1], pred)) for ex, pred in zip( batched_examples, [numpy.multiply(n[1], 10) for n in batched_examples]) ] @@ 
-358,8 +355,7 @@ def test_predict_keyed_tensor(self): ('k3', tf.convert_to_tensor(numpy.array([100]))), ] expected_predictions = [ - (ex[0], PredictionResult(ex[1], pred)) for ex, - pred in zip( + (ex[0], PredictionResult(ex[1], pred)) for ex, pred in zip( batched_examples, [tf.math.multiply(n[1], 10) for n in batched_examples]) ] @@ -370,8 +366,9 @@ def test_predict_keyed_tensor(self): def test_load_model_exception(self): with self.assertRaises(ValueError): tensorflow_inference._load_model( - "https://tfhub.dev/google/imagenet/mobilenet_v1_075_192/quantops/classification/3", # pylint: disable=line-too-long - None, {}) + "https://tfhub.dev/google/imagenet/mobilenet_v1_075_192/quantops/classification/3", # pylint: disable=line-too-long + None, + {}) @pytest.mark.uses_tf diff --git a/sdks/python/apache_beam/ml/inference/tensorrt_inference_test.py b/sdks/python/apache_beam/ml/inference/tensorrt_inference_test.py index 86bb7f695d3c..cb010e82cfca 100644 --- a/sdks/python/apache_beam/ml/inference/tensorrt_inference_test.py +++ b/sdks/python/apache_beam/ml/inference/tensorrt_inference_test.py @@ -54,16 +54,14 @@ ] SINGLE_FEATURE_PREDICTIONS = [ - PredictionResult(ex, pred) for ex, - pred in zip( + PredictionResult(ex, pred) for ex, pred in zip( SINGLE_FEATURE_EXAMPLES, [[np.array([example * 2.0 + 0.5], dtype=np.float32)] for example in SINGLE_FEATURE_EXAMPLES]) ] SINGLE_FEATURE_CUSTOM_PREDICTIONS = [ - PredictionResult(ex, pred) for ex, - pred in zip( + PredictionResult(ex, pred) for ex, pred in zip( SINGLE_FEATURE_EXAMPLES, [[np.array([(example * 2.0 + 0.5) * 2], dtype=np.float32)] for example in SINGLE_FEATURE_EXAMPLES]) @@ -77,20 +75,18 @@ ] TWO_FEATURES_PREDICTIONS = [ - PredictionResult(ex, pred) for ex, - pred in zip( - TWO_FEATURES_EXAMPLES, - [[ - np.array([example[0] * 2.0 + example[1] * 3 + 0.5], - dtype=np.float32) + PredictionResult(ex, pred) for ex, pred in zip( + TWO_FEATURES_EXAMPLES, [[ + np.array([example[0] * 2.0 + example[1] * 3 + + 0.5], dtype=np.float32) ] for example in TWO_FEATURES_EXAMPLES]) ] def _compare_prediction_result(a, b): return ((a.example == b.example).all() and all( - np.array_equal(actual, expected) for actual, - expected in zip(a.inference, b.inference))) + np.array_equal(actual, expected) + for actual, expected in zip(a.inference, b.inference))) def _assign_or_fail(args): @@ -140,8 +136,8 @@ def _custom_tensorRT_inference_fn(batch, engine, inference_args): return [ PredictionResult( - x, [prediction[idx] * 2 for prediction in cpu_allocations]) for idx, - x in enumerate(batch) + x, [prediction[idx] * 2 for prediction in cpu_allocations]) + for idx, x in enumerate(batch) ] diff --git a/sdks/python/apache_beam/ml/inference/utils.py b/sdks/python/apache_beam/ml/inference/utils.py index 0bb2031b3731..dfbb01a29049 100644 --- a/sdks/python/apache_beam/ml/inference/utils.py +++ b/sdks/python/apache_beam/ml/inference/utils.py @@ -54,8 +54,8 @@ def _convert_to_result( dict(zip(predictions.keys(), v)) for v in zip(*predictions.values()) ] return [ - PredictionResult(x, y, model_id) for x, - y in zip(batch, predictions_per_tensor) + PredictionResult(x, y, model_id) + for x, y in zip(batch, predictions_per_tensor) ] return [PredictionResult(x, y, model_id) for x, y in zip(batch, predictions)] diff --git a/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search_it_test.py b/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search_it_test.py index 6dc95b5b46c2..03334b0331bf 100644 --- 
a/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search_it_test.py +++ b/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search_it_test.py @@ -345,7 +345,7 @@ def test_batched_metadata_filter_vector_search(self): handler = BigQueryVectorSearchEnrichmentHandler( vector_search_parameters=params, min_batch_size=2, # Force batching - max_batch_size=2 # Process 2 chunks at a time + max_batch_size=2 # Process 2 chunks at a time ) expected_chunks = [ @@ -599,8 +599,7 @@ def test_metadata_filter_leakage(self): handler = BigQueryVectorSearchEnrichmentHandler( vector_search_parameters=params, min_batch_size=2, # Force batching - max_batch_size=2 - ) + max_batch_size=2) with TestPipeline(is_integration_test=True) as p: result = (p | beam.Create(test_chunks) | Enrichment(handler)) @@ -757,8 +756,7 @@ def test_condition_batching(self): handler = BigQueryVectorSearchEnrichmentHandler( vector_search_parameters=params, min_batch_size=10, # Force batching - max_batch_size=100 - ) + max_batch_size=100) with TestPipeline(is_integration_test=True) as p: result = (p | beam.Create(test_chunks) | Enrichment(handler)) @@ -855,9 +853,8 @@ def test_invalid_query(self): columns=['content'], neighbor_count=1, metadata_restriction_template=( - "language = '{language}'" # Invalid template - ) - ) + "language = '{language}'" # Invalid template + )) handler = BigQueryVectorSearchEnrichmentHandler( vector_search_parameters=params) @@ -889,8 +886,7 @@ def test_missing_embedding(self): embedding=None, # Missing embedding content=Content(text="test query"), metadata={"language": "en"}, - index=0 - ) + index=0) ] params = BigQueryVectorSearchParameters( diff --git a/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py b/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py index a2a95faacffe..6939d09e2bad 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py @@ -52,22 +52,26 @@ ('metadata', str)]) registry.register_coder(TestRow, RowCoder) -CustomSpecsRow = NamedTuple('CustomSpecsRow', [ - ('custom_id', str), # For id_spec test - ('embedding_vec', List[float]), # For embedding_spec test - ('content_col', str), # For content_spec test - ('metadata', str) -]) +CustomSpecsRow = NamedTuple( + 'CustomSpecsRow', + [ + ('custom_id', str), # For id_spec test + ('embedding_vec', List[float]), # For embedding_spec test + ('content_col', str), # For content_spec test + ('metadata', str) + ]) registry.register_coder(CustomSpecsRow, RowCoder) -MetadataConflictRow = NamedTuple('MetadataConflictRow', [ - ('id', str), - ('source', str), # For metadata_spec and composite key - ('timestamp', str), # For metadata_spec and composite key - ('content', str), - ('embedding', List[float]), - ('metadata', str) -]) +MetadataConflictRow = NamedTuple( + 'MetadataConflictRow', + [ + ('id', str), + ('source', str), # For metadata_spec and composite key + ('timestamp', str), # For metadata_spec and composite key + ('content', str), + ('embedding', List[float]), + ('metadata', str) + ]) registry.register_coder(MetadataConflictRow, RowCoder) _LOGGER = logging.getLogger(__name__) diff --git a/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py b/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py index 7d2caa67868a..35cd65ff7a94 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py @@ -68,8 +68,7 @@ def __init__( self, write_config: Dict[str, Any], *, # Force keyword 
arguments - schema_config: Optional[SchemaConfig] = None - ): + schema_config: Optional[SchemaConfig] = None): """Configuration for writing vectors to BigQuery using managed transforms. Supports both default schema (id, embedding, content, metadata columns) and @@ -129,11 +128,9 @@ def _default_chunk_to_dict_fn(chunk: Chunk): 'id': chunk.id, 'embedding': chunk.embedding.dense_embedding, 'content': chunk.content.text, - 'metadata': [ - { - "key": k, "value": str(v) - } for k, v in chunk.metadata.items() - ] + 'metadata': [{ + "key": k, "value": str(v) + } for k, v in chunk.metadata.items()] } diff --git a/sdks/python/apache_beam/ml/rag/ingestion/bigquery_it_test.py b/sdks/python/apache_beam/ml/rag/ingestion/bigquery_it_test.py index 6c034a1aeae7..7df662ab0554 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/bigquery_it_test.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/bigquery_it_test.py @@ -139,8 +139,7 @@ def test_custom_schema(self): }] }, chunk_to_dict_fn=lambda chunk: { - 'id': chunk.id, - 'embedding': chunk.embedding.dense_embedding, + 'id': chunk.id, 'embedding': chunk.embedding.dense_embedding, 'source': chunk.metadata.get('source') }) config = BigQueryVectorWriterConfig( diff --git a/sdks/python/apache_beam/ml/transforms/base.py b/sdks/python/apache_beam/ml/transforms/base.py index 57a5efd3ff0e..bd3a5f2b3dc2 100644 --- a/sdks/python/apache_beam/ml/transforms/base.py +++ b/sdks/python/apache_beam/ml/transforms/base.py @@ -628,8 +628,8 @@ def create_ptransform_list(self): self._parent_artifact_location, uuid.uuid4().hex[:6]), artifact_mode=self.artifact_mode) append_transform = hasattr(current_ptransform, 'append_transform') - if (type(current_ptransform) != - previous_ptransform_type) or not append_transform: + if (type(current_ptransform) + != previous_ptransform_type) or not append_transform: ptransform_list.append(current_ptransform) previous_ptransform_type = type(current_ptransform) # If different PTransform is appended to the list and the PTransform diff --git a/sdks/python/apache_beam/ml/transforms/base_test.py b/sdks/python/apache_beam/ml/transforms/base_test.py index 1ef01acca18a..39aff233aecd 100644 --- a/sdks/python/apache_beam/ml/transforms/base_test.py +++ b/sdks/python/apache_beam/ml/transforms/base_test.py @@ -405,8 +405,10 @@ def test_handler_with_dict_inputs(self): 'x': "Apache Beam" }, ] - expected_data = [{key: value[::-1] - for key, value in d.items()} for d in data] + expected_data = [{ + key: value[::-1] + for key, value in d.items() + } for d in data] with beam.Pipeline() as p: result = ( p @@ -430,8 +432,10 @@ def test_handler_with_batch_sizes(self): 'x': "Apache Beam" }, ] * 100 - expected_data = [{key: value[::-1] - for key, value in d.items()} for d in data] + expected_data = [{ + key: value[::-1] + for key, value in d.items() + } for d in data] with beam.Pipeline() as p: result = ( p @@ -456,8 +460,7 @@ def test_handler_on_multiple_columns(self): embedding_config = FakeEmbeddingsManager(columns=['x', 'y']) expected_data = [{ key: (value[::-1] if key in embedding_config.columns else value) - for key, - value in d.items() + for key, value in d.items() } for d in data] with beam.Pipeline() as p: result = ( diff --git a/sdks/python/apache_beam/ml/transforms/embeddings/huggingface.py b/sdks/python/apache_beam/ml/transforms/embeddings/huggingface.py index e492cb164222..2d217a5ca3e2 100644 --- a/sdks/python/apache_beam/ml/transforms/embeddings/huggingface.py +++ b/sdks/python/apache_beam/ml/transforms/embeddings/huggingface.py @@ -215,10 +215,10 @@ def 
__init__( self, hf_token: Optional[str], columns: list[str], - model_name: Optional[str] = None, # example: "sentence-transformers/all-MiniLM-l6-v2" # pylint: disable=line-too-long + model_name: Optional[str] = None, # example: "sentence-transformers/all-MiniLM-l6-v2" # pylint: disable=line-too-long api_url: Optional[str] = None, **kwargs, - ): + ): super().__init__(columns=columns, **kwargs) self._authorization_token = {"Authorization": f"Bearer {hf_token}"} self._model_name = model_name diff --git a/sdks/python/apache_beam/ml/transforms/tft_test.py b/sdks/python/apache_beam/ml/transforms/tft_test.py index 6afe9b5ab302..b12849afc816 100644 --- a/sdks/python/apache_beam/ml/transforms/tft_test.py +++ b/sdks/python/apache_beam/ml/transforms/tft_test.py @@ -404,13 +404,19 @@ def tearDown(self): def test_compute_and_apply_vocabulary_inputs(self): num_elements = 100 num_instances = num_elements + 1 - input_data = [{ - 'x': '%.10i' % i, # Front-padded to facilitate lexicographic sorting. - } for i in range(num_instances)] + input_data = [ + { + 'x': '%.10i' % + i, # Front-padded to facilitate lexicographic sorting. + } for i in range(num_instances) + ] - expected_data = [{ - 'x': (len(input_data) - 1) - i, # Due to reverse lexicographic sorting. - } for i in range(len(input_data))] + expected_data = [ + { + 'x': (len(input_data) - 1) - + i, # Due to reverse lexicographic sorting. + } for i in range(len(input_data)) + ] with beam.Pipeline() as p: actual_data = ( diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index 2ddce625b210..b51b85963c69 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -143,8 +143,8 @@ class _DictUnionAction(argparse.Action): than one of the values, the last value takes precedence. 
""" def __call__(self, parser, namespace, values, option_string=None): - if not hasattr(namespace, - self.dest) or getattr(namespace, self.dest) is None: + if not hasattr(namespace, self.dest) or getattr(namespace, + self.dest) is None: setattr(namespace, self.dest, {}) getattr(namespace, self.dest).update(values) @@ -188,8 +188,8 @@ def _add_entry(self, key, value): self._custom_audit_entries[f"x-goog-custom-audit-{key}"] = value def __call__(self, parser, namespace, values, option_string=None): - if not hasattr(namespace, - self.dest) or getattr(namespace, self.dest) is None: + if not hasattr(namespace, self.dest) or getattr(namespace, + self.dest) is None: setattr(namespace, self.dest, {}) self._custom_audit_entries = getattr(namespace, self.dest) @@ -1218,8 +1218,7 @@ def _add_argparse_args(cls, parser): type=str, choices=['NONE', 'THROUGHPUT_BASED'], default=None, # Meaning unset, distinct from 'NONE' meaning don't scale - help= - ('If and how to autoscale the workerpool.')) + help=('If and how to autoscale the workerpool.')) parser.add_argument( '--worker_machine_type', '--machine_type', diff --git a/sdks/python/apache_beam/options/pipeline_options_validator.py b/sdks/python/apache_beam/options/pipeline_options_validator.py index 7c07e5a1e6c7..ebe9c8f223ce 100644 --- a/sdks/python/apache_beam/options/pipeline_options_validator.py +++ b/sdks/python/apache_beam/options/pipeline_options_validator.py @@ -416,7 +416,7 @@ def validate_endpoint_url(self, endpoint_url): return False if url_parts.scheme not in ['http', 'https']: return False - if set( - url_parts.netloc) <= set(string.ascii_letters + string.digits + '-.'): + if set(url_parts.netloc) <= set(string.ascii_letters + string.digits + + '-.'): return True return False diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py index a68deb2be0b8..c011732f9352 100644 --- a/sdks/python/apache_beam/pipeline.py +++ b/sdks/python/apache_beam/pipeline.py @@ -857,7 +857,6 @@ def _generate_unique_label( unique_suffix = uuid.uuid4().hex[:6] return '%s_%s' % (transform.label, unique_suffix) - def _infer_result_type( self, transform, # type: ptransform.PTransform @@ -1004,8 +1003,8 @@ def visit_transform(self, transform_node): if (isinstance(output.element_type, typehints.TupleHint.TupleConstraint) and len(output.element_type.tuple_types) == 2 and - pcoll.element_type.tuple_types[0] == - output.element_type.tuple_types[0]): + pcoll.element_type.tuple_types[0] + == output.element_type.tuple_types[0]): output.requires_deterministic_key_coder = ( deterministic_key_coders and transform_node.full_label) for side_input in transform_node.transform.side_inputs: @@ -1057,8 +1056,10 @@ def from_runner_api( p = Pipeline( runner=runner, options=options, - display_data={str(ix): d - for ix, d in enumerate(proto.display_data)}) + display_data={ + str(ix): d + for ix, d in enumerate(proto.display_data) + }) from apache_beam.runners import pipeline_context context = pipeline_context.PipelineContext( proto.components, requirements=proto.requirements) @@ -1177,7 +1178,7 @@ def __init__( full_label, # type: str main_inputs, # type: Optional[Mapping[str, Union[pvalue.PBegin, pvalue.PCollection]]] environment_id, # type: Optional[str] - annotations, # type: Optional[Dict[str, bytes]] + annotations, # type: Optional[Dict[str, bytes]] ): # type: (...) 
-> None self.parent = parent @@ -1421,13 +1422,11 @@ def transform_to_runner_api( ], inputs={ tag: context.pcollections.get_id(pc) - for tag, - pc in sorted(self.named_inputs().items()) + for tag, pc in sorted(self.named_inputs().items()) }, outputs={ tag: context.pcollections.get_id(out) - for tag, - out in sorted(self.named_outputs().items()) + for tag, out in sorted(self.named_outputs().items()) }, environment_id=environment_id, annotations=self.annotations, @@ -1468,8 +1467,8 @@ def from_runner_api( # TODO(https://github.com/apache/beam/issues/20136): use key, value pairs # instead of depending on tags with index as a suffix. indexed_side_inputs = [ - (get_sideinput_index(tag), context.pcollections.get_by_id(id)) for tag, - id in proto.inputs.items() if tag in side_input_tags + (get_sideinput_index(tag), context.pcollections.get_by_id(id)) + for tag, id in proto.inputs.items() if tag in side_input_tags ] side_inputs = [si for _, si in sorted(indexed_side_inputs)] @@ -1492,8 +1491,7 @@ def from_runner_api( result.add_part(part) result.outputs = { None if tag == 'None' else tag: context.pcollections.get_by_id(id) - for tag, - id in proto.outputs.items() + for tag, id in proto.outputs.items() } # This annotation is expected by some runners. if proto.spec.urn == common_urns.primitives.PAR_DO.urn: diff --git a/sdks/python/apache_beam/pipeline_test.py b/sdks/python/apache_beam/pipeline_test.py index 52ecb1a1d575..9cdab3d55aba 100644 --- a/sdks/python/apache_beam/pipeline_test.py +++ b/sdks/python/apache_beam/pipeline_test.py @@ -1441,11 +1441,11 @@ def file_artifact(path, hash, staged_name): 'e1': beam_runner_api_pb2.Environment( dependencies=[file_artifact('a1', 'x', 'dest')]), 'e2': beam_runner_api_pb2.Environment( - dependencies=[file_artifact('a2', 'x', 'dest')]), - # Different hash. + dependencies=[file_artifact('a2', 'x', 'dest') + ]), # Different hash. 'e3': beam_runner_api_pb2.Environment( - dependencies=[file_artifact('a3', 'y', 'dest')]), - # Different destination. + dependencies=[file_artifact('a3', 'y', 'dest') + ]), # Different destination. 'e4': beam_runner_api_pb2.Environment( dependencies=[file_artifact('a4', 'y', 'dest2')]), # Multiple files with same hash and destinations. @@ -1458,14 +1458,12 @@ def file_artifact(path, hash, staged_name): dependencies=[ file_artifact('a2', 'x', 'dest'), file_artifact('b2', 'xb', 'destB') - ]), - # Overlapping, but not identical, files. + ]), # Overlapping, but not identical, files. 'e7': beam_runner_api_pb2.Environment( dependencies=[ file_artifact('a1', 'x', 'dest'), file_artifact('b2', 'y', 'destB') - ]), - # Same files as first, but differing other properties. + ]), # Same files as first, but differing other properties. 'e0': beam_runner_api_pb2.Environment( resource_hints={'hint': b'value'}, dependencies=[file_artifact('a1', 'x', 'dest')]), diff --git a/sdks/python/apache_beam/pvalue.py b/sdks/python/apache_beam/pvalue.py index 5a400570cf18..cee3b8f2bca2 100644 --- a/sdks/python/apache_beam/pvalue.py +++ b/sdks/python/apache_beam/pvalue.py @@ -426,8 +426,7 @@ def _from_runtime_iterable(it, options): def _view_options(self): return { - 'data': self._data, - # For non-fn-api runners. + 'data': self._data, # For non-fn-api runners. 
'window_mapping_fn': self._data.window_mapping_fn, 'coder': self._windowed_coder(), } @@ -544,8 +543,7 @@ def _from_runtime_iterable(it, options): def _side_input_data(self) -> SideInputData: return SideInputData( common_urns.side_inputs.ITERABLE.urn, - self._window_mapping_fn, - lambda iterable: iterable) + self._window_mapping_fn, lambda iterable: iterable) @property def element_type(self): @@ -620,8 +618,7 @@ def _from_runtime_iterable(it, options): def _side_input_data(self) -> SideInputData: return SideInputData( common_urns.side_inputs.MULTIMAP.urn, - self._window_mapping_fn, - lambda x: x) + self._window_mapping_fn, lambda x: x) def requires_keyed_input(self): return True @@ -681,8 +678,8 @@ def __eq__(self, other): return ( type(self) == type(other) and len(self.__dict__) == len(other.__dict__) and all( - s == o for s, - o in zip(self.__dict__.items(), other.__dict__.items()))) + s == o + for s, o in zip(self.__dict__.items(), other.__dict__.items()))) def __reduce__(self): return _make_Row, tuple(self.__dict__.items()) diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py index a40342b7fd27..abe3792b4d8b 100644 --- a/sdks/python/apache_beam/runners/common.py +++ b/sdks/python/apache_beam/runners/common.py @@ -466,11 +466,11 @@ class DoFnInvoker(object): A DoFnInvoker describes a particular way for invoking methods of a DoFn represented by a given DoFnSignature.""" - - def __init__(self, - output_handler, # type: _OutputHandler - signature # type: DoFnSignature - ): + def __init__( + self, + output_handler, # type: _OutputHandler + signature # type: DoFnSignature + ): # type: (...) -> None """ @@ -490,8 +490,9 @@ def create_invoker( signature, # type: DoFnSignature output_handler, # type: OutputHandler context=None, # type: Optional[DoFnContext] - side_inputs=None, # type: Optional[List[sideinputs.SideInputMap]] - input_args=None, input_kwargs=None, + side_inputs=None, # type: Optional[List[sideinputs.SideInputMap]] + input_args=None, + input_kwargs=None, process_invocation=True, user_state_context=None, # type: Optional[userstate.UserStateContext] bundle_finalizer_param=None # type: Optional[core._BundleFinalizerParam] @@ -542,13 +543,13 @@ def create_invoker( user_state_context, bundle_finalizer_param) - def invoke_process(self, - windowed_value, # type: WindowedValue - restriction=None, - watermark_estimator_state=None, - additional_args=None, - additional_kwargs=None - ): + def invoke_process( + self, + windowed_value, # type: WindowedValue + restriction=None, + watermark_estimator_state=None, + additional_args=None, + additional_kwargs=None): # type: (...) -> Iterable[SplitResultResidual] """Invokes the DoFn.process() function. @@ -569,11 +570,11 @@ def invoke_process(self, """ raise NotImplementedError - def invoke_process_batch(self, - windowed_batch, # type: WindowedBatch - additional_args=None, - additional_kwargs=None - ): + def invoke_process_batch( + self, + windowed_batch, # type: WindowedBatch + additional_args=None, + additional_kwargs=None): # type: (...) -> None """Invokes the DoFn.process() function. @@ -662,35 +663,35 @@ def invoke_create_tracker(self, restriction): class SimpleInvoker(DoFnInvoker): """An invoker that processes elements ignoring windowing information.""" - - def __init__(self, - output_handler, # type: OutputHandler - signature # type: DoFnSignature - ): + def __init__( + self, + output_handler, # type: OutputHandler + signature # type: DoFnSignature + ): # type: (...) 
-> None super().__init__(output_handler, signature) self.process_method = signature.process_method.method_value self.process_batch_method = signature.process_batch_method.method_value - def invoke_process(self, - windowed_value, # type: WindowedValue - restriction=None, - watermark_estimator_state=None, - additional_args=None, - additional_kwargs=None - ): + def invoke_process( + self, + windowed_value, # type: WindowedValue + restriction=None, + watermark_estimator_state=None, + additional_args=None, + additional_kwargs=None): # type: (...) -> Iterable[SplitResultResidual] self.output_handler.handle_process_outputs( windowed_value, self.process_method(windowed_value.value)) return [] - def invoke_process_batch(self, - windowed_batch, # type: WindowedBatch - restriction=None, - watermark_estimator_state=None, - additional_args=None, - additional_kwargs=None - ): + def invoke_process_batch( + self, + windowed_batch, # type: WindowedBatch + restriction=None, + watermark_estimator_state=None, + additional_args=None, + additional_kwargs=None): # type: (...) -> None self.output_handler.handle_process_batch_outputs( windowed_batch, self.process_batch_method(windowed_batch.values)) @@ -775,17 +776,17 @@ def __init__(self, placeholder): class PerWindowInvoker(DoFnInvoker): """An invoker that processes elements considering windowing information.""" - - def __init__(self, - output_handler, # type: OutputHandler - signature, # type: DoFnSignature - context, # type: DoFnContext - side_inputs, # type: Iterable[sideinputs.SideInputMap] - input_args, - input_kwargs, - user_state_context, # type: Optional[userstate.UserStateContext] - bundle_finalizer_param # type: Optional[core._BundleFinalizerParam] - ): + def __init__( + self, + output_handler, # type: OutputHandler + signature, # type: DoFnSignature + context, # type: DoFnContext + side_inputs, # type: Iterable[sideinputs.SideInputMap] + input_args, + input_kwargs, + user_state_context, # type: Optional[userstate.UserStateContext] + bundle_finalizer_param # type: Optional[core._BundleFinalizerParam] + ): super().__init__(output_handler, signature) self.side_inputs = side_inputs self.context = context @@ -817,8 +818,8 @@ def __init__(self, # and has_cached_window_batch_args will be set to true if the corresponding # self.args_for_process,have been updated and should be reused directly. self.recalculate_window_args = ( - self.has_windowed_inputs or 'disable_global_windowed_args_caching' in - RuntimeValueProvider.experiments) + self.has_windowed_inputs or 'disable_global_windowed_args_caching' + in RuntimeValueProvider.experiments) self.has_cached_window_args = False self.has_cached_window_batch_args = False @@ -840,13 +841,13 @@ def __init__(self, self.kwargs_for_process_batch) = _get_arg_placeholders( signature.process_batch_method, input_args, input_kwargs) - def invoke_process(self, - windowed_value, # type: WindowedValue - restriction=None, - watermark_estimator_state=None, - additional_args=None, - additional_kwargs=None - ): + def invoke_process( + self, + windowed_value, # type: WindowedValue + restriction=None, + watermark_estimator_state=None, + additional_args=None, + additional_kwargs=None): # type: (...) 
-> Iterable[SplitResultResidual] if not additional_args: additional_args = [] @@ -912,11 +913,11 @@ def invoke_process(self, windowed_value, additional_args, additional_kwargs) return residuals - def invoke_process_batch(self, - windowed_batch, # type: WindowedBatch - additional_args=None, - additional_kwargs=None - ): + def invoke_process_batch( + self, + windowed_batch, # type: WindowedBatch + additional_args=None, + additional_kwargs=None): # type: (...) -> None if not additional_args: @@ -941,9 +942,9 @@ def invoke_process_batch(self, def _should_process_window_for_sdf( self, - windowed_value, # type: WindowedValue + windowed_value, # type: WindowedValue additional_kwargs, - window_index=None, # type: Optional[int] + window_index=None, # type: Optional[int] ): restriction_tracker = self.invoke_create_tracker(self.restriction) watermark_estimator = self.invoke_create_watermark_estimator( @@ -976,11 +977,12 @@ def _should_process_window_for_sdf( additional_kwargs[watermark_param] = self.threadsafe_watermark_estimator return True - def _invoke_process_per_window(self, - windowed_value, # type: WindowedValue - additional_args, - additional_kwargs, - ): + def _invoke_process_per_window( + self, + windowed_value, # type: WindowedValue + additional_args, + additional_kwargs, + ): # type: (...) -> Optional[SplitResultResidual] if self.has_cached_window_args: args_for_process, kwargs_for_process = ( @@ -1149,16 +1151,17 @@ def _invoke_process_batch_per_window( self.threadsafe_watermark_estimator) @staticmethod - def _try_split(fraction, - window_index, # type: Optional[int] - stop_window_index, # type: Optional[int] - windowed_value, # type: WindowedValue + def _try_split( + fraction, + window_index, # type: Optional[int] + stop_window_index, # type: Optional[int] + windowed_value, # type: WindowedValue restriction, watermark_estimator_state, - restriction_provider, # type: RestrictionProvider - restriction_tracker, # type: RestrictionTracker - watermark_estimator, # type: WatermarkEstimator - ): + restriction_provider, # type: RestrictionProvider + restriction_tracker, # type: RestrictionTracker + watermark_estimator, # type: WatermarkEstimator + ): # type: (...) -> Optional[Tuple[Iterable[SplitResultPrimary], Iterable[SplitResultResidual], Optional[int]]] """Try to split returning a primaries, residuals and a new stop index. @@ -1398,22 +1401,22 @@ class DoFnRunner: A helper class for executing ParDo operations. """ - - def __init__(self, - fn, # type: core.DoFn - args, - kwargs, - side_inputs, # type: Iterable[sideinputs.SideInputMap] - windowing, - tagged_receivers, # type: Mapping[Optional[str], Receiver] - step_name=None, # type: Optional[str] - logging_context=None, - state=None, - scoped_metrics_container=None, - operation_name=None, - transform_id=None, - user_state_context=None, # type: Optional[userstate.UserStateContext] - ): + def __init__( + self, + fn, # type: core.DoFn + args, + kwargs, + side_inputs, # type: Iterable[sideinputs.SideInputMap] + windowing, + tagged_receivers, # type: Mapping[Optional[str], Receiver] + step_name=None, # type: Optional[str] + logging_context=None, + state=None, + scoped_metrics_container=None, + operation_name=None, + transform_id=None, + user_state_context=None, # type: Optional[userstate.UserStateContext] + ): """Initializes a DoFnRunner. 
Args: @@ -1620,16 +1623,16 @@ def handle_process_batch_outputs( class _OutputHandler(OutputHandler): """Processes output produced by DoFn method invocations.""" - - def __init__(self, - window_fn, - main_receivers, # type: Receiver - tagged_receivers, # type: Mapping[Optional[str], Receiver] - per_element_output_counter, - output_batch_converter, # type: Optional[BatchConverter] - process_yields_batches, # type: bool - process_batch_yields_elements, # type: bool - ): + def __init__( + self, + window_fn, + main_receivers, # type: Receiver + tagged_receivers, # type: Mapping[Optional[str], Receiver] + per_element_output_counter, + output_batch_converter, # type: Optional[BatchConverter] + process_yields_batches, # type: bool + process_batch_yields_elements, # type: bool + ): """Initializes ``_OutputHandler``. Args: diff --git a/sdks/python/apache_beam/runners/common_test.py b/sdks/python/apache_beam/runners/common_test.py index 00645948c3ed..cc4e8218e8af 100644 --- a/sdks/python/apache_beam/runners/common_test.py +++ b/sdks/python/apache_beam/runners/common_test.py @@ -421,7 +421,10 @@ def test_window_observing_split_on_last_window(self): expected_primary_split, expected_primary_windows, )) - hc.assert_that(residuals, hc.contains_inanyorder(expected_residual_split, )) + hc.assert_that( + residuals, hc.contains_inanyorder( + expected_residual_split, + )) self.assertEqual(stop_index, 3) def test_window_observing_split_on_first_window_fallback(self): diff --git a/sdks/python/apache_beam/runners/dask/dask_runner_test.py b/sdks/python/apache_beam/runners/dask/dask_runner_test.py index afe363ba3ee6..e1e5a4403b46 100644 --- a/sdks/python/apache_beam/runners/dask/dask_runner_test.py +++ b/sdks/python/apache_beam/runners/dask/dask_runner_test.py @@ -310,16 +310,13 @@ def test_multimap_multiside_input(self): assert_that( main | "first map" >> beam.Map( - lambda k, - d, - l: (k, sorted(d[k]), sorted([e[1] for e in l])), + lambda k, d, l: (k, sorted(d[k]), sorted([e[1] for e in l])), beam.pvalue.AsMultiMap(side), beam.pvalue.AsList(side), ) | "second map" >> beam.Map( - lambda k, - d, - l: (k[0], sorted(d[k[0]]), sorted([e[1] for e in l])), + lambda k, d, l: + (k[0], sorted(d[k[0]]), sorted([e[1] for e in l])), beam.pvalue.AsMultiMap(side), beam.pvalue.AsList(side), ), diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py index bfe56c7e38c2..7a6dc887e6be 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_exercise_metrics_pipeline.py @@ -164,14 +164,8 @@ def apply_and_run(pipeline): | beam.GroupByKey() | 'm_out' >> beam.FlatMap( lambda x: [ - 1, - 2, - 3, - 4, - 5, - beam.pvalue.TaggedOutput('once', x), - beam.pvalue.TaggedOutput('twice', x), - beam.pvalue.TaggedOutput('twice', x) + 1, 2, 3, 4, 5, beam.pvalue.TaggedOutput('once', x), beam.pvalue. 
+ TaggedOutput('twice', x), beam.pvalue.TaggedOutput('twice', x) ])) result = pipeline.run() result.wait_until_finish() diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_job_service.py b/sdks/python/apache_beam/runners/dataflow/dataflow_job_service.py index 710c71273e34..e14fdd91d7b2 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_job_service.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_job_service.py @@ -45,8 +45,7 @@ def _invoke_runner(self): dataflow_runner.DataflowRunner.poll_for_job_completion( runner, self.result, - None, - lambda dataflow_state: self.set_state( + None, lambda dataflow_state: self.set_state( portable_runner.PipelineResult.pipeline_state_to_runner_api_state( self.result.api_jobstate_to_pipeline_state(dataflow_state)))) return self.result diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py index 78c3b64595b0..87456124b816 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics.py @@ -101,8 +101,8 @@ def _translate_step_name(self, internal_name): 'Could not translate the internal step name %r since job graph is ' 'not available.' % internal_name) user_step_name = None - if (self._job_graph and internal_name in - self._job_graph.proto_pipeline.components.transforms.keys()): + if (self._job_graph and internal_name + in self._job_graph.proto_pipeline.components.transforms.keys()): # Dataflow Runner v2 with portable job submission uses proto transform map # IDs for step names. Also PTransform.unique_name maps to user step names. # Hence we lookup user step names based on the proto. diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py index 86e71f9c1ed2..be8a08528fe2 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py @@ -308,8 +308,7 @@ class TestDataflowMetrics(unittest.TestCase): "additionalProperties": [ { "key": "original_name", - "value": - "ToIsmRecordForMultimap-out0-ElementCount" + "value": "ToIsmRecordForMultimap-out0-ElementCount" }, # yapf: disable { "key": "output_user_name", @@ -332,13 +331,13 @@ class TestDataflowMetrics(unittest.TestCase): "additionalProperties": [ { "key": "original_name", - "value": - "ToIsmRecordForMultimap-out0-ElementCount" + "value": "ToIsmRecordForMultimap-out0-ElementCount" }, # yapf: disable { "key": "output_user_name", "value": "ToIsmRecordForMultimap-out0" - }, { + }, + { "key": "tentative", "value": "true" } ] diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py index 349fee3eff26..19302923b1fb 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py @@ -169,8 +169,8 @@ def rank_error(msg): # Check that job is in a post-preparation state before starting the # final countdown. - if (str(response.currentState) not in ('JOB_STATE_PENDING', - 'JOB_STATE_QUEUED')): + if (str(response.currentState) + not in ('JOB_STATE_PENDING', 'JOB_STATE_QUEUED')): # The job has failed; ensure we see any final error messages. 
sleep_secs = 1.0 # poll faster during the final countdown final_countdown_timer_secs -= sleep_secs diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py index 65d0525b8a7e..bb9132bdb96e 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner_test.py @@ -397,9 +397,7 @@ def test_side_input_visitor(self): pc = p | beam.Create([]) transform = beam.Map( - lambda x, - y, - z: (x, y, z), + lambda x, y, z: (x, y, z), beam.pvalue.AsSingleton(pc), beam.pvalue.AsMultiMap(pc)) applied_transform = AppliedPTransform( diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py index da01112d313b..636a2b501196 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py @@ -230,8 +230,8 @@ def __init__( container_image = dataflow.SdkHarnessContainerImage() container_image.containerImage = container_image_url container_image.useSingleCorePerContainer = ( - common_urns.protocols.MULTI_CORE_BUNDLE_PROCESSING.urn not in - environment.capabilities) + common_urns.protocols.MULTI_CORE_BUNDLE_PROCESSING.urn + not in environment.capabilities) container_image.environmentId = id for capability in environment.capabilities: container_image.capabilities.append(capability) @@ -1059,10 +1059,9 @@ def job_id_for_name(self, job_name): pageToken=token) response = self._client.projects_locations_jobs.List(request) for job in response.jobs: - if (job.name == job_name and job.currentState in [ - dataflow.Job.CurrentStateValueValuesEnum.JOB_STATE_RUNNING, - dataflow.Job.CurrentStateValueValuesEnum.JOB_STATE_DRAINING - ]): + if (job.name == job_name and job.currentState + in [dataflow.Job.CurrentStateValueValuesEnum.JOB_STATE_RUNNING, + dataflow.Job.CurrentStateValueValuesEnum.JOB_STATE_DRAINING]): return job.id token = response.nextPageToken if token is None: @@ -1220,9 +1219,8 @@ def get_response_encoding(): def _verify_interpreter_version_is_supported(pipeline_options): - if ('%s.%s' % - (sys.version_info[0], - sys.version_info[1]) in _PYTHON_VERSIONS_SUPPORTED_BY_DATAFLOW): + if ('%s.%s' % (sys.version_info[0], sys.version_info[1]) + in _PYTHON_VERSIONS_SUPPORTED_BY_DATAFLOW): return if 'dev' in beam_version.__version__: diff --git a/sdks/python/apache_beam/runners/dataflow/internal/clients/cloudbuild/cloudbuild_v1_client.py b/sdks/python/apache_beam/runners/dataflow/internal/clients/cloudbuild/cloudbuild_v1_client.py index 9d699aba5892..52941bfe0b4b 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/clients/cloudbuild/cloudbuild_v1_client.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/clients/cloudbuild/cloudbuild_v1_client.py @@ -130,8 +130,7 @@ def RegionalWebhook(self, request, global_params=None): request_field='httpBody', request_type_name='CloudbuildLocationsRegionalWebhookRequest', response_type_name='Empty', - supports_download=False, - ) + supports_download=False, ) class OperationsService(base_api.BaseApiService): """Service class for the operations resource.""" @@ -165,8 +164,7 @@ def Cancel(self, request, global_params=None): request_field='cancelOperationRequest', request_type_name='CloudbuildOperationsCancelRequest', response_type_name='Empty', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): 
r"""Gets the latest state of a long-running operation. Clients can use this method to poll the operation result at intervals as recommended by the API service. @@ -191,8 +189,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildOperationsGetRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsBuildsService(base_api.BaseApiService): """Service class for the projects_builds resource.""" @@ -226,8 +223,7 @@ def Approve(self, request, global_params=None): request_field='approveBuildRequest', request_type_name='CloudbuildProjectsBuildsApproveRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Cancel(self, request, global_params=None): r"""Cancels a build in progress. @@ -251,8 +247,7 @@ def Cancel(self, request, global_params=None): request_field='', request_type_name='CancelBuildRequest', response_type_name='Build', - supports_download=False, - ) + supports_download=False, ) def Create(self, request, global_params=None): r"""Starts a build with the specified configuration. This method returns a long-running `Operation`, which includes the build ID. Pass the build ID to `GetBuild` to determine the build status (such as `SUCCESS` or `FAILURE`). @@ -276,8 +271,7 @@ def Create(self, request, global_params=None): request_field='build', request_type_name='CloudbuildProjectsBuildsCreateRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Returns information about a previously requested build. The `Build` that is returned includes its status (such as `SUCCESS`, `FAILURE`, or `WORKING`), and timing information. @@ -301,8 +295,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsBuildsGetRequest', response_type_name='Build', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""Lists previously requested builds. Previously requested builds may still be in-progress, or may have finished successfully or unsuccessfully. @@ -326,8 +319,7 @@ def List(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsBuildsListRequest', response_type_name='ListBuildsResponse', - supports_download=False, - ) + supports_download=False, ) def Retry(self, request, global_params=None): r"""Creates a new build based on the specified build. This method creates a new build using the original build request, which may or may not result in an identical build. For triggered builds: * Triggered builds resolve to a precise revision; therefore a retry of a triggered build will result in a build that uses the same revision. For non-triggered builds that specify `RepoSource`: * If the original build built from the tip of a branch, the retried build will build from the tip of that branch, which may not be the same revision as the original build. * If the original build specified a commit sha or revision ID, the retried build will use the identical source. For builds that specify `StorageSource`: * If the original build pulled source from Google Cloud Storage without specifying the generation of the object, the new build will use the current object, which may be different from the original build source. 
* If the original build pulled source from Cloud Storage and specified the generation of the object, the new build will attempt to use the same object, which may or may not be available depending on the bucket's lifecycle management settings. @@ -351,8 +343,7 @@ def Retry(self, request, global_params=None): request_field='', request_type_name='RetryBuildRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsGithubEnterpriseConfigsService(base_api.BaseApiService): """Service class for the projects_githubEnterpriseConfigs resource.""" @@ -388,8 +379,7 @@ def Create(self, request, global_params=None): request_type_name= 'CloudbuildProjectsGithubEnterpriseConfigsCreateRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Delete(self, request, global_params=None): r"""Delete an association between a GCP project and a GitHub Enterprise server. @@ -416,8 +406,7 @@ def Delete(self, request, global_params=None): request_type_name= 'CloudbuildProjectsGithubEnterpriseConfigsDeleteRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Retrieve a GitHubEnterpriseConfig. @@ -443,8 +432,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsGithubEnterpriseConfigsGetRequest', response_type_name='GitHubEnterpriseConfig', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""List all GitHubEnterpriseConfigs for a given project. @@ -470,8 +458,7 @@ def List(self, request, global_params=None): request_type_name= 'CloudbuildProjectsGithubEnterpriseConfigsListRequest', response_type_name='ListGithubEnterpriseConfigsResponse', - supports_download=False, - ) + supports_download=False, ) def Patch(self, request, global_params=None): r"""Update an association between a GCP project and a GitHub Enterprise server. @@ -498,8 +485,7 @@ def Patch(self, request, global_params=None): request_type_name= 'CloudbuildProjectsGithubEnterpriseConfigsPatchRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsBitbucketServerConfigsConnectedRepositoriesService( base_api.BaseApiService): @@ -540,8 +526,7 @@ def BatchCreate(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsConnectedRepositoriesBatchCreateRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsBitbucketServerConfigsReposService( base_api.BaseApiService): @@ -581,8 +566,7 @@ def List(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsReposListRequest', response_type_name='ListBitbucketServerRepositoriesResponse', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsBitbucketServerConfigsService(base_api.BaseApiService): """Service class for the projects_locations_bitbucketServerConfigs resource.""" @@ -621,8 +605,7 @@ def AddBitbucketServerConnectedRepository( request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsAddBitbucketServerConnectedRepositoryRequest', response_type_name='AddBitbucketServerConnectedRepositoryResponse', - supports_download=False, - ) + supports_download=False, ) def Create(self, request, global_params=None): r"""Creates a new `BitbucketServerConfig`. 
This API is experimental. @@ -649,8 +632,7 @@ def Create(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsCreateRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Delete(self, request, global_params=None): r"""Delete a `BitbucketServerConfig`. This API is experimental. @@ -677,8 +659,7 @@ def Delete(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsDeleteRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Retrieve a `BitbucketServerConfig`. This API is experimental. @@ -705,8 +686,7 @@ def Get(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsGetRequest', response_type_name='BitbucketServerConfig', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""List all `BitbucketServerConfigs` for a given project. This API is experimental. @@ -733,8 +713,7 @@ def List(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsListRequest', response_type_name='ListBitbucketServerConfigsResponse', - supports_download=False, - ) + supports_download=False, ) def Patch(self, request, global_params=None): r"""Updates an existing `BitbucketServerConfig`. This API is experimental. @@ -761,8 +740,7 @@ def Patch(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsPatchRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def RemoveBitbucketServerConnectedRepository( self, request, global_params=None): @@ -791,8 +769,7 @@ def RemoveBitbucketServerConnectedRepository( request_type_name= 'CloudbuildProjectsLocationsBitbucketServerConfigsRemoveBitbucketServerConnectedRepositoryRequest', response_type_name='Empty', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsBuildsService(base_api.BaseApiService): """Service class for the projects_locations_builds resource.""" @@ -827,8 +804,7 @@ def Approve(self, request, global_params=None): request_field='approveBuildRequest', request_type_name='CloudbuildProjectsLocationsBuildsApproveRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Cancel(self, request, global_params=None): r"""Cancels a build in progress. @@ -854,8 +830,7 @@ def Cancel(self, request, global_params=None): request_field='', request_type_name='CancelBuildRequest', response_type_name='Build', - supports_download=False, - ) + supports_download=False, ) def Create(self, request, global_params=None): r"""Starts a build with the specified configuration. This method returns a long-running `Operation`, which includes the build ID. Pass the build ID to `GetBuild` to determine the build status (such as `SUCCESS` or `FAILURE`). @@ -880,8 +855,7 @@ def Create(self, request, global_params=None): request_field='build', request_type_name='CloudbuildProjectsLocationsBuildsCreateRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Returns information about a previously requested build. The `Build` that is returned includes its status (such as `SUCCESS`, `FAILURE`, or `WORKING`), and timing information. 
@@ -907,8 +881,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsBuildsGetRequest', response_type_name='Build', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""Lists previously requested builds. Previously requested builds may still be in-progress, or may have finished successfully or unsuccessfully. @@ -933,8 +906,7 @@ def List(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsBuildsListRequest', response_type_name='ListBuildsResponse', - supports_download=False, - ) + supports_download=False, ) def Retry(self, request, global_params=None): r"""Creates a new build based on the specified build. This method creates a new build using the original build request, which may or may not result in an identical build. For triggered builds: * Triggered builds resolve to a precise revision; therefore a retry of a triggered build will result in a build that uses the same revision. For non-triggered builds that specify `RepoSource`: * If the original build built from the tip of a branch, the retried build will build from the tip of that branch, which may not be the same revision as the original build. * If the original build specified a commit sha or revision ID, the retried build will use the identical source. For builds that specify `StorageSource`: * If the original build pulled source from Google Cloud Storage without specifying the generation of the object, the new build will use the current object, which may be different from the original build source. * If the original build pulled source from Cloud Storage and specified the generation of the object, the new build will attempt to use the same object, which may or may not be available depending on the bucket's lifecycle management settings. @@ -960,8 +932,7 @@ def Retry(self, request, global_params=None): request_field='', request_type_name='RetryBuildRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsGithubEnterpriseConfigsService(base_api.BaseApiService ): @@ -1000,8 +971,7 @@ def Create(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsGithubEnterpriseConfigsCreateRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Delete(self, request, global_params=None): r"""Delete an association between a GCP project and a GitHub Enterprise server. @@ -1029,8 +999,7 @@ def Delete(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsGithubEnterpriseConfigsDeleteRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Retrieve a GitHubEnterpriseConfig. @@ -1057,8 +1026,7 @@ def Get(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsGithubEnterpriseConfigsGetRequest', response_type_name='GitHubEnterpriseConfig', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""List all GitHubEnterpriseConfigs for a given project. 
@@ -1085,8 +1053,7 @@ def List(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsGithubEnterpriseConfigsListRequest', response_type_name='ListGithubEnterpriseConfigsResponse', - supports_download=False, - ) + supports_download=False, ) def Patch(self, request, global_params=None): r"""Update an association between a GCP project and a GitHub Enterprise server. @@ -1113,8 +1080,7 @@ def Patch(self, request, global_params=None): request_type_name= 'CloudbuildProjectsLocationsGithubEnterpriseConfigsPatchRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsOperationsService(base_api.BaseApiService): """Service class for the projects_locations_operations resource.""" @@ -1150,8 +1116,7 @@ def Cancel(self, request, global_params=None): request_field='cancelOperationRequest', request_type_name='CloudbuildProjectsLocationsOperationsCancelRequest', response_type_name='Empty', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Gets the latest state of a long-running operation. Clients can use this method to poll the operation result at intervals as recommended by the API service. @@ -1177,8 +1142,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsOperationsGetRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsTriggersService(base_api.BaseApiService): """Service class for the projects_locations_triggers resource.""" @@ -1213,8 +1177,7 @@ def Create(self, request, global_params=None): request_field='buildTrigger', request_type_name='CloudbuildProjectsLocationsTriggersCreateRequest', response_type_name='BuildTrigger', - supports_download=False, - ) + supports_download=False, ) def Delete(self, request, global_params=None): r"""Deletes a `BuildTrigger` by its project ID and trigger ID. This API is experimental. @@ -1240,8 +1203,7 @@ def Delete(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsTriggersDeleteRequest', response_type_name='Empty', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Returns information about a `BuildTrigger`. This API is experimental. @@ -1267,8 +1229,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsTriggersGetRequest', response_type_name='BuildTrigger', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""Lists existing `BuildTrigger`s. This API is experimental. @@ -1293,8 +1254,7 @@ def List(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsTriggersListRequest', response_type_name='ListBuildTriggersResponse', - supports_download=False, - ) + supports_download=False, ) def Patch(self, request, global_params=None): r"""Updates a `BuildTrigger` by its project ID and trigger ID. This API is experimental. @@ -1320,8 +1280,7 @@ def Patch(self, request, global_params=None): request_field='buildTrigger', request_type_name='CloudbuildProjectsLocationsTriggersPatchRequest', response_type_name='BuildTrigger', - supports_download=False, - ) + supports_download=False, ) def Run(self, request, global_params=None): r"""Runs a `BuildTrigger` at a particular source revision. 
@@ -1347,8 +1306,7 @@ def Run(self, request, global_params=None): request_field='runBuildTriggerRequest', request_type_name='CloudbuildProjectsLocationsTriggersRunRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Webhook(self, request, global_params=None): r"""ReceiveTriggerWebhook [Experimental] is called when the API receives a webhook request targeted at a specific trigger. @@ -1374,8 +1332,7 @@ def Webhook(self, request, global_params=None): request_field='httpBody', request_type_name='CloudbuildProjectsLocationsTriggersWebhookRequest', response_type_name='ReceiveTriggerWebhookResponse', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsWorkerPoolsService(base_api.BaseApiService): """Service class for the projects_locations_workerPools resource.""" @@ -1411,8 +1368,7 @@ def Create(self, request, global_params=None): request_field='workerPool', request_type_name='CloudbuildProjectsLocationsWorkerPoolsCreateRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Delete(self, request, global_params=None): r"""Deletes a `WorkerPool`. @@ -1438,8 +1394,7 @@ def Delete(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsWorkerPoolsDeleteRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Returns details of a `WorkerPool`. @@ -1465,8 +1420,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsWorkerPoolsGetRequest', response_type_name='WorkerPool', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""Lists `WorkerPool`s. @@ -1492,8 +1446,7 @@ def List(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsLocationsWorkerPoolsListRequest', response_type_name='ListWorkerPoolsResponse', - supports_download=False, - ) + supports_download=False, ) def Patch(self, request, global_params=None): r"""Updates a `WorkerPool`. @@ -1519,8 +1472,7 @@ def Patch(self, request, global_params=None): request_field='workerPool', request_type_name='CloudbuildProjectsLocationsWorkerPoolsPatchRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) class ProjectsLocationsService(base_api.BaseApiService): """Service class for the projects_locations resource.""" @@ -1562,8 +1514,7 @@ def Create(self, request, global_params=None): request_field='buildTrigger', request_type_name='CloudbuildProjectsTriggersCreateRequest', response_type_name='BuildTrigger', - supports_download=False, - ) + supports_download=False, ) def Delete(self, request, global_params=None): r"""Deletes a `BuildTrigger` by its project ID and trigger ID. This API is experimental. @@ -1587,8 +1538,7 @@ def Delete(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsTriggersDeleteRequest', response_type_name='Empty', - supports_download=False, - ) + supports_download=False, ) def Get(self, request, global_params=None): r"""Returns information about a `BuildTrigger`. This API is experimental. 
@@ -1612,8 +1562,7 @@ def Get(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsTriggersGetRequest', response_type_name='BuildTrigger', - supports_download=False, - ) + supports_download=False, ) def List(self, request, global_params=None): r"""Lists existing `BuildTrigger`s. This API is experimental. @@ -1637,8 +1586,7 @@ def List(self, request, global_params=None): request_field='', request_type_name='CloudbuildProjectsTriggersListRequest', response_type_name='ListBuildTriggersResponse', - supports_download=False, - ) + supports_download=False, ) def Patch(self, request, global_params=None): r"""Updates a `BuildTrigger` by its project ID and trigger ID. This API is experimental. @@ -1662,8 +1610,7 @@ def Patch(self, request, global_params=None): request_field='buildTrigger', request_type_name='CloudbuildProjectsTriggersPatchRequest', response_type_name='BuildTrigger', - supports_download=False, - ) + supports_download=False, ) def Run(self, request, global_params=None): r"""Runs a `BuildTrigger` at a particular source revision. @@ -1687,8 +1634,7 @@ def Run(self, request, global_params=None): request_field='repoSource', request_type_name='CloudbuildProjectsTriggersRunRequest', response_type_name='Operation', - supports_download=False, - ) + supports_download=False, ) def Webhook(self, request, global_params=None): r"""ReceiveTriggerWebhook [Experimental] is called when the API receives a webhook request targeted at a specific trigger. @@ -1712,8 +1658,7 @@ def Webhook(self, request, global_params=None): request_field='httpBody', request_type_name='CloudbuildProjectsTriggersWebhookRequest', response_type_name='ReceiveTriggerWebhookResponse', - supports_download=False, - ) + supports_download=False, ) class ProjectsService(base_api.BaseApiService): """Service class for the projects resource.""" @@ -1755,5 +1700,4 @@ def Webhook(self, request, global_params=None): request_field='httpBody', request_type_name='CloudbuildWebhookRequest', response_type_name='Empty', - supports_download=False, - ) + supports_download=False, ) diff --git a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py index 7eba868afba0..92ea235c82e3 100644 --- a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py +++ b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor_test.py @@ -148,14 +148,12 @@ def test_visitor_not_sorted(self): # Convert to string to assert they are equal. 
out_of_order_labels = { str(k): [str(t) for t in value_to_consumer] - for k, - value_to_consumer in v_out_of_order.value_to_consumers.items() + for k, value_to_consumer in v_out_of_order.value_to_consumers.items() } original_labels = { str(k): [str(t) for t in value_to_consumer] - for k, - value_to_consumer in v_original.value_to_consumers.items() + for k, value_to_consumer in v_original.value_to_consumers.items() } self.assertDictEqual(out_of_order_labels, original_labels) diff --git a/sdks/python/apache_beam/runners/direct/direct_metrics.py b/sdks/python/apache_beam/runners/direct/direct_metrics.py index 5beb19d4610a..6e3b72c7fcac 100644 --- a/sdks/python/apache_beam/runners/direct/direct_metrics.py +++ b/sdks/python/apache_beam/runners/direct/direct_metrics.py @@ -139,36 +139,36 @@ def query(self, filter=None): MetricResult( MetricKey(k.step, k.metric), v.extract_committed(), - v.extract_latest_attempted()) for k, - v in self._counters.items() if self.matches(filter, k) + v.extract_latest_attempted()) for k, v in self._counters.items() + if self.matches(filter, k) ] distributions = [ MetricResult( MetricKey(k.step, k.metric), v.extract_committed(), - v.extract_latest_attempted()) for k, - v in self._distributions.items() if self.matches(filter, k) + v.extract_latest_attempted()) + for k, v in self._distributions.items() if self.matches(filter, k) ] gauges = [ MetricResult( MetricKey(k.step, k.metric), v.extract_committed(), - v.extract_latest_attempted()) for k, - v in self._gauges.items() if self.matches(filter, k) + v.extract_latest_attempted()) for k, v in self._gauges.items() + if self.matches(filter, k) ] string_sets = [ MetricResult( MetricKey(k.step, k.metric), v.extract_committed(), - v.extract_latest_attempted()) for k, - v in self._string_sets.items() if self.matches(filter, k) + v.extract_latest_attempted()) for k, v in self._string_sets.items() + if self.matches(filter, k) ] bounded_tries = [ MetricResult( MetricKey(k.step, k.metric), v.extract_committed(), - v.extract_latest_attempted()) for k, - v in self._bounded_tries.items() if self.matches(filter, k) + v.extract_latest_attempted()) + for k, v in self._bounded_tries.items() if self.matches(filter, k) ] return { diff --git a/sdks/python/apache_beam/runners/direct/evaluation_context.py b/sdks/python/apache_beam/runners/direct/evaluation_context.py index c34735499abc..e787eafbc259 100644 --- a/sdks/python/apache_beam/runners/direct/evaluation_context.py +++ b/sdks/python/apache_beam/runners/direct/evaluation_context.py @@ -346,8 +346,8 @@ def _update_side_inputs_container( registered as a PCollectionView, we add the result to the PCollectionView. 
""" if (result.uncommitted_output_bundles and - result.uncommitted_output_bundles[0].pcollection in - self._pcollection_to_views): + result.uncommitted_output_bundles[0].pcollection + in self._pcollection_to_views): for view in self._pcollection_to_views[ result.uncommitted_output_bundles[0].pcollection]: for committed_bundle in committed_bundles: diff --git a/sdks/python/apache_beam/runners/direct/transform_evaluator.py b/sdks/python/apache_beam/runners/direct/transform_evaluator.py index b0278ba5356c..ee97b729ac28 100644 --- a/sdks/python/apache_beam/runners/direct/transform_evaluator.py +++ b/sdks/python/apache_beam/runners/direct/transform_evaluator.py @@ -354,8 +354,7 @@ def _read_values_to_bundles(reader): return self._split_list_into_bundles( output_pcollection, read_result, - _BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE, - lambda _: 1) + _BoundedReadEvaluator.MAX_ELEMENT_PER_BUNDLE, lambda _: 1) if isinstance(self._source, io.iobase.BoundedSource): # Getting a RangeTracker for the default range of the source and reading diff --git a/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py b/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py index aef2f768237e..5954d436ad28 100644 --- a/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py +++ b/sdks/python/apache_beam/runners/interactive/background_caching_job_test.py @@ -84,14 +84,11 @@ def tearDown(self): # that meet the boundedness checks. @patch( 'apache_beam.runners.interactive.background_caching_job' - '.has_source_to_cache', - lambda x: True) + '.has_source_to_cache', lambda x: True) # Disable the clean up so that we can keep the test streaming cache. @patch( 'apache_beam.runners.interactive.interactive_environment' - '.InteractiveEnvironment.cleanup', - lambda x, - y: None) + '.InteractiveEnvironment.cleanup', lambda x, y: None) def test_background_caching_job_starts_when_none_such_job_exists(self): # Create a fake PipelineResult and PipelineRunner. This is because we want @@ -126,8 +123,7 @@ def run_pipeline(self, pipeline, options): @patch( 'apache_beam.runners.interactive.background_caching_job' - '.has_source_to_cache', - lambda x: False) + '.has_source_to_cache', lambda x: False) def test_background_caching_job_not_start_for_batch_pipeline(self): p = beam.Pipeline() @@ -138,14 +134,11 @@ def test_background_caching_job_not_start_for_batch_pipeline(self): @patch( 'apache_beam.runners.interactive.background_caching_job' - '.has_source_to_cache', - lambda x: True) + '.has_source_to_cache', lambda x: True) # Disable the clean up so that we can keep the test streaming cache. @patch( 'apache_beam.runners.interactive.interactive_environment' - '.InteractiveEnvironment.cleanup', - lambda x, - y: None) + '.InteractiveEnvironment.cleanup', lambda x, y: None) def test_background_caching_job_not_start_when_such_job_exists(self): p = _build_a_test_stream_pipeline() _setup_test_streaming_cache(p) @@ -163,14 +156,11 @@ def test_background_caching_job_not_start_when_such_job_exists(self): @patch( 'apache_beam.runners.interactive.background_caching_job' - '.has_source_to_cache', - lambda x: True) + '.has_source_to_cache', lambda x: True) # Disable the clean up so that we can keep the test streaming cache. 
@patch( 'apache_beam.runners.interactive.interactive_environment' - '.InteractiveEnvironment.cleanup', - lambda x, - y: None) + '.InteractiveEnvironment.cleanup', lambda x, y: None) def test_background_caching_job_not_start_when_such_job_is_done(self): p = _build_a_test_stream_pipeline() _setup_test_streaming_cache(p) diff --git a/sdks/python/apache_beam/runners/interactive/cache_manager.py b/sdks/python/apache_beam/runners/interactive/cache_manager.py index ac592475c057..e725f3f82ac5 100644 --- a/sdks/python/apache_beam/runners/interactive/cache_manager.py +++ b/sdks/python/apache_beam/runners/interactive/cache_manager.py @@ -158,13 +158,10 @@ class FileBasedCacheManager(CacheManager): _available_formats = { 'text': ( lambda path: textio.ReadFromText( - path, - coder=Base64Coder(), - compression_type=filesystems.CompressionTypes.BZIP2), - lambda path: textio.WriteToText( - path, - coder=Base64Coder(), - compression_type=filesystems.CompressionTypes.BZIP2)), + path, coder=Base64Coder(), compression_type=filesystems. + CompressionTypes.BZIP2), lambda path: textio.WriteToText( + path, coder=Base64Coder(), compression_type=filesystems. + CompressionTypes.BZIP2)), 'tfrecord': (tfrecordio.ReadFromTFRecord, tfrecordio.WriteToTFRecord) } diff --git a/sdks/python/apache_beam/runners/interactive/caching/read_cache.py b/sdks/python/apache_beam/runners/interactive/caching/read_cache.py index cf0859d5804a..ac7ef5cae561 100644 --- a/sdks/python/apache_beam/runners/interactive/caching/read_cache.py +++ b/sdks/python/apache_beam/runners/interactive/caching/read_cache.py @@ -84,8 +84,8 @@ def read_cache(self) -> Tuple[str, str]: self._pipeline.components.coders[coder_id].CopyFrom( template.components.coders[coder_id]) for windowing_strategy_id in template.components.windowing_strategies: - if (windowing_strategy_id in - self._pipeline.components.windowing_strategies): + if (windowing_strategy_id + in self._pipeline.components.windowing_strategies): continue self._pipeline.components.windowing_strategies[ windowing_strategy_id].CopyFrom( diff --git a/sdks/python/apache_beam/runners/interactive/caching/write_cache.py b/sdks/python/apache_beam/runners/interactive/caching/write_cache.py index d398e70338b6..e56073b009cb 100644 --- a/sdks/python/apache_beam/runners/interactive/caching/write_cache.py +++ b/sdks/python/apache_beam/runners/interactive/caching/write_cache.py @@ -75,9 +75,8 @@ def write_cache(self) -> None: # Copy cache writing subgraph from the template to the pipeline proto. 
for pcoll_id in template.components.pcollections: - if (pcoll_id in self._pipeline.components.pcollections or - pcoll_id in write_input_placeholder.ignorable_components.pcollections - ): + if (pcoll_id in self._pipeline.components.pcollections or pcoll_id + in write_input_placeholder.ignorable_components.pcollections): continue self._pipeline.components.pcollections[pcoll_id].CopyFrom( template.components.pcollections[pcoll_id]) @@ -88,10 +87,10 @@ def write_cache(self) -> None: self._pipeline.components.coders[coder_id].CopyFrom( template.components.coders[coder_id]) for windowing_strategy_id in template.components.windowing_strategies: - if (windowing_strategy_id in - self._pipeline.components.windowing_strategies or - windowing_strategy_id in - write_input_placeholder.ignorable_components.windowing_strategies): + if (windowing_strategy_id + in self._pipeline.components.windowing_strategies or + windowing_strategy_id + in write_input_placeholder.ignorable_components.windowing_strategies): continue self._pipeline.components.windowing_strategies[ windowing_strategy_id].CopyFrom( @@ -106,8 +105,8 @@ def write_cache(self) -> None: template.components.transforms[transform_id]) for top_level_transform in template.components.transforms[ template_root_transform_id].subtransforms: - if (top_level_transform in - write_input_placeholder.ignorable_components.transforms): + if (top_level_transform + in write_input_placeholder.ignorable_components.transforms): continue self._pipeline.components.transforms[ root_transform_id].subtransforms.append(top_level_transform) diff --git a/sdks/python/apache_beam/runners/interactive/interactive_environment.py b/sdks/python/apache_beam/runners/interactive/interactive_environment.py index 0e3d0060b1a4..1f48121016c5 100644 --- a/sdks/python/apache_beam/runners/interactive/interactive_environment.py +++ b/sdks/python/apache_beam/runners/interactive/interactive_environment.py @@ -462,8 +462,7 @@ def describe_all_recordings(self): """Returns a description of the recording for all watched pipelnes.""" return { self.pipeline_id_to_pipeline(pid): rm.describe() - for pid, - rm in self._recording_managers.items() + for pid, rm in self._recording_managers.items() } def set_pipeline_result(self, pipeline, result): diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py index 8e5d50ed3f3f..95e30f7cb0fa 100644 --- a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py +++ b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py @@ -178,8 +178,7 @@ def _required_components( visited_copy = visited.copy() consuming_transforms = { t_id: t - for t_id, - t in transforms.items() + for t_id, t in transforms.items() if set(outputs).intersection(set(t.inputs.values())) } consuming_transforms = set(consuming_transforms.keys()) @@ -201,8 +200,7 @@ def _required_components( ] producing_transforms = { t_id: t - for t_id, - t in transforms.items() + for t_id, t in transforms.items() if set(inputs).intersection(set(t.outputs.values())) } (t, pc) = self._required_components( @@ -296,8 +294,8 @@ def background_caching_pipeline_proto(self): # Get the IDs of the unbounded sources. 
required_transform_labels = [src.full_label for src in sources] unbounded_source_ids = [ - k for k, - v in transforms.items() if v.unique_name in required_transform_labels + k for k, v in transforms.items() + if v.unique_name in required_transform_labels ] # The required transforms are the transforms that we want to cut out of @@ -596,8 +594,8 @@ def _read_cache(self, pipeline, pcoll, is_unbounded_source_output): is_cached = self._cache_manager.exists('full', key) is_computed = ( pcoll in self._runner_pcoll_to_user_pcoll and - self._runner_pcoll_to_user_pcoll[pcoll] in - ie.current_env().computed_pcollections) + self._runner_pcoll_to_user_pcoll[pcoll] + in ie.current_env().computed_pcollections) if ((is_cached and is_computed) or is_unbounded_source_output): if key not in self._cached_pcoll_read: # Mutates the pipeline with cache read transform attached diff --git a/sdks/python/apache_beam/runners/interactive/recording_manager.py b/sdks/python/apache_beam/runners/interactive/recording_manager.py index 6811d3e0d345..ce6fbd6d8ae8 100644 --- a/sdks/python/apache_beam/runners/interactive/recording_manager.py +++ b/sdks/python/apache_beam/runners/interactive/recording_manager.py @@ -151,18 +151,19 @@ class Recording: def __init__( self, user_pipeline: beam.Pipeline, - pcolls: List[beam.pvalue.PCollection], # noqa: F821 + pcolls: List[beam.pvalue.PCollection], # noqa: F821 result: 'beam.runner.PipelineResult', max_n: int, max_duration_secs: float, - ): + ): self._user_pipeline = user_pipeline self._result = result self._result_lock = threading.Lock() self._pcolls = pcolls - pcoll_var = lambda pcoll: {v: k - for k, v in utils.pcoll_by_name().items()}.get( - pcoll, None) + pcoll_var = lambda pcoll: { + v: k + for k, v in utils.pcoll_by_name().items() + }.get(pcoll, None) self._streams = { pcoll: ElementStream( diff --git a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py index 3d843a0f6ae8..9dd74b16a5ce 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py +++ b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics_test.py @@ -76,10 +76,7 @@ def test_build_query_components_when_single_pcoll_queried(self): with patch('apache_beam.runners.interactive.sql.beam_sql_magics.' 'unreify_from_cache', - lambda pipeline, - cache_key, - cache_manager, - element_type: target): + lambda pipeline, cache_key, cache_manager, element_type: target): processed_query, sql_source, chain = _build_query_components( query, found, 'output') expected_query = 'SELECT * FROM PCOLLECTION where a=1' @@ -97,12 +94,10 @@ def test_build_query_components_when_multiple_pcolls_queried(self): query = 'SELECT * FROM pcoll_1 JOIN pcoll_2 USING (a)' found = {'pcoll_1': pcoll_1, 'pcoll_2': pcoll_2} - with patch('apache_beam.runners.interactive.sql.beam_sql_magics.' - 'unreify_from_cache', - lambda pipeline, - cache_key, - cache_manager, - element_type: pcoll_1): + with patch( + 'apache_beam.runners.interactive.sql.beam_sql_magics.' + 'unreify_from_cache', + lambda pipeline, cache_key, cache_manager, element_type: pcoll_1): processed_query, sql_source, chain = _build_query_components( query, found, 'output') @@ -124,10 +119,7 @@ def test_build_query_components_when_unbounded_pcolls_queried(self): found = {'pcoll': pcoll} with patch('apache_beam.runners.interactive.sql.beam_sql_magics.' 
- 'pcolls_from_streaming_cache', - lambda a, - b, - c: found): + 'pcolls_from_streaming_cache', lambda a, b, c: found): _, sql_source, chain = _build_query_components(query, found, 'output') self.assertIs(sql_source, pcoll) self.assertIn('pcoll', chain.current.source) @@ -141,9 +133,7 @@ def test_cache_output(self): ie.current_env().set_cache_manager(cache_manager, p_cache_output) ib.watch(locals()) with patch('apache_beam.runners.interactive.display.pcoll_visualization.' - 'visualize_computed_pcoll', - lambda a, - b: None): + 'visualize_computed_pcoll', lambda a, b: None): cache_output('pcoll_co', pcoll_co) self.assertIn(pcoll_co, ie.current_env().computed_pcollections) self.assertTrue( diff --git a/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py b/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py index a6f48661b87b..9cafbb6c9039 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py +++ b/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py @@ -165,8 +165,7 @@ def expand(self, source): self.output_name, tag, self.execution_count) >> beam.ParDo( self._SqlTransformDoFn(self.schemas, self.schema_annotations)) if pcoll.element_type in self.schemas else pcoll - for tag, - pcoll in source.items() + for tag, pcoll in source.items() } elif isinstance(source, beam.pvalue.PCollection): schema_loaded = source | 'load_schemas_{}_{}'.format( diff --git a/sdks/python/apache_beam/runners/interactive/sql/utils.py b/sdks/python/apache_beam/runners/interactive/sql/utils.py index a6e810d5555b..2e46c0f23a7a 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/utils.py +++ b/sdks/python/apache_beam/runners/interactive/sql/utils.py @@ -115,8 +115,8 @@ def pformat_namedtuple(schema: NamedTuple) -> str: return '{}({})'.format( schema.__name__, ', '.join([ - '{}: {}'.format(k, repr(v)) for k, - v in schema.__annotations__.items() + '{}: {}'.format(k, repr(v)) + for k, v in schema.__annotations__.items() ])) diff --git a/sdks/python/apache_beam/runners/pipeline_utils.py b/sdks/python/apache_beam/runners/pipeline_utils.py index 7c38c034e2a7..c0c5e199c7d5 100644 --- a/sdks/python/apache_beam/runners/pipeline_utils.py +++ b/sdks/python/apache_beam/runners/pipeline_utils.py @@ -95,11 +95,11 @@ def validate_transform(transform_id): "Bad coder for output of %s: %s" % (transform_id, output_coder)) output_values_coder = pipeline_proto.components.coders[ output_coder.component_coder_ids[1]] - if (input_coder.component_coder_ids[0] != - output_coder.component_coder_ids[0] or + if (input_coder.component_coder_ids[0] + != output_coder.component_coder_ids[0] or output_values_coder.spec.urn != common_urns.coders.ITERABLE.urn or - output_values_coder.component_coder_ids[0] != - input_coder.component_coder_ids[1]): + output_values_coder.component_coder_ids[0] + != input_coder.component_coder_ids[1]): raise ValueError( "Incompatible input coder %s and output coder %s for transform %s" % (transform_id, input_coder, output_coder)) @@ -183,7 +183,8 @@ def merge_common_environments(pipeline_proto, inplace=False): environment_remappings = { e: es[0] - for es in canonical_environments.values() for e in es + for es in canonical_environments.values() + for e in es } return update_environments(pipeline_proto, environment_remappings, inplace) diff --git a/sdks/python/apache_beam/runners/portability/abstract_job_service.py b/sdks/python/apache_beam/runners/portability/abstract_job_service.py index 87162d5feda5..3a3ad1507813 100644 --- 
a/sdks/python/apache_beam/runners/portability/abstract_job_service.py +++ b/sdks/python/apache_beam/runners/portability/abstract_job_service.py @@ -72,12 +72,12 @@ class AbstractJobServiceServicer(beam_job_api_pb2_grpc.JobServiceServicer): def __init__(self): self._jobs: Dict[str, AbstractBeamJob] = {} - def create_beam_job(self, - preparation_id, # stype: str - job_name: str, - pipeline: beam_runner_api_pb2.Pipeline, - options: struct_pb2.Struct - ) -> 'AbstractBeamJob': + def create_beam_job( + self, + preparation_id, # stype: str + job_name: str, + pipeline: beam_runner_api_pb2.Pipeline, + options: struct_pb2.Struct) -> 'AbstractBeamJob': """Returns an instance of AbstractBeamJob specific to this servicer.""" raise NotImplementedError(type(self)) diff --git a/sdks/python/apache_beam/runners/portability/artifact_service.py b/sdks/python/apache_beam/runners/portability/artifact_service.py index b9395caeafaf..60b89f3a424a 100644 --- a/sdks/python/apache_beam/runners/portability/artifact_service.py +++ b/sdks/python/apache_beam/runners/portability/artifact_service.py @@ -162,8 +162,7 @@ def resolve(): for key, dependencies in dependency_sets.items(): dependency_sets[key] = list( resolve_as_files( - ForwardingRetrievalService(), - lambda name: self._file_writer( + ForwardingRetrievalService(), lambda name: self._file_writer( os.path.join(staging_token, name)), dependencies)) requests.done() diff --git a/sdks/python/apache_beam/runners/portability/expansion_service.py b/sdks/python/apache_beam/runners/portability/expansion_service.py index 4890dd9215e7..12e3ffb69702 100644 --- a/sdks/python/apache_beam/runners/portability/expansion_service.py +++ b/sdks/python/apache_beam/runners/portability/expansion_service.py @@ -83,17 +83,15 @@ def with_pipeline(component, pcoll_id=None): requirements=request.requirements) producers = { pcoll_id: (context.transforms.get_by_id(t_id), pcoll_tag) - for t_id, - t_proto in request.components.transforms.items() for pcoll_tag, - pcoll_id in t_proto.outputs.items() + for t_id, t_proto in request.components.transforms.items() + for pcoll_tag, pcoll_id in t_proto.outputs.items() } transform = with_pipeline( ptransform.PTransform.from_runner_api(request.transform, context)) if len(request.output_coder_requests) == 1: output_coder = { k: context.element_type_from_coder_id(v) - for k, - v in request.output_coder_requests.items() + for k, v in request.output_coder_requests.items() } transform = transform.with_output_types(list(output_coder.values())[0]) elif len(request.output_coder_requests) > 1: @@ -101,10 +99,9 @@ def with_pipeline(component, pcoll_id=None): 'type annotation for multiple outputs is not allowed yet: %s' % request.output_coder_requests) inputs = transform._pvaluish_from_dict({ - tag: - with_pipeline(context.pcollections.get_by_id(pcoll_id), pcoll_id) - for tag, - pcoll_id in request.transform.inputs.items() + tag: with_pipeline( + context.pcollections.get_by_id(pcoll_id), pcoll_id) + for tag, pcoll_id in request.transform.inputs.items() }) if not inputs: inputs = pipeline diff --git a/sdks/python/apache_beam/runners/portability/flink_runner_test.py b/sdks/python/apache_beam/runners/portability/flink_runner_test.py index a2b5b0013d7b..6aa913105ac1 100644 --- a/sdks/python/apache_beam/runners/portability/flink_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/flink_runner_test.py @@ -257,8 +257,8 @@ def test_expand_kafka_read(self): allow_duplicates=False, expansion_service=self.get_expansion_service())) self.assertTrue( - 'No resolvable 
bootstrap urls given in bootstrap.servers' in str( - ctx.exception), + 'No resolvable bootstrap urls given in bootstrap.servers' + in str(ctx.exception), 'Expected to fail due to invalid bootstrap.servers, but ' 'failed due to:\n%s' % str(ctx.exception)) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py index e69e37495f64..ec9f5dd15ba8 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py @@ -251,7 +251,8 @@ def partition(self, n: int) -> List[List[bytes]]: index=0, nonspeculative_index=0)).with_value windowed_key_values = lambda key, values: [ - globally_window((key, values))] + globally_window((key, values)) + ] else: # TODO(pabloem, BEAM-7514): Trigger driver needs access to the clock # note that this only comes through if windowing is default - but what @@ -1169,8 +1170,8 @@ def input_for(self, transform_id: str, input_id: str) -> str: input_pcoll in proto.outputs.values()): return read_id # The GrpcRead is followed by the SDF/Truncate -> SDF/Process. - if (proto.spec.urn == - common_urns.sdf_components.TRUNCATE_SIZED_RESTRICTION.urn and + if (proto.spec.urn + == common_urns.sdf_components.TRUNCATE_SIZED_RESTRICTION.urn and input_pcoll in proto.outputs.values()): read_input = list( self.process_bundle_descriptor.transforms[read_id].inputs.values() diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py index 67d8d6fd333f..fdf291cb6f12 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py @@ -1295,15 +1295,15 @@ def _generate_splits_for_testing( self._worker_handler.control_conn.push(split_request).get()) for t in (0.05, 0.1, 0.2): if ('Unknown process bundle' in split_response.error or - split_response.process_bundle_split == - beam_fn_api_pb2.ProcessBundleSplitResponse()): + split_response.process_bundle_split + == beam_fn_api_pb2.ProcessBundleSplitResponse()): time.sleep(t) split_response = self._worker_handler.control_conn.push( split_request).get() logging.info('Got split response %s', split_response) if ('Unknown process bundle' in split_response.error or - split_response.process_bundle_split == - beam_fn_api_pb2.ProcessBundleSplitResponse()): + split_response.process_bundle_split + == beam_fn_api_pb2.ProcessBundleSplitResponse()): # It may have finished too fast. 
split_result = None elif split_response.error: @@ -1553,24 +1553,24 @@ def __init__(self, step_monitoring_infos, user_metrics_only=True): def query(self, filter=None): counters = [ - MetricResult(k, v, v) for k, - v in self._counters.items() if self.matches(filter, k) + MetricResult(k, v, v) for k, v in self._counters.items() + if self.matches(filter, k) ] distributions = [ - MetricResult(k, v, v) for k, - v in self._distributions.items() if self.matches(filter, k) + MetricResult(k, v, v) for k, v in self._distributions.items() + if self.matches(filter, k) ] gauges = [ - MetricResult(k, v, v) for k, - v in self._gauges.items() if self.matches(filter, k) + MetricResult(k, v, v) for k, v in self._gauges.items() + if self.matches(filter, k) ] string_sets = [ - MetricResult(k, v, v) for k, - v in self._string_sets.items() if self.matches(filter, k) + MetricResult(k, v, v) for k, v in self._string_sets.items() + if self.matches(filter, k) ] bounded_tries = [ - MetricResult(k, v, v) for k, - v in self._bounded_tries.items() if self.matches(filter, k) + MetricResult(k, v, v) for k, v in self._bounded_tries.items() + if self.matches(filter, k) ] return { diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py index 97fccdcda74f..fbb8c6fc9ec8 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py @@ -318,12 +318,27 @@ def infer_output_type(self, input_type): | beam.WindowInto(window.SlidingWindows(size=5, period=3)) | beam.ParDo(PerWindowDoFn())) - assert_that(res, equal_to([ 0*-3, 1*-3, # [-3, 2) - 0*0, 1*0, 2*0, 3* 0, 4* 0, # [ 0, 5) - 3*3, 4*3, 5*3, 6* 3, 7* 3, # [ 3, 8) - 6*6, 7*6, 8*6, 9* 6, # [ 6, 11) - 9*9 # [ 9, 14) - ])) + assert_that( + res, + equal_to([ + 0 * -3, + 1 * -3, # [-3, 2) + 0 * 0, + 1 * 0, + 2 * 0, + 3 * 0, + 4 * 0, # [ 0, 5) + 3 * 3, + 4 * 3, + 5 * 3, + 6 * 3, + 7 * 3, # [ 3, 8) + 6 * 6, + 7 * 6, + 8 * 6, + 9 * 6, # [ 6, 11) + 9 * 9 # [ 9, 14) + ])) def test_batch_to_element_pardo(self): class ArraySumDoFn(beam.DoFn): @@ -581,15 +596,12 @@ def test_multimap_multiside_input(self): side = p | 'side' >> beam.Create([('a', 1), ('b', 2), ('a', 3)]) assert_that( main | 'first map' >> beam.Map( - lambda k, - d, - l: (k, sorted(d[k]), sorted([e[1] for e in l])), + lambda k, d, l: (k, sorted(d[k]), sorted([e[1] for e in l])), beam.pvalue.AsMultiMap(side), beam.pvalue.AsList(side)) | 'second map' >> beam.Map( - lambda k, - d, - l: (k[0], sorted(d[k[0]]), sorted([e[1] for e in l])), + lambda k, d, l: + (k[0], sorted(d[k[0]]), sorted([e[1] for e in l])), beam.pvalue.AsMultiMap(side), beam.pvalue.AsList(side)), equal_to([('a', [1, 3], [1, 2, 3]), ('b', [2], [1, 2, 3])])) @@ -1182,8 +1194,7 @@ def test_large_elements(self): side_input_res = ( big | beam.Map( - lambda x, - side: (x[0], side.count(x[0])), + lambda x, side: (x[0], side.count(x[0])), beam.pvalue.AsList(big | beam.Map(lambda x: x[0])))) assert_that( side_input_res, @@ -1774,14 +1785,8 @@ def test_progress_metrics(self): | beam.GroupByKey() | 'm_out' >> beam.FlatMap( lambda x: [ - 1, - 2, - 3, - 4, - 5, - beam.pvalue.TaggedOutput('once', x), - beam.pvalue.TaggedOutput('twice', x), - beam.pvalue.TaggedOutput('twice', x) + 1, 2, 3, 4, 5, beam.pvalue.TaggedOutput('once', x), beam.pvalue. 
+ TaggedOutput('twice', x), beam.pvalue.TaggedOutput('twice', x) ])) res = p.run() diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index 0af26231b7c9..aadf5cfaa866 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -332,8 +332,7 @@ def executable_stage_transform( beam_runner_api_pb2.ExecutableStagePayload.TimerId( transform_id=transform_id, local_name=tag)) main_inputs.update( - pcoll_id for tag, - pcoll_id in transform.inputs.items() + pcoll_id for tag, pcoll_id in transform.inputs.items() if tag not in payload.side_inputs) else: main_inputs.update(transform.inputs.values()) @@ -341,8 +340,8 @@ def executable_stage_transform( main_input_id = only_element(main_inputs - all_outputs) named_inputs = dict({ - '%s:%s' % (side.transform_id, side.local_name): - stage_components.transforms[side.transform_id].inputs[side.local_name] + '%s:%s' % (side.transform_id, side.local_name): stage_components. + transforms[side.transform_id].inputs[side.local_name] for side in side_inputs }, main_input=main_input_id) @@ -367,8 +366,7 @@ def executable_stage_transform( inputs=named_inputs, outputs={ 'output_%d' % ix: pcoll - for ix, - pcoll in enumerate(external_outputs) + for ix, pcoll in enumerate(external_outputs) }, ) @@ -523,8 +521,8 @@ def maybe_length_prefixed_and_safe_coder(self, coder_id): # have the runner treat it as opaque bytes. return coder_id, self.bytes_coder_id elif (coder.spec.urn == common_urns.coders.WINDOWED_VALUE.urn and - self.components.coders[coder.component_coder_ids[1]].spec.urn not in - self._known_coder_urns): + self.components.coders[coder.component_coder_ids[1]].spec.urn + not in self._known_coder_urns): # A WindowedValue coder with an unknown window type. # This needs to be encoded in such a way that we still have access to its # timestmap. @@ -663,8 +661,7 @@ def pipeline_from_stages( roots = {} # type: Dict[str, Any] parents = { child: parent - for parent, - proto in pipeline_proto.components.transforms.items() + for parent, proto in pipeline_proto.components.transforms.items() for child in proto.subtransforms } @@ -802,8 +799,7 @@ def standard_optimize_phases(): pack_combiners, lift_combiners, expand_sdf, - fix_flatten_coders, - # sink_flattens, + fix_flatten_coders, # sink_flattens, greedily_fuse, read_to_impulse, extract_impulse_stages, @@ -1053,8 +1049,8 @@ def __init__(self, tags): def process(self, element): key, values = element return [ - core.pvalue.TaggedOutput(tag, (key, value)) for tag, - value in zip(self._tags, values) + core.pvalue.TaggedOutput(tag, (key, value)) + for tag, value in zip(self._tags, values) ] def _get_fallback_coder_id(): @@ -1092,8 +1088,8 @@ def _get_limit(stage_name): # and group eligible CombinePerKey stages by parent and environment. 
def get_stage_key(stage): if (len(stage.transforms) == 1 and can_pack(stage.name) and - stage.environment is not None and python_urns.PACKED_COMBINE_FN in - context.components.environments[stage.environment].capabilities): + stage.environment is not None and python_urns.PACKED_COMBINE_FN + in context.components.environments[stage.environment].capabilities): transform = only_transform(stage.transforms) if (transform.spec.urn == common_urns.composites.COMBINE_PER_KEY.urn and len(transform.inputs) == 1 and len(transform.outputs) == 1): @@ -1108,10 +1104,12 @@ def get_stage_key(stage): for stage in ineligible_stages: yield stage - grouped_packable_stages = [(stage_key, subgrouped_stages) for stage_key, - grouped_stages in grouped_eligible_stages.items() - for subgrouped_stages in _group_stages_with_limit( - grouped_stages, _get_limit)] + grouped_packable_stages = [ + (stage_key, subgrouped_stages) + for stage_key, grouped_stages in grouped_eligible_stages.items() + for subgrouped_stages in _group_stages_with_limit( + grouped_stages, _get_limit) + ] for stage_key, packable_stages in grouped_packable_stages: input_pcoll_id, _ = stage_key @@ -2041,7 +2039,8 @@ def sort_stages(stages, pipeline_context): producers = { pcoll: stage - for stage in all_stages for t in stage.transforms + for stage in all_stages + for t in stage.transforms for pcoll in t.outputs.values() } diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py index 3ff2421e6265..8cbf1b610a5e 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py @@ -411,8 +411,8 @@ def annotations(self): 'MyCombinePerKey(min)/Merge', 'MyCombinePerKey(min)/ExtractOutputs']: assert ( - "my_annotation" in - optimized.components.transforms[transform_id].annotations) + "my_annotation" + in optimized.components.transforms[transform_id].annotations) def test_conditionally_packed_combiners(self): class RecursiveCombine(beam.PTransform): diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager_test.py index 8a071520ad15..b7c57eeedab8 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager_test.py @@ -116,8 +116,8 @@ def tsv(key, value, ts): equal_to([ ('k1', IntervalWindow(0, 1), [1, 2, 3]), # On the watermark ('k2', IntervalWindow(0, 1), [1, 2, 3]), # On the watermark - ('k1', IntervalWindow(1, 2), [4, 5]), # On the watermark - ('k2', IntervalWindow(1, 2), [4, 5]), # On the watermark + ('k1', IntervalWindow(1, 2), [4, 5]), # On the watermark + ('k2', IntervalWindow(1, 2), [4, 5]), # On the watermark ('k1', IntervalWindow(0, 1), [6]), # After the watermark ])) @@ -238,11 +238,11 @@ def tsv(key, value, ts): assert_that( result, equal_to([ - ('k1', IntervalWindow(1, 25), {1, 2, 3}), # early - ('k1', IntervalWindow(1, 25), {1, 2, 3}), # on time + ('k1', IntervalWindow(1, 25), {1, 2, 3}), # early + ('k1', IntervalWindow(1, 25), {1, 2, 3}), # on time ('k1', IntervalWindow(30, 40), {4}), # on time - ('k1', IntervalWindow(1, 25), {1, 2, 3, -3, -2}), # late - ('k1', IntervalWindow(1, 40), {1, 2, 3, 4, -3, -2, -1}), # late + ('k1', IntervalWindow(1, 25), {1, 2, 3, -3, -2}), # late + ('k1', 
IntervalWindow(1, 40), {1, 2, 3, 4, -3, -2, -1}), # late ])) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py index e5c9e9c7ac89..ac346776565f 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py @@ -207,10 +207,11 @@ def get_conn_by_worker_id(self, worker_id): with self._lock: return self._connections_by_worker_id[worker_id] - def Control(self, - iterator, # type: Iterable[beam_fn_api_pb2.InstructionResponse] - context # type: ServicerContext - ): + def Control( + self, + iterator, # type: Iterable[beam_fn_api_pb2.InstructionResponse] + context # type: ServicerContext + ): # type: (...) -> Iterator[beam_fn_api_pb2.InstructionRequest] with self._lock: if self._state == self.DONE_STATE: @@ -262,12 +263,13 @@ class WorkerHandler(object): control_conn = None # type: ControlConnection data_conn = None # type: data_plane._GrpcDataChannel - def __init__(self, - control_handler, # type: Any - data_plane_handler, # type: Any - state, # type: sdk_worker.StateHandler - provision_info # type: ExtendedProvisionInfo - ): + def __init__( + self, + control_handler, # type: Any + data_plane_handler, # type: Any + state, # type: sdk_worker.StateHandler + provision_info # type: ExtendedProvisionInfo + ): # type: (...) -> None """Initialize a WorkerHandler. @@ -334,12 +336,13 @@ def wrapper(constructor): return wrapper @classmethod - def create(cls, - environment, # type: beam_runner_api_pb2.Environment - state, # type: sdk_worker.StateHandler - provision_info, # type: ExtendedProvisionInfo - grpc_server # type: GrpcServer - ): + def create( + cls, + environment, # type: beam_runner_api_pb2.Environment + state, # type: sdk_worker.StateHandler + provision_info, # type: ExtendedProvisionInfo + grpc_server # type: GrpcServer + ): # type: (...) -> WorkerHandler constructor, payload_type = cls._registered_environments[environment.urn] return constructor( @@ -355,13 +358,13 @@ def create(cls, @WorkerHandler.register_environment(python_urns.EMBEDDED_PYTHON, None) class EmbeddedWorkerHandler(WorkerHandler): """An in-memory worker_handler for fn API control, state and data planes.""" - - def __init__(self, - unused_payload, # type: None - state, # type: sdk_worker.StateHandler - provision_info, # type: ExtendedProvisionInfo - worker_manager, # type: WorkerHandlerManager - ): + def __init__( + self, + unused_payload, # type: None + state, # type: sdk_worker.StateHandler + provision_info, # type: ExtendedProvisionInfo + worker_manager, # type: WorkerHandlerManager + ): # type: (...) -> None super().__init__( self, data_plane.InMemoryDataChannel(), state, provision_info) @@ -448,11 +451,12 @@ class GrpcServer(object): _DEFAULT_SHUTDOWN_TIMEOUT_SECS = 5 - def __init__(self, - state, # type: StateServicer - provision_info, # type: Optional[ExtendedProvisionInfo] - worker_manager, # type: WorkerHandlerManager - ): + def __init__( + self, + state, # type: StateServicer + provision_info, # type: Optional[ExtendedProvisionInfo] + worker_manager, # type: WorkerHandlerManager + ): # type: (...) 
-> None # Options to have no limits (-1) on the size of the messages @@ -539,12 +543,12 @@ def close(self): class GrpcWorkerHandler(WorkerHandler): """An grpc based worker_handler for fn API control, state and data planes.""" - - def __init__(self, - state, # type: StateServicer - provision_info, # type: ExtendedProvisionInfo - grpc_server # type: GrpcServer - ): + def __init__( + self, + state, # type: StateServicer + provision_info, # type: ExtendedProvisionInfo + grpc_server # type: GrpcServer + ): # type: (...) -> None self._grpc_server = grpc_server super().__init__( @@ -604,12 +608,13 @@ def host_from_worker(self): @WorkerHandler.register_environment( common_urns.environments.EXTERNAL.urn, beam_runner_api_pb2.ExternalPayload) class ExternalWorkerHandler(GrpcWorkerHandler): - def __init__(self, - external_payload, # type: beam_runner_api_pb2.ExternalPayload - state, # type: StateServicer - provision_info, # type: ExtendedProvisionInfo - grpc_server # type: GrpcServer - ): + def __init__( + self, + external_payload, # type: beam_runner_api_pb2.ExternalPayload + state, # type: StateServicer + provision_info, # type: ExtendedProvisionInfo + grpc_server # type: GrpcServer + ): # type: (...) -> None super().__init__(state, provision_info, grpc_server) self._external_payload = external_payload @@ -650,12 +655,13 @@ def host_from_worker(self): @WorkerHandler.register_environment(python_urns.EMBEDDED_PYTHON_GRPC, bytes) class EmbeddedGrpcWorkerHandler(GrpcWorkerHandler): - def __init__(self, - payload, # type: bytes - state, # type: StateServicer - provision_info, # type: ExtendedProvisionInfo - grpc_server # type: GrpcServer - ): + def __init__( + self, + payload, # type: bytes + state, # type: StateServicer + provision_info, # type: ExtendedProvisionInfo + grpc_server # type: GrpcServer + ): # type: (...) -> None super().__init__(state, provision_info, grpc_server) @@ -691,12 +697,13 @@ def stop_worker(self): @WorkerHandler.register_environment(python_urns.SUBPROCESS_SDK, bytes) class SubprocessSdkWorkerHandler(GrpcWorkerHandler): - def __init__(self, - worker_command_line, # type: bytes - state, # type: StateServicer - provision_info, # type: ExtendedProvisionInfo - grpc_server # type: GrpcServer - ): + def __init__( + self, + worker_command_line, # type: bytes + state, # type: StateServicer + provision_info, # type: ExtendedProvisionInfo + grpc_server # type: GrpcServer + ): # type: (...) -> None super().__init__(state, provision_info, grpc_server) self._worker_command_line = worker_command_line @@ -721,12 +728,13 @@ def stop_worker(self): @WorkerHandler.register_environment( common_urns.environments.DOCKER.urn, beam_runner_api_pb2.DockerPayload) class DockerSdkWorkerHandler(GrpcWorkerHandler): - def __init__(self, - payload, # type: beam_runner_api_pb2.DockerPayload - state, # type: StateServicer - provision_info, # type: ExtendedProvisionInfo - grpc_server # type: GrpcServer - ): + def __init__( + self, + payload, # type: beam_runner_api_pb2.DockerPayload + state, # type: StateServicer + provision_info, # type: ExtendedProvisionInfo + grpc_server # type: GrpcServer + ): # type: (...) -> None super().__init__(state, provision_info, grpc_server) self._container_image = payload.container_image @@ -853,10 +861,11 @@ class WorkerHandlerManager(object): Caches ``WorkerHandler``s based on environment id. 
""" - def __init__(self, - environments, # type: Mapping[str, beam_runner_api_pb2.Environment] - job_provision_info # type: ExtendedProvisionInfo - ): + def __init__( + self, + environments, # type: Mapping[str, beam_runner_api_pb2.Environment] + job_provision_info # type: ExtendedProvisionInfo + ): # type: (...) -> None self._environments = environments self._job_provision_info = job_provision_info @@ -990,7 +999,8 @@ def commit(self): return self._underlying class CopyOnWriteList(object): - def __init__(self, + def __init__( + self, underlying, # type: DefaultDict[bytes, Buffer] overlay, # type: Dict[bytes, Buffer] key # type: bytes @@ -1064,10 +1074,11 @@ def _get_one_interval_key(self, state_key, start): state_key_copy.ordered_list_user_state.range.end = start + 1 return self._to_key(state_key_copy) - def get_raw(self, + def get_raw( + self, state_key, # type: beam_fn_api_pb2.StateKey continuation_token=None # type: Optional[bytes] - ): + ): # type: (...) -> Tuple[bytes, Optional[bytes]] if state_key.WhichOneof('type') not in self._SUPPORTED_STATE_TYPES: @@ -1181,10 +1192,11 @@ def __init__(self, state): # type: (StateServicer) -> None self._state = state - def State(self, + def State( + self, request_stream, # type: Iterable[beam_fn_api_pb2.StateRequest] context=None # type: Any - ): + ): # type: (...) -> Iterator[beam_fn_api_pb2.StateResponse] # Note that this eagerly mutates state, assuming any failures are fatal. # Thus it is safe to ignore instruction_id. @@ -1232,10 +1244,11 @@ def close(self): class ControlFuture(object): - def __init__(self, - instruction_id, # type: str - response=None # type: Optional[beam_fn_api_pb2.InstructionResponse] - ): + def __init__( + self, + instruction_id, # type: str + response=None # type: Optional[beam_fn_api_pb2.InstructionResponse] + ): # type: (...) -> None self.instruction_id = instruction_id self._response = response diff --git a/sdks/python/apache_beam/runners/portability/local_job_service.py b/sdks/python/apache_beam/runners/portability/local_job_service.py index a2b4e5e7f939..68e8d6922f20 100644 --- a/sdks/python/apache_beam/runners/portability/local_job_service.py +++ b/sdks/python/apache_beam/runners/portability/local_job_service.py @@ -88,12 +88,12 @@ def __init__(self, staging_dir=None, beam_job_type=None): endpoints_pb2.ApiServiceDescriptor] = None self._beam_job_type = beam_job_type or BeamJob - def create_beam_job(self, - preparation_id, # stype: str - job_name: str, - pipeline: beam_runner_api_pb2.Pipeline, - options: struct_pb2.Struct - ) -> 'BeamJob': + def create_beam_job( + self, + preparation_id, # stype: str + job_name: str, + pipeline: beam_runner_api_pb2.Pipeline, + options: struct_pb2.Struct) -> 'BeamJob': self._artifact_service.register_job( staging_token=preparation_id, dependency_sets=_extract_dependency_sets( diff --git a/sdks/python/apache_beam/runners/portability/portable_runner.py b/sdks/python/apache_beam/runners/portability/portable_runner.py index fe9dcfa62b29..43ca6ca3c38c 100644 --- a/sdks/python/apache_beam/runners/portability/portable_runner.py +++ b/sdks/python/apache_beam/runners/portability/portable_runner.py @@ -181,8 +181,7 @@ def convert_pipeline_option_value(v): # TODO: Define URNs for options. 
p_options = { 'beam:option:' + k + ':v1': convert_pipeline_option_value(v) - for k, - v in all_options.items() if v is not None + for k, v in all_options.items() if v is not None } return job_utils.dict_to_struct(p_options) diff --git a/sdks/python/apache_beam/runners/portability/prism_runner.py b/sdks/python/apache_beam/runners/portability/prism_runner.py index 2082f4aa5616..654ad8da8261 100644 --- a/sdks/python/apache_beam/runners/portability/prism_runner.py +++ b/sdks/python/apache_beam/runners/portability/prism_runner.py @@ -284,8 +284,8 @@ def path_to_binary(self) -> str: # We failed to build for some reason. output = process.stdout.decode("utf-8") - if ("not in a module" not in output) and ( - "no required module provides" not in output): + if ("not in a module" not in output) and ("no required module provides" + not in output): # This branch handles two classes of failures: # 1. Go isn't installed, so it needs to be installed by the Beam SDK # developer. diff --git a/sdks/python/apache_beam/runners/portability/prism_runner_test.py b/sdks/python/apache_beam/runners/portability/prism_runner_test.py index be4c0b873a3e..00116e123ce4 100644 --- a/sdks/python/apache_beam/runners/portability/prism_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/prism_runner_test.py @@ -281,11 +281,9 @@ def _extract_side_effect(self, fn, path=None): [False, False]]) def test_with_unknown_path(self, custom_bin_cache, ignore_cache): self.assertRaises( - FileNotFoundError, - lambda: self.job_server.local_bin( - "/path/unknown", - bin_cache=self.cache_dir if custom_bin_cache else '', - ignore_cache=ignore_cache)) + FileNotFoundError, lambda: self.job_server.local_bin( + "/path/unknown", bin_cache=self.cache_dir + if custom_bin_cache else '', ignore_cache=ignore_cache)) @parameterized.expand([ [True, True, True], diff --git a/sdks/python/apache_beam/runners/portability/stager.py b/sdks/python/apache_beam/runners/portability/stager.py index cb0a3c88a477..338901c21058 100644 --- a/sdks/python/apache_beam/runners/portability/stager.py +++ b/sdks/python/apache_beam/runners/portability/stager.py @@ -214,8 +214,8 @@ def create_job_resources( if not skip_prestaged_dependencies: requirements_cache_path = ( os.path.join(tempfile.gettempdir(), 'dataflow-requirements-cache') if - (setup_options.requirements_cache is None) else - setup_options.requirements_cache) + (setup_options.requirements_cache + is None) else setup_options.requirements_cache) if (setup_options.requirements_cache != SKIP_REQUIREMENTS_CACHE and not os.path.exists(requirements_cache_path)): os.makedirs(requirements_cache_path) diff --git a/sdks/python/apache_beam/runners/render.py b/sdks/python/apache_beam/runners/render.py index 45e66e1ba06a..0827d73cc307 100644 --- a/sdks/python/apache_beam/runners/render.py +++ b/sdks/python/apache_beam/runners/render.py @@ -151,9 +151,8 @@ def __init__(self, pipeline, options): if options.render_leaf_composite_nodes: is_leaf = lambda transform_id: any( re.match( - pattern, - self.pipeline.components.transforms[transform_id].unique_name) - for patterns in options.render_leaf_composite_nodes + pattern, self.pipeline.components.transforms[transform_id]. 
+ unique_name) for patterns in options.render_leaf_composite_nodes for pattern in patterns.split(',')) self.leaf_composites = set() diff --git a/sdks/python/apache_beam/runners/trivial_runner.py b/sdks/python/apache_beam/runners/trivial_runner.py index af8f4f92c4e3..6517a3adc373 100644 --- a/sdks/python/apache_beam/runners/trivial_runner.py +++ b/sdks/python/apache_beam/runners/trivial_runner.py @@ -278,10 +278,10 @@ def group_by_key_and_window(self, input_pcoll, output_pcoll, execution_state): windowing = components.windowing_strategies[ components.pcollections[input_pcoll].windowing_strategy_id] - if (windowing.merge_status == - beam_runner_api_pb2.MergeStatus.Enum.NON_MERGING and - windowing.output_time == - beam_runner_api_pb2.OutputTime.Enum.END_OF_WINDOW): + if (windowing.merge_status + == beam_runner_api_pb2.MergeStatus.Enum.NON_MERGING and + windowing.output_time + == beam_runner_api_pb2.OutputTime.Enum.END_OF_WINDOW): # This is the "easy" case, show how to do it by hand. # Note that we're grouping by encoded key, and also by the window. grouped = collections.defaultdict(list) diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor.py b/sdks/python/apache_beam/runners/worker/bundle_processor.py index 89c137fe4366..ad48358d588e 100644 --- a/sdks/python/apache_beam/runners/worker/bundle_processor.py +++ b/sdks/python/apache_beam/runners/worker/bundle_processor.py @@ -725,8 +725,8 @@ def add(self, start: int, end: int) -> None: def __contains__(self, key: int) -> bool: idx = self._sorted_starts.bisect_left(key) - return (idx < len(self._sorted_starts) and self._sorted_starts[idx] == key - ) or (idx > 0 and self._sorted_ends[idx - 1] > key) + return (idx < len(self._sorted_starts) and self._sorted_starts[idx] + == key) or (idx > 0 and self._sorted_ends[idx - 1] > key) def __len__(self) -> int: assert len(self._sorted_starts) == len(self._sorted_ends) @@ -1166,8 +1166,8 @@ def is_side_input(transform_proto, tag): def get_operation(transform_id: str) -> operations.Operation: transform_consumers = { tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]] - for tag, - pcoll_id in descriptor.transforms[transform_id].outputs.items() + for tag, pcoll_id in + descriptor.transforms[transform_id].outputs.items() } # Initialize transform-specific state in the Data Sampler. 
@@ -1287,8 +1287,8 @@ def process_bundle( timer_info.output_stream.close() return ([ - self.delayed_bundle_application(op, residual) for op, - residual in execution_context.delayed_applications + self.delayed_bundle_application(op, residual) + for op, residual in execution_context.delayed_applications ], self.requires_finalization()) @@ -1445,10 +1445,9 @@ def __init__( self.state_handler = state_handler self.context = pipeline_context.PipelineContext( descriptor, - iterable_state_read=lambda token, - element_coder_impl: _StateBackedIterable( - state_handler, - beam_fn_api_pb2.StateKey( + iterable_state_read=lambda token, element_coder_impl: + _StateBackedIterable( + state_handler, beam_fn_api_pb2.StateKey( runner=beam_fn_api_pb2.StateKey.Runner(key=token)), element_coder_impl)) self.data_sampler = data_sampler @@ -1539,8 +1538,7 @@ def get_output_coders( ) -> Dict[str, coders.Coder]: return { tag: self.get_windowed_coder(pcoll_id) - for tag, - pcoll_id in transform_proto.outputs.items() + for tag, pcoll_id in transform_proto.outputs.items() } def get_only_output_coder( @@ -1552,8 +1550,7 @@ def get_input_coders( ) -> Dict[str, coders.WindowedValueCoder]: return { tag: self.get_windowed_coder(pcoll_id) - for tag, - pcoll_id in transform_proto.inputs.items() + for tag, pcoll_id in transform_proto.inputs.items() } def get_only_input_coder( @@ -1851,8 +1848,7 @@ def _create_pardo_operation( input_tags_to_coders = factory.get_input_coders(transform_proto) tagged_side_inputs = [ (tag, beam.pvalue.SideInputData.from_runner_api(si, factory.context)) - for tag, - si in pardo_proto.side_inputs.items() + for tag, si in pardo_proto.side_inputs.items() ] tagged_side_inputs.sort( key=lambda tag_si: sideinputs.get_sideinput_index(tag_si[0])) diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py index 2f9de24594b2..d7c77491eb4e 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane.py +++ b/sdks/python/apache_beam/runners/worker/data_plane.py @@ -135,7 +135,7 @@ def __init__( close_callback=None, # type: Optional[Callable[[bytes], None]] flush_callback=None, # type: Optional[Callable[[bytes], None]] size_flush_threshold=_DEFAULT_SIZE_FLUSH_THRESHOLD, # type: int - large_buffer_warn_threshold_bytes = 512 << 20 # type: int + large_buffer_warn_threshold_bytes=512 << 20 # type: int ): super().__init__(close_callback) self._flush_callback = flush_callback @@ -233,7 +233,6 @@ def _flush(): class PeriodicThread(threading.Thread): """Call a function periodically with the specified number of seconds""" - def __init__( self, interval, # type: float @@ -726,7 +725,6 @@ def set_inputs(self, elements_iterator): class GrpcClientDataChannel(_GrpcDataChannel): """A DataChannel wrapping the client side of a BeamFnData connection.""" - def __init__( self, data_stub, # type: beam_fn_api_pb2_grpc.BeamFnDataStub @@ -795,7 +793,6 @@ class GrpcClientDataChannelFactory(DataChannelFactory): Caches the created channels by ``data descriptor url``. 
""" - def __init__( self, credentials=None, # type: Any diff --git a/sdks/python/apache_beam/runners/worker/data_sampler.py b/sdks/python/apache_beam/runners/worker/data_sampler.py index a0f02a51c8ad..c95c88f2dbdf 100644 --- a/sdks/python/apache_beam/runners/worker/data_sampler.py +++ b/sdks/python/apache_beam/runners/worker/data_sampler.py @@ -155,9 +155,8 @@ def flush(self, clear: bool = True) -> List[beam_fn_api_pb2.SampledElement]: exceptions = [s for s in self._exceptions] samples = [s for s in self._samples if id(s) not in seen] else: - exceptions = [ - (self.remove_windowed_value(a), b) for a, b in self._exceptions - ] + exceptions = [(self.remove_windowed_value(a), b) + for a, b in self._exceptions] samples = [ self.remove_windowed_value(s) for s in self._samples if id(s) not in seen @@ -186,8 +185,7 @@ def flush(self, clear: bool = True) -> List[beam_fn_api_pb2.SampledElement]: exception=beam_fn_api_pb2.SampledElement.Exception( instruction_id=exn.instruction_id, transform_id=exn.transform_id, - error=exn.msg)) for s, - exn in exceptions) + error=exn.msg)) for s, exn in exceptions) except Exception as e: # pylint: disable=broad-except _LOGGER.warning('Could not encode sampled exception values: %s' % e) diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index 979c7cdb53be..69815acc7194 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -111,8 +111,8 @@ def map_log_level( return LOG_LEVEL_TO_LOGENTRY_MAP[level] except KeyError: return max( - beam_level for python_level, - beam_level in LOG_LEVEL_TO_LOGENTRY_MAP.items() + beam_level + for python_level, beam_level in LOG_LEVEL_TO_LOGENTRY_MAP.items() if python_level <= level) def emit(self, record: logging.LogRecord) -> None: diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 2cf7dff9d57f..7018cfaf683b 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -316,8 +316,7 @@ def test_extracts_transform_id_during_exceptions(self): def _create_test(name, num_logs): setattr( FnApiLogRecordHandlerTest, - 'test_%s' % name, - lambda self: self._verify_fn_log_handler(num_logs)) + 'test_%s' % name, lambda self: self._verify_fn_log_handler(num_logs)) for test_name, num_logs_entries in data.items(): diff --git a/sdks/python/apache_beam/runners/worker/opcounters.py b/sdks/python/apache_beam/runners/worker/opcounters.py index 5496bccd014e..f5883cfbf2ef 100644 --- a/sdks/python/apache_beam/runners/worker/opcounters.py +++ b/sdks/python/apache_beam/runners/worker/opcounters.py @@ -122,13 +122,12 @@ class SideInputReadCounter(TransformIOCounter): Note that the declaring step originally receives the side input, but it may not be the only step that spends time reading from this side input. """ - - def __init__(self, - counter_factory, - state_sampler, # type: StateSampler - declaring_step, - input_index - ): + def __init__( + self, + counter_factory, + state_sampler, # type: StateSampler + declaring_step, + input_index): """Create a side input read counter. 
Args: @@ -186,7 +185,7 @@ def __init__( index, suffix='out', producer_type_hints=None, - producer_batch_converter=None, # type: Optional[BatchConverter] + producer_batch_converter=None, # type: Optional[BatchConverter] ): self._counter_factory = counter_factory self.element_counter = counter_factory.get_counter( diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py index 1b86cdaae561..0823a1e37484 100644 --- a/sdks/python/apache_beam/runners/worker/operation_specs.py +++ b/sdks/python/apache_beam/runners/worker/operation_specs.py @@ -56,25 +56,14 @@ def worker_printable_fields(workerproto): '%s=%s' % (name, value) # _asdict is the only way and cannot subclass this generated class # pylint: disable=protected-access - for name, - value in workerproto._asdict().items() + for name, value in workerproto._asdict().items() # want to output value 0 but not None nor [] if (value or value == 0) and name not in ( - 'coder', - 'coders', - 'output_coders', - 'elements', - 'combine_fn', - 'serialized_fn', - 'window_fn', - 'append_trailing_newlines', - 'strip_trailing_newlines', - 'compression_type', - 'context', - 'start_shuffle_position', - 'end_shuffle_position', - 'shuffle_reader_config', - 'shuffle_writer_config') + 'coder', 'coders', 'output_coders', 'elements', 'combine_fn', + 'serialized_fn', 'window_fn', 'append_trailing_newlines', + 'strip_trailing_newlines', 'compression_type', 'context', + 'start_shuffle_position', 'end_shuffle_position', + 'shuffle_reader_config', 'shuffle_writer_config') ] diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index 87fb3005cbc7..2b20bebe0940 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -114,15 +114,16 @@ class ConsumerSet(Receiver): ConsumerSet are attached to the outputting Operation. """ @staticmethod - def create(counter_factory, - step_name, # type: str - output_index, - consumers, # type: List[Operation] - coder, - producer_type_hints, - producer_batch_converter, # type: Optional[BatchConverter] - output_sampler=None, # type: Optional[OutputSampler] - ): + def create( + counter_factory, + step_name, # type: str + output_index, + consumers, # type: List[Operation] + coder, + producer_type_hints, + producer_batch_converter, # type: Optional[BatchConverter] + output_sampler=None, # type: Optional[OutputSampler] + ): # type: (...) 
-> ConsumerSet if len(consumers) == 1: consumer = consumers[0] @@ -151,16 +152,16 @@ def create(counter_factory, producer_batch_converter, output_sampler) - def __init__(self, - counter_factory, - step_name, # type: str - output_index, - consumers, - coder, - producer_type_hints, - producer_batch_converter, - output_sampler - ): + def __init__( + self, + counter_factory, + step_name, # type: str + output_index, + consumers, + coder, + producer_type_hints, + producer_batch_converter, + output_sampler): self.opcounter = opcounters.OperationCounters( counter_factory, step_name, @@ -237,15 +238,15 @@ def __repr__(self): class SingletonElementConsumerSet(ConsumerSet): """ConsumerSet representing a single consumer that can only process elements (not batches).""" - def __init__(self, - counter_factory, - step_name, - output_index, - consumer, # type: Operation - coder, - producer_type_hints, - output_sampler - ): + def __init__( + self, + counter_factory, + step_name, + output_index, + consumer, # type: Operation + coder, + producer_type_hints, + output_sampler): super().__init__( counter_factory, step_name, @@ -283,15 +284,16 @@ class GeneralPurposeConsumerSet(ConsumerSet): """ MAX_BATCH_SIZE = 4096 - def __init__(self, - counter_factory, - step_name, # type: str - output_index, - coder, - producer_type_hints, - consumers, # type: List[Operation] - producer_batch_converter, - output_sampler): + def __init__( + self, + counter_factory, + step_name, # type: str + output_index, + coder, + producer_type_hints, + consumers, # type: List[Operation] + producer_batch_converter, + output_sampler): super().__init__( counter_factory, step_name, @@ -413,13 +415,13 @@ class Operation(object): An operation can have one or more outputs and for each output it can have one or more receiver operations that will take that as input. """ - - def __init__(self, - name_context, # type: common.NameContext - spec, - counter_factory, - state_sampler # type: StateSampler - ): + def __init__( + self, + name_context, # type: common.NameContext + spec, + counter_factory, + state_sampler # type: StateSampler + ): """Initializes a worker operation instance. 
Args: @@ -488,8 +490,8 @@ def get_output_sampler(output_num): coder, self._get_runtime_performance_hints(), self.get_output_batch_converter(), - get_output_sampler(i)) for i, - coder in enumerate(self.spec.output_coders) + get_output_sampler(i)) + for i, coder in enumerate(self.spec.output_coders) ] self.setup_done = True @@ -792,15 +794,15 @@ def total_output_bytes(self): class DoOperation(Operation): """A Do operation that will execute a custom DoFn for each input element.""" - - def __init__(self, - name, # type: common.NameContext - spec, # operation_specs.WorkerDoFn # need to fix this type - counter_factory, - sampler, - side_input_maps=None, - user_state_context=None, - ): + def __init__( + self, + name, # type: common.NameContext + spec, # operation_specs.WorkerDoFn # need to fix this type + counter_factory, + sampler, + side_input_maps=None, + user_state_context=None, + ): super(DoOperation, self).__init__(name, spec, counter_factory, sampler) self.side_input_maps = side_input_maps self.user_state_context = user_state_context diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index 3cb1a26b77f1..7a1b30df1e29 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -174,8 +174,8 @@ def __init__( data_sampler=None, # type: Optional[data_sampler.DataSampler] # Unrecoverable SDK harness initialization error (if any) # that should be reported to the runner when proocessing the first bundle. - deferred_exception=None, # type: Optional[Exception] - runner_capabilities=frozenset(), # type: FrozenSet[str] + deferred_exception=None, # type: Optional[Exception] + runner_capabilities=frozenset(), # type: FrozenSet[str] ): # type: (...) -> None self._alive = True @@ -361,8 +361,7 @@ def _request_harness_monitoring_infos(self, request): ).to_runner_api_monitoring_infos(None).values() self._execute( lambda: beam_fn_api_pb2.InstructionResponse( - instruction_id=request.instruction_id, - harness_monitoring_infos=( + instruction_id=request.instruction_id, harness_monitoring_infos=( beam_fn_api_pb2.HarnessMonitoringInfosResponse( monitoring_data={ SHORT_ID_CACHE.get_short_id(info): info.payload @@ -374,8 +373,8 @@ def _request_monitoring_infos(self, request): # type: (beam_fn_api_pb2.InstructionRequest) -> None self._execute( lambda: beam_fn_api_pb2.InstructionResponse( - instruction_id=request.instruction_id, - monitoring_infos=beam_fn_api_pb2.MonitoringInfosMetadataResponse( + instruction_id=request.instruction_id, monitoring_infos= + beam_fn_api_pb2.MonitoringInfosMetadataResponse( monitoring_info=SHORT_ID_CACHE.get_infos( request.monitoring_infos.monitoring_info_id))), request) @@ -1297,10 +1296,11 @@ def _lazy_iterator( if not continuation_token: break - def _get_raw(self, + def _get_raw( + self, state_key, # type: beam_fn_api_pb2.StateKey continuation_token # type: Optional[bytes] - ): + ): # type: (...) 
-> Tuple[coder_impl.create_InputStream, Optional[bytes]] """Call underlying get_raw with performance statistics and detection.""" diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py index 3389f0c7afb1..b3c81fd93467 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py @@ -232,8 +232,7 @@ def _load_pipeline_options(options_json): return { re.match(portable_option_regex, k).group('key') if re.match( portable_option_regex, k) else k: v - for k, - v in options.items() + for k, v in options.items() } diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py index 498a07b70e9e..5ecd9616fcf9 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_main_test.py @@ -160,7 +160,7 @@ def test__get_log_level_from_options_dict(self): def test__set_log_level_overrides(self): test_cases = [ - ([], {}), # not provided, as a smoke test + ([], {}), # not provided, as a smoke test ( # single overrides ['{"fake_module_1a.b":"DEBUG","fake_module_1c.d":"INFO"}'], @@ -168,8 +168,7 @@ def test__set_log_level_overrides(self): "fake_module_1a.b": logging.DEBUG, "fake_module_1a.b.f": logging.DEBUG, "fake_module_1c.d": logging.INFO - } - ), + }), ( # multiple overrides, the last takes precedence [ @@ -183,8 +182,7 @@ def test__set_log_level_overrides(self): "fake_module_2c.d": logging.ERROR, "fake_module_2c.d.e": 15, "fake_module_2c.d.f": logging.ERROR - } - ) + }) ] for case, expected in test_cases: overrides = self._overrides_case_to_option_dict(case) diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py index 17bf043d020c..214a085fad28 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py @@ -574,77 +574,60 @@ def testShortIdAssignment(self): test_cases = [ TestCase(*args) for args in [ ( - "1", - metrics_pb2.MonitoringInfo( + "1", metrics_pb2.MonitoringInfo( urn="beam:metric:user:distribution_int64:v1", type="beam:metrics:distribution_int64:v1")), ( - "2", - metrics_pb2.MonitoringInfo( + "2", metrics_pb2.MonitoringInfo( urn="beam:metric:element_count:v1", type="beam:metrics:sum_int64:v1")), ( - "3", - metrics_pb2.MonitoringInfo( + "3", metrics_pb2.MonitoringInfo( urn="beam:metric:ptransform_progress:completed:v1", type="beam:metrics:progress:v1")), ( - "4", - metrics_pb2.MonitoringInfo( + "4", metrics_pb2.MonitoringInfo( urn="beam:metric:user:distribution_double:v1", type="beam:metrics:distribution_double:v1")), ( - "5", - metrics_pb2.MonitoringInfo( + "5", metrics_pb2.MonitoringInfo( urn="TestingSentinelUrn", type="TestingSentinelType")), ( - "6", - metrics_pb2.MonitoringInfo( + "6", metrics_pb2.MonitoringInfo( urn= "beam:metric:pardo_execution_time:finish_bundle_msecs:v1", type="beam:metrics:sum_int64:v1")), # This case and the next one validates that different labels # with the same urn are in fact assigned different short ids. 
( - "7", - metrics_pb2.MonitoringInfo( + "7", metrics_pb2.MonitoringInfo( urn="beam:metric:user:sum_int64:v1", - type="beam:metrics:sum_int64:v1", - labels={ - "PTRANSFORM": "myT", - "NAMESPACE": "harness", + type="beam:metrics:sum_int64:v1", labels={ + "PTRANSFORM": "myT", "NAMESPACE": "harness", "NAME": "metricNumber7" })), ( - "8", - metrics_pb2.MonitoringInfo( + "8", metrics_pb2.MonitoringInfo( urn="beam:metric:user:sum_int64:v1", - type="beam:metrics:sum_int64:v1", - labels={ - "PTRANSFORM": "myT", - "NAMESPACE": "harness", + type="beam:metrics:sum_int64:v1", labels={ + "PTRANSFORM": "myT", "NAMESPACE": "harness", "NAME": "metricNumber8" })), ( - "9", - metrics_pb2.MonitoringInfo( + "9", metrics_pb2.MonitoringInfo( urn="beam:metric:user:top_n_double:v1", - type="beam:metrics:top_n_double:v1", - labels={ - "PTRANSFORM": "myT", - "NAMESPACE": "harness", + type="beam:metrics:top_n_double:v1", labels={ + "PTRANSFORM": "myT", "NAMESPACE": "harness", "NAME": "metricNumber7" })), ( - "a", - metrics_pb2.MonitoringInfo( + "a", metrics_pb2.MonitoringInfo( urn="beam:metric:element_count:v1", type="beam:metrics:sum_int64:v1", labels={"PCOLLECTION": "myPCol"})), # validate payload is ignored for shortId assignment ( - "3", - metrics_pb2.MonitoringInfo( + "3", metrics_pb2.MonitoringInfo( urn="beam:metric:ptransform_progress:completed:v1", type="beam:metrics:progress:v1", payload=b"this is ignored!")) @@ -680,8 +663,8 @@ def testShortIdAssignment(self): def monitoringInfoMetadata(info): return { descriptor.name: value - for descriptor, - value in info.ListFields() if not descriptor.name == "payload" + for descriptor, value in info.ListFields() + if not descriptor.name == "payload" } diff --git a/sdks/python/apache_beam/runners/worker/statecache_test.py b/sdks/python/apache_beam/runners/worker/statecache_test.py index a5d1ff2e01e3..7e02ff6a416a 100644 --- a/sdks/python/apache_beam/runners/worker/statecache_test.py +++ b/sdks/python/apache_beam/runners/worker/statecache_test.py @@ -269,11 +269,13 @@ def load_key(output): output["time"] = time.time_ns() t1_output = {} - t1 = threading.Thread(target=load_key, args=(t1_output, )) + t1 = threading.Thread( + target=load_key, args=(t1_output, )) t1.start() t2_output = {} - t2 = threading.Thread(target=load_key, args=(t2_output, )) + t2 = threading.Thread( + target=load_key, args=(t2_output, )) t2.start() # Wait for both threads to start @@ -311,7 +313,8 @@ def load_key(output): output["value"] = cache.get("key", wait_for_event) t1_output = {} - t1 = threading.Thread(target=load_key, args=(t1_output, )) + t1 = threading.Thread( + target=load_key, args=(t1_output, )) t1.start() # Wait for the load to start, update the key, and then let the load finish @@ -366,19 +369,21 @@ def test_get_deep_size_builtin_objects(self): built-in objects. 
""" primitive_test_objects = [ - 1, # int - 2.0, # float - 1+1j, # complex - True, # bool - 'hello,world', # str - b'\00\01\02', # bytes + 1, # int + 2.0, # float + 1 + 1j, # complex + True, # bool + 'hello,world', # str + b'\00\01\02', # bytes ] collection_test_objects = [ - [3, 4, 5], # list - (6, 7), # tuple - {'a', 'b', 'c'}, # set - {'k': 8, 'l': 9}, # dict + [3, 4, 5], # list + (6, 7), # tuple + {'a', 'b', 'c'}, # set + { + 'k': 8, 'l': 9 + }, # dict ] for obj in primitive_test_objects: diff --git a/sdks/python/apache_beam/runners/worker/worker_status.py b/sdks/python/apache_beam/runners/worker/worker_status.py index 186453ffd375..d67bd4437fbb 100644 --- a/sdks/python/apache_beam/runners/worker/worker_status.py +++ b/sdks/python/apache_beam/runners/worker/worker_status.py @@ -289,8 +289,8 @@ def _get_stack_trace(self, sampler_info): return '-NOT AVAILABLE-' def _passed_lull_timeout_since_last_log(self) -> bool: - if (time.time() - self._last_lull_logged_secs > - self.log_lull_timeout_ns / 1e9): + if (time.time() - self._last_lull_logged_secs + > self.log_lull_timeout_ns / 1e9): self._last_lull_logged_secs = time.time() return True else: diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py index a12b06c8c3eb..93b9fb342bc0 100644 --- a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py +++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py @@ -68,7 +68,9 @@ def get_test_config_container( ) -def get_change_point_config(params: Dict[str, Any], ) -> ChangePointConfig: +def get_change_point_config( + params: Dict[str, Any], +) -> ChangePointConfig: """ Args: params: Dict containing parameters to run change point analysis. diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/workflow.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/workflow.py index e60e3a47c0d1..67d7bcee28be 100644 --- a/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/workflow.py +++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/pipelines/workflow.py @@ -42,8 +42,7 @@ def format_values(instance): return { k: v.squeeze(0).tolist() if v is not None else self._input_feature_spec[k].default_value - for k, - v in instance.items() + for k, v in instance.items() } return ( diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py index 2173a93c2abe..5588bc3cb1eb 100644 --- a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py +++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query1.py @@ -38,8 +38,5 @@ def load(events, metadata=None, pipeline_options=None): | nexmark_query_util.JustBids() | 'ConvertToEuro' >> beam.Map( lambda bid: nexmark_model.Bid( - bid.auction, - bid.bidder, - bid.price * USD_TO_EURO, - bid.date_time, + bid.auction, bid.bidder, bid.price * USD_TO_EURO, bid.date_time, bid.extra))) diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py index eb16d2dc36a0..f390c8c37001 100644 --- a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py +++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py @@ -74,10 +74,8 @@ def load(events, metadata=None, pipeline_options=None): JoinFn(metadata.get('max_auction_waiting_time'))) | 'query3_output' >> beam.Map( lambda t: { - ResultNames.NAME: t[1].name, - ResultNames.CITY: 
t[1].city, - ResultNames.STATE: t[1].state, - ResultNames.AUCTION_ID: t[0].id + ResultNames.NAME: t[1].name, ResultNames.CITY: t[1].city, + ResultNames.STATE: t[1].state, ResultNames.AUCTION_ID: t[0].id })) diff --git a/sdks/python/apache_beam/testing/datatype_inference.py b/sdks/python/apache_beam/testing/datatype_inference.py index b68f5ec4a125..d7ecf77c5d5f 100644 --- a/sdks/python/apache_beam/testing/datatype_inference.py +++ b/sdks/python/apache_beam/testing/datatype_inference.py @@ -64,9 +64,8 @@ def infer_typehints_schema(data): for row in data: for key, value in row.items(): column_data.setdefault(key, []).append(value) - column_types = OrderedDict([ - (key, infer_element_type(values)) for key, values in column_data.items() - ]) + column_types = OrderedDict([(key, infer_element_type(values)) + for key, values in column_data.items()]) return column_types @@ -101,8 +100,7 @@ def typehint_to_avro_type(value): column_types = infer_typehints_schema(data) avro_fields = [{ "name": str(key), "type": typehint_to_avro_type(value) - } for key, - value in column_types.items()] + } for key, value in column_types.items()] schema_dict = { "namespace": "example.avro", "name": "User", @@ -127,7 +125,6 @@ def infer_pyarrow_schema(data): for row in data: for key, value in row.items(): column_data.setdefault(key, []).append(value) - column_types = OrderedDict([ - (key, pa.array(value).type) for key, value in column_data.items() - ]) + column_types = OrderedDict([(key, pa.array(value).type) + for key, value in column_data.items()]) return pa.schema(list(column_types.items())) diff --git a/sdks/python/apache_beam/testing/datatype_inference_test.py b/sdks/python/apache_beam/testing/datatype_inference_test.py index 001752f8ab27..0ec7c786e047 100644 --- a/sdks/python/apache_beam/testing/datatype_inference_test.py +++ b/sdks/python/apache_beam/testing/datatype_inference_test.py @@ -149,8 +149,8 @@ def get_collumns_in_order(test_data): OrderedDict([(c, None) for c in columns]) ] test_case["type_schema"] = OrderedDict([ - (k, typehints.Union[v, type(None)]) for k, - v in test_case["type_schema"].items() + (k, typehints.Union[v, type(None)]) + for k, v in test_case["type_schema"].items() ]) test_case["avro_schema"] = nullify_avro_schema(test_case["avro_schema"]) nullified_test_data.append(test_case) diff --git a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py index caadbaca1e1e..33dfeeddba4f 100644 --- a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py +++ b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py @@ -287,8 +287,8 @@ def _prepare_extra_metrics( if not extra_metrics: extra_metrics = {} return [ - Metric(ts, metric_id, v, label=k).as_dict() for k, - v in extra_metrics.items() + Metric(ts, metric_id, v, label=k).as_dict() + for k, v in extra_metrics.items() ] def publish_values(self, labeled_values): @@ -299,8 +299,7 @@ def publish_values(self, labeled_values): """ metric_dicts = [ Metric(time.time(), uuid.uuid4().hex, value, label=label).as_dict() - for label, - value in labeled_values + for label, value in labeled_values ] for publisher in self.publishers: diff --git a/sdks/python/apache_beam/testing/synthetic_pipeline.py b/sdks/python/apache_beam/testing/synthetic_pipeline.py index b18de244e3f8..0ccbfbfbcc18 100644 --- a/sdks/python/apache_beam/testing/synthetic_pipeline.py +++ b/sdks/python/apache_beam/testing/synthetic_pipeline.py @@ -620,8 +620,7 @@ def 
expand(self, pc): pc | beam.Map(rotate_key) | beam.Map( - lambda elem, - ignored: elem, + lambda elem, ignored: elem, beam.pvalue.AsIter(pc | beam.FlatMap(lambda elem: None)))) diff --git a/sdks/python/apache_beam/transforms/batch_dofn_test.py b/sdks/python/apache_beam/transforms/batch_dofn_test.py index f84b3689d9be..1c09256c27b7 100644 --- a/sdks/python/apache_beam/transforms/batch_dofn_test.py +++ b/sdks/python/apache_beam/transforms/batch_dofn_test.py @@ -263,8 +263,7 @@ def test_cant_infer_batchconverter_input_raises(self): pc = p | beam.Create(['a', 'b', 'c']) with self.assertRaisesRegex( - TypeError, - # Error should mention "input", and the name of the DoFn + TypeError, # Error should mention "input", and the name of the DoFn r'input.*BatchDoFn.*'): _ = pc | beam.ParDo(BatchDoFn()) diff --git a/sdks/python/apache_beam/transforms/combiners.py b/sdks/python/apache_beam/transforms/combiners.py index bc736b327284..58267ef97ac6 100644 --- a/sdks/python/apache_beam/transforms/combiners.py +++ b/sdks/python/apache_beam/transforms/combiners.py @@ -676,21 +676,21 @@ def merge_accumulators(self, accumulators, *args, **kwargs): if len(accumulators_batch) == 1: break result = [ - c.merge_accumulators(a, *args, **kwargs) for c, - a in zip(self._combiners, zip(*accumulators_batch)) + c.merge_accumulators(a, *args, **kwargs) + for c, a in zip(self._combiners, zip(*accumulators_batch)) ] return result def compact(self, accumulator, *args, **kwargs): return [ - c.compact(a, *args, **kwargs) for c, - a in zip(self._combiners, accumulator) + c.compact(a, *args, **kwargs) + for c, a in zip(self._combiners, accumulator) ] def extract_output(self, accumulator, *args, **kwargs): return tuple( - c.extract_output(a, *args, **kwargs) for c, - a in zip(self._combiners, accumulator)) + c.extract_output(a, *args, **kwargs) + for c, a in zip(self._combiners, accumulator)) def teardown(self, *args, **kwargs): for c in reversed(self._combiners): @@ -706,9 +706,8 @@ class TupleCombineFn(_TupleCombineFnBase): """ def add_input(self, accumulator, element, *args, **kwargs): return [ - c.add_input(a, e, *args, **kwargs) for c, - a, - e in zip(self._combiners, accumulator, element) + c.add_input(a, e, *args, **kwargs) + for c, a, e in zip(self._combiners, accumulator, element) ] def with_common_input(self): @@ -724,8 +723,8 @@ class SingleInputTupleCombineFn(_TupleCombineFnBase): """ def add_input(self, accumulator, element, *args, **kwargs): return [ - c.add_input(a, element, *args, **kwargs) for c, - a in zip(self._combiners, accumulator) + c.add_input(a, element, *args, **kwargs) + for c, a in zip(self._combiners, accumulator) ] diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py index b420d1d66d09..53c36a00738f 100644 --- a/sdks/python/apache_beam/transforms/core.py +++ b/sdks/python/apache_beam/transforms/core.py @@ -393,8 +393,8 @@ def get_function_args_defaults(f): parameter.POSITIONAL_ONLY, parameter.POSITIONAL_OR_KEYWORD ] args = [ - name for name, - p in signature.parameters.items() if p.kind in _SUPPORTED_ARG_TYPES + name for name, p in signature.parameters.items() + if p.kind in _SUPPORTED_ARG_TYPES ] defaults = [ p.default for p in signature.parameters.values() @@ -801,12 +801,12 @@ def default_type_hints(self): self.process_batch) or typehints.decorators.IOTypeHints.empty() # Then we deconflict with the typehint from process, if it exists - if (process_batch_type_hints.output_types != - typehints.decorators.IOTypeHints.empty().output_types): - if 
(process_type_hints.output_types != - typehints.decorators.IOTypeHints.empty().output_types and - process_batch_type_hints.output_types != - process_type_hints.output_types): + if (process_batch_type_hints.output_types + != typehints.decorators.IOTypeHints.empty().output_types): + if (process_type_hints.output_types + != typehints.decorators.IOTypeHints.empty().output_types and + process_batch_type_hints.output_types + != process_type_hints.output_types): raise TypeError( f"DoFn {self!r} yields element from both process and " "process_batch, but they have mismatched output typehints:\n" @@ -1888,10 +1888,10 @@ def to_runner_api_parameter(self, context, **extra_kwargs): # type: ignore[over # transformation is currently irreversible given how # remove_objects_from_args and insert_values_in_args # are currently implemented. - side_inputs={(SIDE_INPUT_PREFIX + '%s') % ix: - si.to_runner_api(context) - for ix, - si in enumerate(self.side_inputs)})) + side_inputs={ + (SIDE_INPUT_PREFIX + '%s') % ix: si.to_runner_api(context) + for ix, si in enumerate(self.side_inputs) + })) @staticmethod @PTransform.register_urn( @@ -1909,8 +1909,8 @@ def from_runner_api_parameter(unused_ptransform, pardo_payload, context): # to_runner_api_parameter above). indexed_side_inputs = [( get_sideinput_index(tag), - pvalue.AsSideInput.from_runner_api(si, context)) for tag, - si in pardo_payload.side_inputs.items()] + pvalue.AsSideInput.from_runner_api(si, context)) + for tag, si in pardo_payload.side_inputs.items()] result.side_inputs = [si for _, si in sorted(indexed_side_inputs)] return result @@ -3054,7 +3054,6 @@ def from_runner_api_parameter(unused_ptransform, combine_payload, context): class CombineValuesDoFn(DoFn): """DoFn for performing per-key Combine transforms.""" - def __init__( self, input_pcoll_type, @@ -3117,7 +3116,6 @@ def default_type_hints(self): class _CombinePerKeyWithHotKeyFanout(PTransform): - def __init__( self, combine_fn, # type: CombineFn @@ -3347,7 +3345,6 @@ class GroupBy(PTransform): The GroupBy operation can be made into an aggregating operation by invoking its `aggregate_field` method. 
""" - def __init__( self, *fields, # type: typing.Union[str, typing.Callable] @@ -3487,8 +3484,7 @@ def expand(self, pcoll): TupleCombineFn( *[combine_fn for _, combine_fn, __ in self._aggregations])) | MapTuple( - lambda key, - value: _dynamic_named_tuple('Result', result_fields) + lambda key, value: _dynamic_named_tuple('Result', result_fields) (*(key + value)))) @@ -3505,7 +3501,6 @@ class Select(PTransform): pcoll | beam.Map(lambda x: beam.Row(a=x.a, b=foo(x))) """ - def __init__( self, *args, # type: typing.Union[str, typing.Callable] @@ -3529,8 +3524,10 @@ def expand(self, pcoll): return ( _MaybePValueWithErrors(pcoll, self._exception_handling_args) | Map( lambda x: pvalue.Row( - **{name: expr(x) - for name, expr in self._fields}))).as_result() + **{ + name: expr(x) + for name, expr in self._fields + }))).as_result() def infer_output_type(self, input_type): def extract_return_type(expr): @@ -3586,14 +3583,15 @@ def expand(self, pcoll): class Windowing(object): - def __init__(self, - windowfn, # type: WindowFn - triggerfn=None, # type: typing.Optional[TriggerFn] - accumulation_mode=None, # type: typing.Optional[beam_runner_api_pb2.AccumulationMode.Enum.ValueType] - timestamp_combiner=None, # type: typing.Optional[beam_runner_api_pb2.OutputTime.Enum.ValueType] - allowed_lateness=0, # type: typing.Union[int, float] - environment_id=None, # type: typing.Optional[str] - ): + def __init__( + self, + windowfn, # type: WindowFn + triggerfn=None, # type: typing.Optional[TriggerFn] + accumulation_mode=None, # type: typing.Optional[beam_runner_api_pb2.AccumulationMode.Enum.ValueType] + timestamp_combiner=None, # type: typing.Optional[beam_runner_api_pb2.OutputTime.Enum.ValueType] + allowed_lateness=0, # type: typing.Union[int, float] + environment_id=None, # type: typing.Optional[str] + ): """Class representing the window strategy. Args: diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py index 14cd485d1f8e..451a504342e1 100644 --- a/sdks/python/apache_beam/transforms/display.py +++ b/sdks/python/apache_beam/transforms/display.py @@ -221,8 +221,7 @@ def create_from_options(cls, pipeline_options): items = { k: (v if DisplayDataItem._get_value_type(v) is not None else str(v)) - for k, - v in pipeline_options.display_data().items() + for k, v in pipeline_options.display_data().items() } return cls(pipeline_options._get_display_data_namespace(), items) diff --git a/sdks/python/apache_beam/transforms/environments.py b/sdks/python/apache_beam/transforms/environments.py index b8d3d3dcfb22..ee9c32302922 100644 --- a/sdks/python/apache_beam/transforms/environments.py +++ b/sdks/python/apache_beam/transforms/environments.py @@ -106,11 +106,12 @@ class Environment(object): _known_urns = {} # type: dict[str, tuple[Optional[type], ConstructorFn]] _urn_to_env_cls = {} # type: dict[str, type] - def __init__(self, + def __init__( + self, capabilities=(), # type: Iterable[str] artifacts=(), # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints=None, # type: Optional[Mapping[str, bytes]] - ): + ): # type: (...) 
-> None self._capabilities = capabilities self._artifacts = sorted(artifacts, key=lambda x: x.SerializeToString()) @@ -169,21 +170,23 @@ def register_urn( @classmethod @overload - def register_urn(cls, - urn, # type: str - parameter_type, # type: type[T] - constructor # type: Callable[[T, Iterable[str], Iterable[beam_runner_api_pb2.ArtifactInformation], PipelineContext], Any] - ): + def register_urn( + cls, + urn, # type: str + parameter_type, # type: type[T] + constructor # type: Callable[[T, Iterable[str], Iterable[beam_runner_api_pb2.ArtifactInformation], PipelineContext], Any] + ): # type: (...) -> None pass @classmethod @overload - def register_urn(cls, - urn, # type: str - parameter_type, # type: None - constructor # type: Callable[[bytes, Iterable[str], Iterable[beam_runner_api_pb2.ArtifactInformation], PipelineContext], Any] - ): + def register_urn( + cls, + urn, # type: str + parameter_type, # type: None + constructor # type: Callable[[bytes, Iterable[str], Iterable[beam_runner_api_pb2.ArtifactInformation], PipelineContext], Any] + ): # type: (...) -> None pass @@ -227,10 +230,11 @@ def to_runner_api(self, context): resource_hints=self.resource_hints()) @classmethod - def from_runner_api(cls, - proto, # type: Optional[beam_runner_api_pb2.Environment] - context # type: PipelineContext - ): + def from_runner_api( + cls, + proto, # type: Optional[beam_runner_api_pb2.Environment] + context # type: PipelineContext + ): # type: (...) -> Optional[Environment] if proto is None or not proto.urn: return None @@ -281,12 +285,13 @@ def to_runner_api_parameter(self, context): return common_urns.environments.DEFAULT.urn, None @staticmethod - def from_runner_api_parameter(payload, # type: beam_runner_api_pb2.DockerPayload + def from_runner_api_parameter( + payload, # type: beam_runner_api_pb2.DockerPayload capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) -> DefaultEnvironment return DefaultEnvironment( capabilities=capabilities, @@ -334,12 +339,13 @@ def to_runner_api_parameter(self, context): beam_runner_api_pb2.DockerPayload(container_image=self.container_image)) @staticmethod - def from_runner_api_parameter(payload, # type: beam_runner_api_pb2.DockerPayload + def from_runner_api_parameter( + payload, # type: beam_runner_api_pb2.DockerPayload capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) -> DockerEnvironment return DockerEnvironment( container_image=payload.container_image, @@ -446,12 +452,13 @@ def to_runner_api_parameter(self, context): os=self.os, arch=self.arch, command=self.command, env=self.env)) @staticmethod - def from_runner_api_parameter(payload, + def from_runner_api_parameter( + payload, capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) 
-> ProcessEnvironment return ProcessEnvironment( command=payload.command, @@ -542,12 +549,13 @@ def to_runner_api_parameter(self, context): params=self.params)) @staticmethod - def from_runner_api_parameter(payload, # type: beam_runner_api_pb2.ExternalPayload + def from_runner_api_parameter( + payload, # type: beam_runner_api_pb2.ExternalPayload capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) -> ExternalEnvironment return ExternalEnvironment( payload.endpoint.url, @@ -605,12 +613,13 @@ def to_runner_api_parameter(self, context): return python_urns.EMBEDDED_PYTHON, None @staticmethod - def from_runner_api_parameter(unused_payload, # type: None + def from_runner_api_parameter( + unused_payload, # type: None capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) -> EmbeddedPythonEnvironment return EmbeddedPythonEnvironment(capabilities, artifacts, resource_hints) @@ -677,12 +686,13 @@ def to_runner_api_parameter(self, context): return python_urns.EMBEDDED_PYTHON_GRPC, payload @staticmethod - def from_runner_api_parameter(payload, # type: bytes + def from_runner_api_parameter( + payload, # type: bytes capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) -> EmbeddedPythonGrpcEnvironment if payload: config = EmbeddedPythonGrpcEnvironment.parse_config( @@ -742,12 +752,13 @@ def to_runner_api_parameter(self, context): return python_urns.EMBEDDED_PYTHON_LOOPBACK, None @staticmethod - def from_runner_api_parameter(unused_payload, # type: None + def from_runner_api_parameter( + unused_payload, # type: None capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) -> PythonLoopbackEnvironment return PythonLoopbackEnvironment( capabilities=capabilities, @@ -784,12 +795,13 @@ def to_runner_api_parameter(self, context): return python_urns.SUBPROCESS_SDK, self.command_string.encode('utf-8') @staticmethod - def from_runner_api_parameter(payload, # type: bytes + def from_runner_api_parameter( + payload, # type: bytes capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) -> SubprocessSDKEnvironment return SubprocessSDKEnvironment( payload.decode('utf-8'), capabilities, artifacts, resource_hints) @@ -827,12 +839,13 @@ def to_runner_api_parameter(self, context): ])) @staticmethod - def from_runner_api_parameter(payload, # type: beam_runner_api_pb2.AnyOfEnvironmentPayload + def from_runner_api_parameter( + payload, # type: beam_runner_api_pb2.AnyOfEnvironmentPayload capabilities, # type: Iterable[str] artifacts, # type: Iterable[beam_runner_api_pb2.ArtifactInformation] resource_hints, # type: Mapping[str, bytes] context # type: PipelineContext - ): + ): # type: (...) 
-> AnyOfEnvironment return AnyOfEnvironment([ Environment.from_runner_api(env, context) diff --git a/sdks/python/apache_beam/transforms/external.py b/sdks/python/apache_beam/transforms/external.py index 3fc58f04a78f..96a3fdd86c7d 100644 --- a/sdks/python/apache_beam/transforms/external.py +++ b/sdks/python/apache_beam/transforms/external.py @@ -162,8 +162,8 @@ def _get_named_tuple_instance(self): } schema = named_fields_to_schema([ - (key, convert_to_typing_type(instance_to_type(value))) for key, - value in values.items() + (key, convert_to_typing_type(instance_to_type(value))) + for key, value in values.items() ]) return named_tuple_from_schema(schema)(**values) @@ -226,8 +226,7 @@ def dict_to_row_recursive(field_type, py_value): elif type_info == 'map_type': return { key: dict_to_row_recursive(field_type.map_type.value_type, value) - for key, - value in py_value.items() + for key, value in py_value.items() } else: return py_value @@ -595,8 +594,8 @@ def __init__(self, transform, **values): def _get_named_tuple_instance(self): schema = named_fields_to_schema([ - (k, convert_to_typing_type(v)) for k, - v in self._transform.__init__.__annotations__.items() + (k, convert_to_typing_type(v)) + for k, v in self._transform.__init__.__annotations__.items() if k in self._values ]) return named_tuple_from_schema(schema)(**self._values) @@ -770,8 +769,7 @@ def fix_output(pcoll, tag): self._outputs = { tag: fix_output(result_context.pcollections.get_by_id(pcoll_id), tag) - for tag, - pcoll_id in self._expanded_transform.outputs.items() + for tag, pcoll_id in self._expanded_transform.outputs.items() } return self._output_to_pvalueish(self._outputs) @@ -892,13 +890,11 @@ def _normalize(coder_proto): subtransforms=proto.subtransforms, inputs={ tag: pcoll_renames.get(pcoll, pcoll) - for tag, - pcoll in proto.inputs.items() + for tag, pcoll in proto.inputs.items() }, outputs={ tag: pcoll_renames.get(pcoll, pcoll) - for tag, - pcoll in proto.outputs.items() + for tag, pcoll in proto.outputs.items() }, display_data=proto.display_data, environment_id=proto.environment_id) @@ -913,13 +909,11 @@ def _normalize(coder_proto): subtransforms=self._expanded_transform.subtransforms, inputs={ tag: pcoll_renames.get(pcoll, pcoll) - for tag, - pcoll in self._expanded_transform.inputs.items() + for tag, pcoll in self._expanded_transform.inputs.items() }, outputs={ tag: pcoll_renames.get(pcoll, pcoll) - for tag, - pcoll in self._expanded_transform.outputs.items() + for tag, pcoll in self._expanded_transform.outputs.items() }, annotations=self._expanded_transform.annotations, environment_id=self._expanded_transform.environment_id) diff --git a/sdks/python/apache_beam/transforms/fully_qualified_named_transform_test.py b/sdks/python/apache_beam/transforms/fully_qualified_named_transform_test.py index f4a4f75126a1..6b3050671da7 100644 --- a/sdks/python/apache_beam/transforms/fully_qualified_named_transform_test.py +++ b/sdks/python/apache_beam/transforms/fully_qualified_named_transform_test.py @@ -115,11 +115,12 @@ def test_callable_transform(self): | FullyQualifiedNamedTransform( '__callable__', # the next argument is a callable to be applied ( - python_callable.PythonCallableWithSource(""" + python_callable.PythonCallableWithSource( + """ def func(pcoll, x): return pcoll | beam.Map(lambda e: e + x) """), - 'x' # arguments passed to the callable + 'x' # arguments passed to the callable ), {}), equal_to(['ax', 'bx', 'cx'])) @@ -133,16 +134,16 @@ def test_constructor_transform(self): '__constructor__', # the next argument 
constructs a PTransform (), { - 'source': python_callable.PythonCallableWithSource(""" + 'source': python_callable.PythonCallableWithSource( + """ class MyTransform(beam.PTransform): def __init__(self, x): self._x = x def expand(self, pcoll): return pcoll | beam.Map(lambda e: e + self._x) """), - 'x': 'x' # arguments passed to the above constructor - } - ), + 'x': 'x' # arguments passed to the above constructor + }), equal_to(['ax', 'bx', 'cx'])) def test_glob_filter(self): diff --git a/sdks/python/apache_beam/transforms/ptransform.py b/sdks/python/apache_beam/transforms/ptransform.py index aeec91cdfc97..7f48c24a2dbd 100644 --- a/sdks/python/apache_beam/transforms/ptransform.py +++ b/sdks/python/apache_beam/transforms/ptransform.py @@ -704,21 +704,23 @@ def register_urn( @classmethod @overload - def register_urn(cls, - urn, # type: str - parameter_type, # type: type[T] - constructor # type: Callable[[beam_runner_api_pb2.PTransform, T, PipelineContext], Any] - ): + def register_urn( + cls, + urn, # type: str + parameter_type, # type: type[T] + constructor # type: Callable[[beam_runner_api_pb2.PTransform, T, PipelineContext], Any] + ): # type: (...) -> None pass @classmethod @overload - def register_urn(cls, - urn, # type: str - parameter_type, # type: None - constructor # type: Callable[[beam_runner_api_pb2.PTransform, bytes, PipelineContext], Any] - ): + def register_urn( + cls, + urn, # type: str + parameter_type, # type: None + constructor # type: Callable[[beam_runner_api_pb2.PTransform, bytes, PipelineContext], Any] + ): # type: (...) -> None pass @@ -754,10 +756,11 @@ def to_runner_api(self, context, has_parts=False, **extra_kwargs): if isinstance(typed_param, str) else typed_param) @classmethod - def from_runner_api(cls, - proto, # type: Optional[beam_runner_api_pb2.PTransform] - context # type: PipelineContext - ): + def from_runner_api( + cls, + proto, # type: Optional[beam_runner_api_pb2.PTransform] + context # type: PipelineContext + ): # type: (...) -> Optional[PTransform] if proto is None or proto.spec is None or not proto.spec.urn: return None @@ -1202,8 +1205,7 @@ def wrapper(*args, **kwargs): # The outermost call is expected to be the most specific. 
'yaml_provider': 'python', 'yaml_type': 'PyTransform', - 'yaml_args': config, - } + 'yaml_args': config, } return transform return wrapper diff --git a/sdks/python/apache_beam/transforms/ptransform_test.py b/sdks/python/apache_beam/transforms/ptransform_test.py index f9f6b230866e..de9838beb4d9 100644 --- a/sdks/python/apache_beam/transforms/ptransform_test.py +++ b/sdks/python/apache_beam/transforms/ptransform_test.py @@ -267,9 +267,8 @@ def test_undeclared_outputs(self): nums = pipeline | 'Some Numbers' >> beam.Create([1, 2, 3, 4]) results = nums | 'ClassifyNumbers' >> beam.FlatMap( lambda x: [ - x, - pvalue.TaggedOutput('even' if x % 2 == 0 else 'odd', x), - pvalue.TaggedOutput('extra', x) + x, pvalue.TaggedOutput('even' if x % 2 == 0 else 'odd', x), pvalue + .TaggedOutput('extra', x) ]).with_outputs() assert_that(results[None], equal_to([1, 2, 3, 4])) assert_that(results.odd, equal_to([1, 3]), label='assert:odd') @@ -343,9 +342,8 @@ def finish_bundle(self): | beam.ParDo(MyDoFn()) | WindowInto(windowfn) | 'create tuple' >> beam.Map( - lambda v, - t=beam.DoFn.TimestampParam, - w=beam.DoFn.WindowParam: (v, t, w.start, w.end))) + lambda v, t=beam.DoFn.TimestampParam, w=beam.DoFn.WindowParam: + (v, t, w.start, w.end))) expected_process = [ ('process1', Timestamp(5), Timestamp(4), Timestamp(6)) ] @@ -457,8 +455,7 @@ def test_combine_with_side_input_as_arg(self): divisor = pipeline | 'Divisor' >> beam.Create([2]) result = pcoll | 'Max' >> beam.CombineGlobally( # Multiples of divisor only. - lambda vals, - d: max(v for v in vals if v % d == 0), + lambda vals, d: max(v for v in vals if v % d == 0), pvalue.AsSingleton(divisor)).without_defaults() filt_vals = [v for v in values if v % 2 == 0] assert_that(result, equal_to([max(filt_vals)])) @@ -492,8 +489,7 @@ def test_combine_per_key_with_side_input_as_arg(self): ([('a', x) for x in vals_1] + [('b', x) for x in vals_2])) divisor = pipeline | 'Divisor' >> beam.Create([2]) result = pcoll | beam.CombinePerKey( - lambda vals, - d: max(v for v in vals if v % d == 0), + lambda vals, d: max(v for v in vals if v % d == 0), pvalue.AsSingleton(divisor)) # Multiples of divisor only. 
m_1 = max(v for v in vals_1 if v % 2 == 0) m_2 = max(v for v in vals_2 if v % 2 == 0) @@ -687,10 +683,7 @@ def test_partition_with_callable_and_side_input(self): side_input = pipeline | 'Side Input' >> beam.Create([100, 1000]) partitions = ( pcoll | 'part' >> beam.Partition( - lambda e, - n, - offset, - si_list: ((e + len(si_list)) % 3) + offset, + lambda e, n, offset, si_list: ((e + len(si_list)) % 3) + offset, 4, 1, pvalue.AsList(side_input))) @@ -974,8 +967,10 @@ def test_fields(self): def normalize(key, values): if isinstance(key, tuple): key = beam.Row( - **{name: value - for name, value in zip(type(key)._fields, key)}) + **{ + name: value + for name, value in zip(type(key)._fields, key) + }) return key, sorted(v.value for v in values) with TestPipeline() as p: @@ -1024,8 +1019,10 @@ def normalize(key, values): def test_aggregate(self): def named_tuple_to_row(t): return beam.Row( - **{name: value - for name, value in zip(type(t)._fields, t)}) + **{ + name: value + for name, value in zip(type(t)._fields, t) + }) with TestPipeline() as p: pcoll = p | beam.Create(range(-2, 3)) | beam.Map( @@ -1034,15 +1031,15 @@ def named_tuple_to_row(t): assert_that( pcoll - | beam.GroupBy('square', big=lambda x: x.value > 1) - .aggregate_field('value', sum, 'sum') - .aggregate_field(lambda x: x.sign == 1, all, 'positive') + | beam.GroupBy('square', big=lambda x: x.value > 1).aggregate_field( + 'value', sum, 'sum').aggregate_field( + lambda x: x.sign == 1, all, 'positive') | beam.Map(named_tuple_to_row), equal_to([ - beam.Row(square=0, big=False, sum=0, positive=False), # [0], - beam.Row(square=1, big=False, sum=0, positive=False), # [-1, 1] + beam.Row(square=0, big=False, sum=0, positive=False), # [0], + beam.Row(square=1, big=False, sum=0, positive=False), # [-1, 1] beam.Row(square=4, big=False, sum=-2, positive=False), # [-2] - beam.Row(square=4, big=True, sum=2, positive=True), # [2] + beam.Row(square=4, big=True, sum=2, positive=True), # [2] ])) def test_pickled_field(self): diff --git a/sdks/python/apache_beam/transforms/sideinputs_test.py b/sdks/python/apache_beam/transforms/sideinputs_test.py index 4c6df9f9d8ec..b16a72bf59cc 100644 --- a/sdks/python/apache_beam/transforms/sideinputs_test.py +++ b/sdks/python/apache_beam/transforms/sideinputs_test.py @@ -100,17 +100,17 @@ def test_sliding_windows(self): window.SlidingWindows(size=6, period=2), expected=[ # Element 1 falls in three windows - (1, [1]), # [-4, 2) - (1, [1, 2]), # [-2, 4) + (1, [1]), # [-4, 2) + (1, [1, 2]), # [-2, 4) (1, [1, 2, 4]), # [0, 6) # as does 2, - (2, [1, 2]), # [-2, 4) + (2, [1, 2]), # [-2, 4) (2, [1, 2, 4]), # [0, 6) - (2, [2, 4]), # [2, 8) + (2, [2, 4]), # [2, 8) # and 4. 
(4, [1, 2, 4]), # [0, 6) - (4, [2, 4]), # [2, 8) - (4, [4]), # [4, 10) + (4, [2, 4]), # [2, 8) + (4, [4]), # [4, 10) ]) def test_windowed_iter(self): @@ -228,9 +228,7 @@ def test_as_list_and_as_dict_side_inputs(self): side_list = pipeline | 'side list' >> beam.Create(a_list) side_pairs = pipeline | 'side pairs' >> beam.Create(some_pairs) results = main_input | 'concatenate' >> beam.Map( - lambda x, - the_list, - the_dict: [x, the_list, the_dict], + lambda x, the_list, the_dict: [x, the_list, the_dict], beam.pvalue.AsList(side_list), beam.pvalue.AsDict(side_pairs)) @@ -256,9 +254,7 @@ def test_as_singleton_without_unique_labels(self): main_input = pipeline | 'main input' >> beam.Create([1]) side_list = pipeline | 'side list' >> beam.Create(a_list) results = main_input | beam.Map( - lambda x, - s1, - s2: [x, s1, s2], + lambda x, s1, s2: [x, s1, s2], beam.pvalue.AsSingleton(side_list), beam.pvalue.AsSingleton(side_list)) @@ -281,9 +277,7 @@ def test_as_singleton_with_different_defaults(self): main_input = pipeline | 'main input' >> beam.Create([1]) side_list = pipeline | 'side list' >> beam.Create(a_list) results = main_input | beam.Map( - lambda x, - s1, - s2: [x, s1, s2], + lambda x, s1, s2: [x, s1, s2], beam.pvalue.AsSingleton(side_list, default_value=2), beam.pvalue.AsSingleton(side_list, default_value=3)) @@ -308,9 +302,7 @@ def test_as_list_twice(self): main_input = pipeline | 'main input' >> beam.Create([1]) side_list = pipeline | 'side list' >> beam.Create(a_list) results = main_input | beam.Map( - lambda x, - ls1, - ls2: [x, ls1, ls2], + lambda x, ls1, ls2: [x, ls1, ls2], beam.pvalue.AsList(side_list), beam.pvalue.AsList(side_list)) @@ -333,9 +325,7 @@ def test_as_dict_twice(self): main_input = pipeline | 'main input' >> beam.Create([1]) side_kvs = pipeline | 'side kvs' >> beam.Create(some_kvs) results = main_input | beam.Map( - lambda x, - dct1, - dct2: [x, dct1, dct2], + lambda x, dct1, dct2: [x, dct1, dct2], beam.pvalue.AsDict(side_kvs), beam.pvalue.AsDict(side_kvs)) diff --git a/sdks/python/apache_beam/transforms/sql_test.py b/sdks/python/apache_beam/transforms/sql_test.py index a87cf266d4b6..e020d7ec5998 100644 --- a/sdks/python/apache_beam/transforms/sql_test.py +++ b/sdks/python/apache_beam/transforms/sql_test.py @@ -48,8 +48,8 @@ @pytest.mark.xlang_sql_expansion_service @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - None, + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is None, "Must be run with a runner that supports staging java artifacts.") class SqlTransformTest(unittest.TestCase): """Tests that exercise the cross-language SqlTransform (implemented in java). 
diff --git a/sdks/python/apache_beam/transforms/trigger.py b/sdks/python/apache_beam/transforms/trigger.py index 63895704727f..7d573a58e3f1 100644 --- a/sdks/python/apache_beam/transforms/trigger.py +++ b/sdks/python/apache_beam/transforms/trigger.py @@ -774,8 +774,7 @@ def should_fire(self, time_domain, watermark, window, context): return self.combine_op( trigger.should_fire( time_domain, watermark, window, self._sub_context(context, ix)) - for ix, - trigger in enumerate(self.triggers)) + for ix, trigger in enumerate(self.triggers)) def on_fire(self, watermark, window, context): finished = [] @@ -1419,8 +1418,8 @@ def merge(_, to_be_merged, merge_result): # pylint: disable=no-self-argument ( element_output_time for element_output_time in ( self.timestamp_combiner_impl.assign_output_time( - window, timestamp) for unused_value, - timestamp in elements) + window, timestamp) + for unused_value, timestamp in elements) if element_output_time >= output_watermark)) if output_time is not None: state.add_state(window, self.WATERMARK_HOLD, output_time) diff --git a/sdks/python/apache_beam/transforms/trigger_test.py b/sdks/python/apache_beam/transforms/trigger_test.py index 962a06e485df..2cad624272ba 100644 --- a/sdks/python/apache_beam/transforms/trigger_test.py +++ b/sdks/python/apache_beam/transforms/trigger_test.py @@ -197,8 +197,10 @@ def test_fixed_watermark(self): AfterWatermark(), AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (13, 'c')], - {IntervalWindow(0, 10): [set('ab')], - IntervalWindow(10, 20): [set('c')]}, + { + IntervalWindow(0, 10): [set('ab')], + IntervalWindow(10, 20): [set('c')] + }, 1, 2, 3, @@ -225,36 +227,38 @@ def test_fixed_watermark_with_early(self): def test_fixed_watermark_with_early_late(self): self.run_trigger_simple( FixedWindows(100), # pyformat break - AfterWatermark(early=AfterCount(3), - late=AfterCount(2)), + AfterWatermark(early=AfterCount(3), late=AfterCount(2)), AccumulationMode.DISCARDING, zip(range(9), 'abcdefghi'), - {IntervalWindow(0, 100): [ - set('abcd'), set('efgh'), # early - set('i'), # on time - set('vw'), set('xy') # late - ]}, + { + IntervalWindow(0, 100): [ + set('abcd'), + set('efgh'), # early + set('i'), # on time + set('vw'), + set('xy') # late + ] + }, 2, late_data=zip(range(5), 'vwxyz')) def test_sessions_watermark_with_early_late(self): self.run_trigger_simple( Sessions(10), # pyformat break - AfterWatermark(early=AfterCount(2), - late=AfterCount(1)), + AfterWatermark(early=AfterCount(2), late=AfterCount(1)), AccumulationMode.ACCUMULATING, [(1, 'a'), (15, 'b'), (7, 'c'), (30, 'd')], { IntervalWindow(1, 25): [ - set('abc'), # early - set('abc'), # on time - set('abcxy') # late + set('abc'), # early + set('abc'), # on time + set('abcxy') # late ], IntervalWindow(30, 40): [ - set('d'), # on time + set('d'), # on time ], IntervalWindow(1, 40): [ - set('abcdxyz') # late + set('abcdxyz') # late ], }, 2, @@ -303,13 +307,16 @@ def test_repeatedly_after_first(self): Repeatedly(AfterAny(AfterCount(3), AfterWatermark())), AccumulationMode.ACCUMULATING, zip(range(7), 'abcdefg'), - {IntervalWindow(0, 100): [ - set('abc'), - set('abcdef'), - set('abcdefg'), - set('abcdefgx'), - set('abcdefgxy'), - set('abcdefgxyz')]}, + { + IntervalWindow(0, 100): [ + set('abc'), + set('abcdef'), + set('abcdefg'), + set('abcdefgx'), + set('abcdefgxy'), + set('abcdefgxyz') + ] + }, 1, late_data=zip(range(3), 'xyz')) @@ -350,8 +357,10 @@ def test_sessions_default(self): AccumulationMode.ACCUMULATING, [(1, 'a'), (2, 'b'), (15, 'c'), (16, 'd'), (30, 'z'), (9, 'e'), (10, 
'f'), (30, 'y')], - {IntervalWindow(1, 26): [set('abcdef')], - IntervalWindow(30, 40): [set('yz')]}, + { + IntervalWindow(1, 26): [set('abcdef')], + IntervalWindow(30, 40): [set('yz')] + }, 1, 2, 3, @@ -381,9 +390,11 @@ def test_sessions_after_count(self): AccumulationMode.ACCUMULATING, [(1, 'a'), (15, 'b'), (6, 'c'), (30, 's'), (31, 't'), (50, 'z'), (50, 'y')], - {IntervalWindow(1, 25): [set('abc')], - IntervalWindow(30, 41): [set('st')], - IntervalWindow(50, 60): [set('yz')]}, + { + IntervalWindow(1, 25): [set('abc')], + IntervalWindow(30, 41): [set('st')], + IntervalWindow(50, 60): [set('yz')] + }, 1, 2, 3) @@ -412,8 +423,10 @@ def test_sessions_after_each(self): AfterEach(AfterCount(2), AfterCount(3)), AccumulationMode.ACCUMULATING, zip(range(10), 'abcdefghij'), - {IntervalWindow(0, 11): [set('ab')], - IntervalWindow(0, 15): [set('abcdef')]}, + { + IntervalWindow(0, 11): [set('ab')], + IntervalWindow(0, 15): [set('abcdef')] + }, 2) self.run_trigger_simple( @@ -421,9 +434,11 @@ def test_sessions_after_each(self): Repeatedly(AfterEach(AfterCount(2), AfterCount(3))), AccumulationMode.ACCUMULATING, zip(range(10), 'abcdefghij'), - {IntervalWindow(0, 11): [set('ab')], - IntervalWindow(0, 15): [set('abcdef')], - IntervalWindow(0, 17): [set('abcdefgh')]}, + { + IntervalWindow(0, 11): [set('ab')], + IntervalWindow(0, 15): [set('abcdef')], + IntervalWindow(0, 17): [set('abcdefgh')] + }, 2) def test_picklable_output(self): @@ -678,9 +693,11 @@ def test_after_count_streaming(self): assert_that( results, - equal_to(list({ - 'A': [1, 2, 3], # 4 - 6 discarded because trigger finished - 'B': [1, 2, 3]}.items()))) + equal_to( + list({ + 'A': [1, 2, 3], # 4 - 6 discarded because trigger finished + 'B': [1, 2, 3] + }.items()))) def test_always(self): with TestPipeline() as p: @@ -706,12 +723,9 @@ def format_result(k, vs): result, equal_to( list({ - 'A-2': {10, 11}, - # Elements out of windows are also emitted. - 'A-6': {1, 2, 3, 4, 5}, - # A,1 is emitted twice. - 'B-5': {6, 7, 8, 9}, - # B,6 is emitted twice. + 'A-2': {10, 11}, # Elements out of windows are also emitted. + 'A-6': {1, 2, 3, 4, 5}, # A,1 is emitted twice. + 'B-5': {6, 7, 8, 9}, # B,6 is emitted twice. 
'B-3': {10, 15, 16}, }.items()))) diff --git a/sdks/python/apache_beam/transforms/util.py b/sdks/python/apache_beam/transforms/util.py index 7c3a1929ba9d..08e111992ef6 100644 --- a/sdks/python/apache_beam/transforms/util.py +++ b/sdks/python/apache_beam/transforms/util.py @@ -1180,9 +1180,7 @@ def WithKeys(pcoll, k, *args, **kwargs): for arg in args) and all(isinstance(kwarg, AsSideInput) for kwarg in kwargs.values()): return pcoll | Map( - lambda v, - *args, - **kwargs: (k(v, *args, **kwargs), v), + lambda v, *args, **kwargs: (k(v, *args, **kwargs), v), *args, **kwargs) return pcoll | Map(lambda v: (k(v, *args, **kwargs), v)) @@ -1326,8 +1324,8 @@ def _validate(self): 'batch_size must be a positive value') assert ( self.max_buffering_duration_secs is not None and - self.max_buffering_duration_secs >= 0), ( - 'max_buffering_duration must be a non-negative value') + self.max_buffering_duration_secs + >= 0), ('max_buffering_duration must be a non-negative value') def get_payload(self): return beam_runner_api_pb2.GroupIntoBatchesPayload( diff --git a/sdks/python/apache_beam/transforms/util_test.py b/sdks/python/apache_beam/transforms/util_test.py index c8304255238c..ac703dd53e54 100644 --- a/sdks/python/apache_beam/transforms/util_test.py +++ b/sdks/python/apache_beam/transforms/util_test.py @@ -294,8 +294,8 @@ def test_global_batch_timestamps(self): | beam.Create(range(3), reshuffle=False) | util.BatchElements(min_batch_size=2, max_batch_size=2) | beam.Map( - lambda batch, - timestamp=beam.DoFn.TimestampParam: (len(batch), timestamp))) + lambda batch, timestamp=beam.DoFn.TimestampParam: + (len(batch), timestamp))) assert_that( res, equal_to([ @@ -307,12 +307,19 @@ def test_sized_batches(self): with TestPipeline() as p: res = ( p - | beam.Create([ - 'a', 'a', # First batch. - 'aaaaaaaaaa', # Second batch. - 'aaaaa', 'aaaaa', # Third batch. - 'a', 'aaaaaaa', 'a', 'a' # Fourth batch. - ], reshuffle=False) + | beam.Create( + [ + 'a', + 'a', # First batch. + 'aaaaaaaaaa', # Second batch. + 'aaaaa', + 'aaaaa', # Third batch. + 'a', + 'aaaaaaa', + 'a', + 'a' # Fourth batch. 
+ ], + reshuffle=False) | util.BatchElements( min_batch_size=10, max_batch_size=10, element_size_fn=len) | beam.Map(lambda batch: ''.join(batch)) @@ -336,10 +343,10 @@ def test_sized_windowed_batches(self): assert_that( res, equal_to([ - 'a' * (1+2), # Elements in [1, 3) - 'a' * (3+4), # Elements in [3, 6) + 'a' * (1 + 2), # Elements in [1, 3) + 'a' * (3 + 4), # Elements in [3, 6) 'a' * 5, - 'a' * 6, # Elements in [6, 9) + 'a' * 6, # Elements in [6, 9) 'a' * 7, ])) @@ -558,8 +565,8 @@ def test_stateful_buffering_timer_in_fixed_window_streaming(self): start_time = timestamp.Timestamp(0) test_stream = ( TestStream().add_elements([ - TimestampedValue(value, start_time + i) for i, - value in enumerate(BatchElementsTest._create_test_data()) + TimestampedValue(value, start_time + i) + for i, value in enumerate(BatchElementsTest._create_test_data()) ]).advance_processing_time(150).advance_watermark_to( start_time + window_duration).advance_watermark_to( start_time + window_duration + @@ -803,14 +810,13 @@ def test_reshuffle_windows_unchanged(self): v, t - .001, [w], pane_info=PaneInfo(True, False, PaneInfoTiming.ON_TIME, 0, 0)) - for (v, t, w) in [((1, contains_in_any_order([2, 1])), - 4.0, - IntervalWindow(1.0, 4.0)), - ((2, contains_in_any_order([2, 1])), - 4.0, + for (v, t, w) in [((1, contains_in_any_order([2, 1])), 4.0, IntervalWindow(1.0, 4.0)), ( - (3, [1]), 3.0, IntervalWindow(1.0, 3.0)), ( - (1, [4]), 6.0, IntervalWindow(4.0, 6.0))] + (2, contains_in_any_order([2, 1])), 4.0, + IntervalWindow(1.0, 4.0)), (( + 3, [1]), 3.0, IntervalWindow(1.0, 3.0)), (( + 1, + [4]), 6.0, IntervalWindow(4.0, 6.0))] ] before_reshuffle = ( pipeline @@ -838,13 +844,12 @@ def test_reshuffle_window_fn_preserved(self): expected_windows = [ TestWindowedValue(v, t, [w]) - for (v, t, w) in [((1, 1), 1.0, IntervalWindow(1.0, 3.0)), ( - (2, 1), 1.0, IntervalWindow(1.0, 3.0)), ( - (3, 1), 1.0, IntervalWindow(1.0, 3.0)), ( - (1, 2), 2.0, IntervalWindow(2.0, 4.0)), ( + for (v, t, w) in [((1, 1), 1.0, IntervalWindow(1.0, 3.0)), (( + 2, 1), 1.0, IntervalWindow(1.0, 3.0)), (( + 3, 1), 1.0, IntervalWindow(1.0, 3.0)), (( + 1, 2), 2.0, IntervalWindow(2.0, 4.0)), ( (2, 2), 2.0, - IntervalWindow(2.0, 4.0)), ((1, 4), - 4.0, + IntervalWindow(2.0, 4.0)), ((1, 4), 4.0, IntervalWindow(4.0, 6.0))] ] expected_merged_windows = [ @@ -856,8 +861,7 @@ def test_reshuffle_window_fn_preserved(self): w) in [((1, any_order([2, 1])), 4.0, IntervalWindow(1.0, 4.0)), ( (2, any_order([2, 1])), 4.0, IntervalWindow(1.0, 4.0)), ( (3, [1]), 3.0, - IntervalWindow(1.0, 3.0)), ((1, [4]), - 6.0, + IntervalWindow(1.0, 3.0)), ((1, [4]), 6.0, IntervalWindow(4.0, 6.0))] ] before_reshuffle = ( @@ -969,83 +973,61 @@ def test_reshuffle_custom_window_preserves_metadata(self, compat_version): ] expected_timestamp = GlobalWindow().max_timestamp() - expected = [ - TestWindowedValue( - ('key', [0, 1, 2]), - expected_timestamp, - [GlobalWindow()], - pane_info=PaneInfo( - is_first=True, - is_last=False, - timing=PaneInfoTiming.EARLY, # 0 - index=0, - nonspeculative_index=-1 - ) - ), - TestWindowedValue( - ('key', [3, 4, 5]), - expected_timestamp, - [GlobalWindow()], - pane_info=PaneInfo( - is_first=False, - is_last=False, - timing=PaneInfoTiming.EARLY, # 0 - index=1, - nonspeculative_index=-1 - ) - ), - TestWindowedValue( - ('key', [6, 7, 8]), - expected_timestamp, - [GlobalWindow()], - pane_info=PaneInfo( - is_first=False, - is_last=False, - timing=PaneInfoTiming.EARLY, # 0 - index=2, - nonspeculative_index=-1 - ) - ), - TestWindowedValue( - ('key', [9, 10, 11]), - 
expected_timestamp, - [GlobalWindow()], - pane_info=PaneInfo( - is_first=False, - is_last=False, - timing=PaneInfoTiming.EARLY, # 0 - index=3, - nonspeculative_index=-1 - ) - ) - ] if compat_version is None else ( - [ - TestWindowedValue( - ('key', [0, 1, 2]), - expected_timestamp, - [GlobalWindow()], - PANE_INFO_UNKNOWN - ), - TestWindowedValue( - ('key', [3, 4, 5]), - expected_timestamp, - [GlobalWindow()], - PANE_INFO_UNKNOWN - ), - TestWindowedValue( - ('key', [6, 7, 8]), - expected_timestamp, - [GlobalWindow()], - PANE_INFO_UNKNOWN - ), - TestWindowedValue( - ('key', [9, 10, 11]), - expected_timestamp, - [GlobalWindow()], - PANE_INFO_UNKNOWN - ) - ] - ) + expected = [ + TestWindowedValue( + ('key', [0, 1, 2]), + expected_timestamp, + [GlobalWindow()], + pane_info=PaneInfo( + is_first=True, + is_last=False, + timing=PaneInfoTiming.EARLY, # 0 + index=0, + nonspeculative_index=-1)), + TestWindowedValue( + ('key', [3, 4, 5]), + expected_timestamp, + [GlobalWindow()], + pane_info=PaneInfo( + is_first=False, + is_last=False, + timing=PaneInfoTiming.EARLY, # 0 + index=1, + nonspeculative_index=-1)), + TestWindowedValue( + ('key', [6, 7, 8]), + expected_timestamp, + [GlobalWindow()], + pane_info=PaneInfo( + is_first=False, + is_last=False, + timing=PaneInfoTiming.EARLY, # 0 + index=2, + nonspeculative_index=-1)), + TestWindowedValue( + ('key', [9, 10, 11]), + expected_timestamp, + [GlobalWindow()], + pane_info=PaneInfo( + is_first=False, + is_last=False, + timing=PaneInfoTiming.EARLY, # 0 + index=3, + nonspeculative_index=-1)) + ] if compat_version is None else ([ + TestWindowedValue(('key', [0, 1, 2]), + expected_timestamp, [GlobalWindow()], + PANE_INFO_UNKNOWN), + TestWindowedValue(('key', [3, 4, 5]), + expected_timestamp, [GlobalWindow()], + PANE_INFO_UNKNOWN), + TestWindowedValue(('key', [6, 7, 8]), + expected_timestamp, [GlobalWindow()], + PANE_INFO_UNKNOWN), + TestWindowedValue(('key', [9, 10, 11]), + expected_timestamp, [GlobalWindow()], + PANE_INFO_UNKNOWN) + ]) options = PipelineOptions(update_compatibility_version=compat_version) options.view_as(StandardOptions).streaming = True @@ -1311,7 +1293,8 @@ def test_constant_k(self): with TestPipeline() as p: pc = p | beam.Create(self.l) with_keys = pc | util.WithKeys('k') - assert_that(with_keys, equal_to([('k', 1), ('k', 2), ('k', 3)], )) + assert_that(with_keys, equal_to([('k', 1), ('k', 2), ('k', 3)], + )) def test_callable_k(self): with TestPipeline() as p: @@ -1336,9 +1319,7 @@ def test_sideinputs(self): si1 = AsList(p | "side input 1" >> beam.Create([1, 2, 3])) si2 = AsSingleton(p | "side input 2" >> beam.Create([10])) with_keys = pc | util.WithKeys( - lambda x, - the_list, - the_singleton: x + sum(the_list) + the_singleton, + lambda x, the_list, the_singleton: x + sum(the_list) + the_singleton, si1, the_singleton=si2) assert_that(with_keys, equal_to([(17, 1), (18, 2), (19, 3)])) @@ -1421,8 +1402,8 @@ def test_buffering_timer_in_fixed_window_streaming(self): start_time = timestamp.Timestamp(0) test_stream = ( TestStream().add_elements([ - TimestampedValue(value, start_time + i) for i, - value in enumerate(GroupIntoBatchesTest._create_test_data()) + TimestampedValue(value, start_time + i) + for i, value in enumerate(GroupIntoBatchesTest._create_test_data()) ]).advance_processing_time(150).advance_watermark_to( start_time + window_duration).advance_watermark_to( start_time + window_duration + diff --git a/sdks/python/apache_beam/transforms/window.py b/sdks/python/apache_beam/transforms/window.py index 900d2b600353..1a0c3007b4f1 100644 
--- a/sdks/python/apache_beam/transforms/window.py +++ b/sdks/python/apache_beam/transforms/window.py @@ -486,9 +486,8 @@ def assign(self, context: WindowFn.AssignContext) -> list[IntervalWindow]: (interval_start := Timestamp(micros=s)), interval_start + self.size, ) for s in range( - start.micros, - timestamp.micros - self.size.micros, - -self.period.micros) + start.micros, timestamp.micros - + self.size.micros, -self.period.micros) ] def get_window_coder(self) -> coders.IntervalWindowCoder: diff --git a/sdks/python/apache_beam/typehints/arrow_type_compatibility.py b/sdks/python/apache_beam/typehints/arrow_type_compatibility.py index 34a37a886bab..ac0985f4b9c6 100644 --- a/sdks/python/apache_beam/typehints/arrow_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/arrow_type_compatibility.py @@ -56,8 +56,8 @@ def beam_schema_from_arrow_schema(arrow_schema: pa.Schema) -> schema_pb2.Schema: if arrow_schema.metadata: schema_id = arrow_schema.metadata.get(BEAM_SCHEMA_ID_KEY, None) schema_options = [ - _hydrate_beam_option(value) for key, - value in arrow_schema.metadata.items() + _hydrate_beam_option(value) + for key, value in arrow_schema.metadata.items() if key.startswith(BEAM_SCHEMA_OPTION_KEY_PREFIX) ] else: @@ -78,14 +78,14 @@ def _beam_field_from_arrow_field(arrow_field: pa.Field) -> schema_pb2.Field: if arrow_field.metadata: field_options = [ - _hydrate_beam_option(value) for key, - value in arrow_field.metadata.items() + _hydrate_beam_option(value) + for key, value in arrow_field.metadata.items() if key.startswith(BEAM_FIELD_OPTION_KEY_PREFIX) ] if isinstance(arrow_field.type, pa.StructType): beam_fieldtype.row_type.schema.options.extend([ - _hydrate_beam_option(value) for key, - value in arrow_field.metadata.items() + _hydrate_beam_option(value) + for key, value in arrow_field.metadata.items() if key.startswith(BEAM_SCHEMA_OPTION_KEY_PREFIX) ]) if BEAM_SCHEMA_ID_KEY in arrow_field.metadata: @@ -320,8 +320,8 @@ def from_typehints(element_type, def produce_batch(self, elements): arrays = [ pa.array([getattr(el, name) for el in elements], - type=self._arrow_schema.field(name).type) for name, - _ in self._element_type._fields + type=self._arrow_schema.field(name).type) + for name, _ in self._element_type._fields ] return pa.Table.from_arrays(arrays, schema=self._arrow_schema) @@ -331,8 +331,7 @@ def explode_batch(self, batch: pa.Table): yield self._element_type.user_type( **{ name: val.as_py() - for name, - val in zip(self._arrow_schema.names, row_values) + for name, val in zip(self._arrow_schema.names, row_values) }) def combine_batches(self, batches: List[pa.Table]): diff --git a/sdks/python/apache_beam/typehints/arrow_type_compatibility_test.py b/sdks/python/apache_beam/typehints/arrow_type_compatibility_test.py index 1e9ab3f27bd9..60cb30ff1695 100644 --- a/sdks/python/apache_beam/typehints/arrow_type_compatibility_test.py +++ b/sdks/python/apache_beam/typehints/arrow_type_compatibility_test.py @@ -46,6 +46,7 @@ def test_beam_schema_survives_roundtrip(self, beam_schema): self.assertEqual(beam_schema, roundtripped) + @parameterized_class([ { 'batch_typehint': pa.Table, @@ -88,14 +89,12 @@ def test_beam_schema_survives_roundtrip(self, beam_schema): { 'batch_typehint': pa.Array, 'element_typehint': row_type.RowTypeConstraint.from_fields([ - ("bar", Optional[float]), # noqa: F821 - ("baz", Optional[str]), # noqa: F821 - ]), - 'batch': pa.array([ - { - 'bar': i / 100, 'baz': str(i) - } if i % 7 else None for i in range(100) + ("bar", Optional[float]), # noqa: F821 + ("baz", 
Optional[str]), # noqa: F821 ]), + 'batch': pa.array([{ + 'bar': i / 100, 'baz': str(i) + } if i % 7 else None for i in range(100)]), } ]) @pytest.mark.uses_pyarrow @@ -195,19 +194,19 @@ def test_hash(self): class ArrowBatchConverterErrorsTest(unittest.TestCase): @parameterized.expand([ - ( - pa.RecordBatch, - row_type.RowTypeConstraint.from_fields([ - ("bar", Optional[float]), # noqa: F821 - ("baz", Optional[str]), # noqa: F821 - ]), - r'batch type must be pa\.Table or pa\.Array', - ), - ( - pa.Table, - Any, - r'Element type .* must be compatible with Beam Schemas', - ), + ( + pa.RecordBatch, + row_type.RowTypeConstraint.from_fields([ + ("bar", Optional[float]), # noqa: F821 + ("baz", Optional[str]), # noqa: F821 + ]), + r'batch type must be pa\.Table or pa\.Array', + ), + ( + pa.Table, + Any, + r'Element type .* must be compatible with Beam Schemas', + ), ]) def test_construction_errors( self, batch_typehint, element_typehint, error_regex): diff --git a/sdks/python/apache_beam/typehints/batch.py b/sdks/python/apache_beam/typehints/batch.py index 35351b147d48..b193c590bec4 100644 --- a/sdks/python/apache_beam/typehints/batch.py +++ b/sdks/python/apache_beam/typehints/batch.py @@ -289,7 +289,8 @@ def __getitem__(self, value): raise ValueError else: dtype = value - return self.NumpyTypeConstraint(dtype, shape=(N, )) + return self.NumpyTypeConstraint( + dtype, shape=(N, )) NumpyArray = NumpyTypeHint() diff --git a/sdks/python/apache_beam/typehints/decorators.py b/sdks/python/apache_beam/typehints/decorators.py index 7050df7016e5..04c9c7be48ad 100644 --- a/sdks/python/apache_beam/typehints/decorators.py +++ b/sdks/python/apache_beam/typehints/decorators.py @@ -352,11 +352,10 @@ def strip_pcoll_helper( my_type: any, has_my_type: Callable[[], bool], my_key: str, - special_containers: List[ - Union['PBegin', 'PDone', 'PCollection']], # noqa: F821 + special_containers: List[Union['PBegin', 'PDone', + 'PCollection']], # noqa: F821 error_str: str, - source_str: str - ) -> 'IOTypeHints': + source_str: str) -> 'IOTypeHints': from apache_beam.pvalue import PCollection if not has_my_type() or not my_type or len(my_type[0]) != 1: @@ -571,8 +570,8 @@ def _unpack_positional_arg_hints(arg, hint): (arg, tuple_constraint, hint)) if isinstance(hint, typehints.TupleConstraint): return tuple( - _unpack_positional_arg_hints(a, t) for a, - t in zip(arg, hint.tuple_types)) + _unpack_positional_arg_hints(a, t) + for a, t in zip(arg, hint.tuple_types)) return (typehints.Any, ) * len(arg) return hint diff --git a/sdks/python/apache_beam/typehints/pandas_type_compatibility.py b/sdks/python/apache_beam/typehints/pandas_type_compatibility.py index ca9523f28349..9fbfd5c3ddfc 100644 --- a/sdks/python/apache_beam/typehints/pandas_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/pandas_type_compatibility.py @@ -91,8 +91,7 @@ PANDAS_TO_BEAM = { pd.Series([], dtype=dtype).dtype: fieldtype - for dtype, - fieldtype in _BIDIRECTIONAL + for dtype, fieldtype in _BIDIRECTIONAL } BEAM_TO_PANDAS = {fieldtype: dtype for dtype, fieldtype in _BIDIRECTIONAL} @@ -196,8 +195,10 @@ def make_null_checking_generator(series): for values in zip(*iterators): yield self._element_type.user_type( - **{column: value - for column, value in zip(self._columns, values)}) + **{ + column: value + for column, value in zip(self._columns, values) + }) def combine_batches(self, batches: List[pd.DataFrame]): return pd.concat(batches) diff --git a/sdks/python/apache_beam/typehints/pandas_type_compatibility_test.py 
b/sdks/python/apache_beam/typehints/pandas_type_compatibility_test.py index ff66df1ce968..c60228ac2abe 100644 --- a/sdks/python/apache_beam/typehints/pandas_type_compatibility_test.py +++ b/sdks/python/apache_beam/typehints/pandas_type_compatibility_test.py @@ -211,19 +211,19 @@ def test_hash(self): class PandasBatchConverterErrorsTest(unittest.TestCase): @parameterized.expand([ - ( - Any, - row_type.RowTypeConstraint.from_fields([ - ("bar", Optional[float]), # noqa: F821 - ("baz", Optional[str]), # noqa: F821 - ]), - r'batch type must be pd\.Series or pd\.DataFrame', - ), - ( - pd.DataFrame, - Any, - r'Element type must be compatible with Beam Schemas', - ), + ( + Any, + row_type.RowTypeConstraint.from_fields([ + ("bar", Optional[float]), # noqa: F821 + ("baz", Optional[str]), # noqa: F821 + ]), + r'batch type must be pd\.Series or pd\.DataFrame', + ), + ( + pd.DataFrame, + Any, + r'Element type must be compatible with Beam Schemas', + ), ]) def test_construction_errors( self, batch_typehint, element_typehint, error_regex): diff --git a/sdks/python/apache_beam/typehints/pytorch_type_compatibility.py b/sdks/python/apache_beam/typehints/pytorch_type_compatibility.py index f008174bcc03..eab93f54e6b9 100644 --- a/sdks/python/apache_beam/typehints/pytorch_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/pytorch_type_compatibility.py @@ -136,7 +136,8 @@ def __getitem__(self, value): raise ValueError else: dtype = value - return self.PytorchTypeConstraint(dtype, shape=(N, )) + return self.PytorchTypeConstraint( + dtype, shape=(N, )) PytorchTensor = PytorchTypeHint() diff --git a/sdks/python/apache_beam/typehints/row_type.py b/sdks/python/apache_beam/typehints/row_type.py index 9c6050461e32..038eb50d0606 100644 --- a/sdks/python/apache_beam/typehints/row_type.py +++ b/sdks/python/apache_beam/typehints/row_type.py @@ -85,8 +85,7 @@ def __init__( """ # Recursively wrap row types in a RowTypeConstraint self._fields = tuple((name, RowTypeConstraint.from_user_type(typ) or typ) - for name, - typ in fields) + for name, typ in fields) self._user_type = user_type diff --git a/sdks/python/apache_beam/typehints/schemas_test.py b/sdks/python/apache_beam/typehints/schemas_test.py index fbbdc035e11e..6cf37322147e 100644 --- a/sdks/python/apache_beam/typehints/schemas_test.py +++ b/sdks/python/apache_beam/typehints/schemas_test.py @@ -71,8 +71,8 @@ basic_array_types = [Sequence[typ] for typ in all_primitives] basic_map_types = [ - Mapping[key_type, value_type] for key_type, - value_type in itertools.product(all_primitives, all_primitives) + Mapping[key_type, value_type] for key_type, value_type in itertools.product( + all_primitives, all_primitives) ] @@ -129,8 +129,8 @@ def get_test_beam_fieldtype_protos(): basic_map_types = [ schema_pb2.FieldType( map_type=schema_pb2.MapType(key_type=key_type, value_type=value_type)) - for key_type, - value_type in itertools.product(all_primitives, all_primitives) + for key_type, value_type in itertools.product( + all_primitives, all_primitives) ] selected_schemas = [ @@ -139,8 +139,8 @@ def get_test_beam_fieldtype_protos(): schema=schema_pb2.Schema( id='32497414-85e8-46b7-9c90-9a9cc62fe390', fields=[ - schema_pb2.Field(name='field%d' % i, type=typ) for i, - typ in enumerate(all_primitives) + schema_pb2.Field(name='field%d' % i, type=typ) + for i, typ in enumerate(all_primitives) ]))), schema_pb2.FieldType( row_type=schema_pb2.RowType( @@ -184,8 +184,8 @@ def get_test_beam_fieldtype_protos(): schema=schema_pb2.Schema( id='a-schema-with-options', fields=[ - 
schema_pb2.Field(name='field%d' % i, type=typ) for i,
-                      typ in enumerate(all_primitives)
+                      schema_pb2.Field(name='field%d' % i, type=typ)
+                      for i, typ in enumerate(all_primitives)
                   ],
                   options=[
                       schema_pb2.Option(name='a_flag'),
@@ -270,8 +270,7 @@ def get_test_beam_fieldtype_protos():
                   value=schema_pb2.FieldValue(
                       atomic_value=schema_pb2.AtomicTypeValue(
                           string='str'))),
-          ]) for i,
-          typ in enumerate(all_primitives)
+          ]) for i, typ in enumerate(all_primitives)
       ] + [
           schema_pb2.Field(
               name='nested',
@@ -571,8 +570,7 @@ def test_unknown_primitive_maps_to_any(self):
 
   def test_unknown_atomic_raise_valueerror(self):
     self.assertRaises(
-        ValueError,
-        lambda: typing_from_runner_api(
+        ValueError, lambda: typing_from_runner_api(
             schema_pb2.FieldType(atomic_type=schema_pb2.UNSPECIFIED)))
 
   def test_int_maps_to_int64(self):
diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py
index fe9007ed63ca..6be5ed691837 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference.py
@@ -546,8 +546,8 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
       # See https://github.com/python/cpython/issues/102403 for context.
       if (pop_count == 1 and last_real_opname == 'GET_ITER' and
          len(state.stack) > 1 and isinstance(state.stack[-2], Const) and
-          getattr(state.stack[-2].value, '__name__', None) in (
-              '<listcomp>', '<dictcomp>', '<setcomp>', '<genexpr>')):
+          getattr(state.stack[-2].value, '__name__', None)
+          in ('<listcomp>', '<dictcomp>', '<setcomp>', '<genexpr>')):
         pop_count += 1
       if depth <= 0 or pop_count > len(state.stack):
         return_type = Any
diff --git a/sdks/python/apache_beam/typehints/trivial_inference_test.py b/sdks/python/apache_beam/typehints/trivial_inference_test.py
index c8b59c7ccbf7..fe60974e2806 100644
--- a/sdks/python/apache_beam/typehints/trivial_inference_test.py
+++ b/sdks/python/apache_beam/typehints/trivial_inference_test.py
@@ -48,8 +48,8 @@ def testJumpOffsets(self):
   def testBuildListUnpack(self):
     # Lambda uses BUILD_LIST_UNPACK opcode in Python 3.
     self.assertReturnType(
-        typehints.List[int],
-        lambda _list: [*_list, *_list, *_list], [typehints.List[int]])
+        typehints.List[int], lambda _list: [*_list, *_list, *_list],
+        [typehints.List[int]])
 
   def testBuildTupleUnpack(self):
     # Lambda uses BUILD_TUPLE_UNPACK opcode in Python 3.
@@ -63,16 +63,12 @@ def testBuildTupleUnpack(self): def testBuildSetUnpackOrUpdate(self): self.assertReturnType( typehints.Set[typehints.Union[int, str]], - lambda _list1, - _list2: {*_list1, *_list2, *_list2}, + lambda _list1, _list2: {*_list1, *_list2, *_list2}, [typehints.List[int], typehints.List[str]]) def testBuildMapUnpackOrUpdate(self): self.assertReturnType( - typehints.Dict[str, typehints.Union[int, str, float]], - lambda a, - b, - c: { + typehints.Dict[str, typehints.Union[int, str, float]], lambda a, b, c: { **a, **b, **c }, [ @@ -146,37 +142,29 @@ def reverse(a_b): def testBuildMap(self): self.assertReturnType( - typehints.Dict[typehints.Any, typehints.Any], - lambda k, - v: {}, [int, float]) + typehints.Dict[typehints.Any, typehints.Any], lambda k, v: {}, + [int, float]) self.assertReturnType( typehints.Dict[int, float], lambda k, v: {k: v}, [int, float]) self.assertReturnType( - typehints.Tuple[str, typehints.Dict[int, float]], - lambda k, - v: ('s', { + typehints.Tuple[str, typehints.Dict[int, float]], lambda k, v: + ('s', { k: v }), [int, float]) self.assertReturnType( typehints.Dict[int, typehints.Union[float, str]], - lambda k1, - v1, - k2, - v2: { + lambda k1, v1, k2, v2: { k1: v1, k2: v2 }, [int, float, int, str]) # Constant map. self.assertReturnType( - typehints.Dict[str, typehints.Union[int, float]], - lambda a, - b: { + typehints.Dict[str, typehints.Union[int, float]], lambda a, b: { 'a': a, 'b': b }, [int, float]) self.assertReturnType( typehints.Tuple[int, typehints.Dict[str, typehints.Union[int, float]]], - lambda a, - b: (4, { + lambda a, b: (4, { 'a': a, 'b': b }), [int, float]) @@ -199,21 +187,20 @@ def testSimpleList(self): def testListComprehension(self): self.assertReturnType( - typehints.List[int], - lambda xs: [x for x in xs], [typehints.Tuple[int, ...]]) + typehints.List[int], lambda xs: [x for x in xs], + [typehints.Tuple[int, ...]]) def testTupleListComprehension(self): self.assertReturnType( - typehints.List[int], - lambda xs: [x for x in xs], [typehints.Tuple[int, int, int]]) + typehints.List[int], lambda xs: [x for x in xs], + [typehints.Tuple[int, int, int]]) self.assertReturnType( - typehints.List[typehints.Union[int, float]], - lambda xs: [x for x in xs], [typehints.Tuple[int, float]]) + typehints.List[typehints.Union[int, float]], lambda xs: [x for x in xs], + [typehints.Tuple[int, float]]) expected = typehints.List[typehints.Tuple[str, int]] self.assertReturnType( - expected, - lambda kvs: [(kvs[0], v) for v in kvs[1]], + expected, lambda kvs: [(kvs[0], v) for v in kvs[1]], [typehints.Tuple[str, typehints.Iterable[int]]]) self.assertReturnType( typehints.List[typehints.Tuple[str, typehints.Union[str, int], int]], @@ -231,8 +218,8 @@ def foo(x, y): def testGeneratorComprehension(self): self.assertReturnType( - typehints.Iterable[int], - lambda xs: (x for x in xs), [typehints.Tuple[int, ...]]) + typehints.Iterable[int], lambda xs: (x for x in xs), + [typehints.Tuple[int, ...]]) def testBinOp(self): self.assertReturnType(int, lambda a, b: a + b, [int, int]) @@ -240,9 +227,8 @@ def testBinOp(self): self.assertReturnType( typehints.Any, lambda a, b: a + b, [int, typehints.Any]) self.assertReturnType( - typehints.List[typehints.Union[int, str]], - lambda a, - b: a + b, [typehints.List[int], typehints.List[str]]) + typehints.List[typehints.Union[int, str]], lambda a, b: a + b, + [typehints.List[int], typehints.List[str]]) def testBinOpPromotion(self): self.assertReturnType(int, lambda a, b: a + b, [int, bool]) @@ -300,8 +286,8 @@ def 
testBuiltins(self): def testGetAttr(self): self.assertReturnType( - typehints.Tuple[str, typehints.Any], - lambda: (typehints.__doc__, typehints.fake)) + typehints.Tuple[str, typehints.Any], lambda: + (typehints.__doc__, typehints.fake)) def testSetAttr(self): def fn(obj, flag): @@ -408,16 +394,13 @@ def fn(x1, x2, *unused_args): self.assertReturnType( typehints.Tuple[typehints.Union[str, float, int], typehints.Union[str, float, int]], - lambda x1, - x2, - _list: fn(x1, x2, *_list), [str, float, typehints.List[int]]) + lambda x1, x2, _list: fn(x1, x2, *_list), + [str, float, typehints.List[int]]) # No *args self.assertReturnType( typehints.Tuple[typehints.Union[str, typehints.List[int]], typehints.Union[str, typehints.List[int]]], - lambda x1, - x2, - _list: fn(x1, x2, *_list), [str, typehints.List[int]]) + lambda x1, x2, _list: fn(x1, x2, *_list), [str, typehints.List[int]]) def testCallFunctionEx(self): # Test when fn arguments are built using BUiLD_LIST. @@ -426,8 +409,7 @@ def fn(*args): self.assertReturnType( typehints.List[typehints.Union[str, float]], - lambda x1, - x2: fn(*[x1, x2]), [str, float]) + lambda x1, x2: fn(*[x1, x2]), [str, float]) def testCallFunctionExKwargs(self): def fn(x1, x2, **unused_kwargs): @@ -435,10 +417,8 @@ def fn(x1, x2, **unused_kwargs): # Keyword args are currently unsupported for CALL_FUNCTION_EX. self.assertReturnType( - typehints.Any, - lambda x1, - x2, - _dict: fn(x1, x2, **_dict), [str, float, typehints.List[int]]) + typehints.Any, lambda x1, x2, _dict: fn(x1, x2, **_dict), + [str, float, typehints.List[int]]) def testInstanceToType(self): class MyClass(object): @@ -483,22 +463,19 @@ def method(self): def testRow(self): self.assertReturnType( row_type.RowTypeConstraint.from_fields([('x', int), ('y', str)]), - lambda x, - y: beam.Row(x=x + 1, y=y), [int, str]) + lambda x, y: beam.Row(x=x + 1, y=y), [int, str]) self.assertReturnType( row_type.RowTypeConstraint.from_fields([('x', int), ('y', str)]), lambda x: beam.Row(x=x, y=str(x)), [int]) def testRowAttr(self): self.assertReturnType( - typehints.Tuple[int, str], - lambda row: (row.x, getattr(row, 'y')), + typehints.Tuple[int, str], lambda row: (row.x, getattr(row, 'y')), [row_type.RowTypeConstraint.from_fields([('x', int), ('y', str)])]) def testRowMissingAttr(self): self.assertReturnType( - typehints.Any, - lambda row: getattr(row, '_asdict'), + typehints.Any, lambda row: getattr(row, '_asdict'), [row_type.RowTypeConstraint.from_fields([('x', int), ('y', str)])]) def testFString(self): diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index 68fc09cefaa4..ef86a0eeb075 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ b/sdks/python/apache_beam/typehints/typehints.py @@ -708,8 +708,8 @@ def _consistent_with_check_(self, sub): return ( isinstance(sub, self.__class__) and len(sub.tuple_types) == len(self.tuple_types) and all( - is_consistent_with(sub_elem, elem) for sub_elem, - elem in zip(sub.tuple_types, self.tuple_types))) + is_consistent_with(sub_elem, elem) + for sub_elem, elem in zip(sub.tuple_types, self.tuple_types))) def type_check(self, tuple_instance): if not isinstance(tuple_instance, tuple): diff --git a/sdks/python/apache_beam/utils/urns.py b/sdks/python/apache_beam/utils/urns.py index 2647a0200bde..a8074137d178 100644 --- a/sdks/python/apache_beam/utils/urns.py +++ b/sdks/python/apache_beam/utils/urns.py @@ -149,14 +149,12 @@ def register_pickle_urn(cls, pickle_urn): """Registers and implements the given urn via 
pickling. """ inspect.currentframe().f_back.f_locals['to_runner_api_parameter'] = ( - lambda self, - context: + lambda self, context: (pickle_urn, wrappers_pb2.BytesValue(value=pickler.dumps(self)))) cls.register_urn( pickle_urn, wrappers_pb2.BytesValue, - lambda proto, - unused_context: pickler.loads(proto.value)) + lambda proto, unused_context: pickler.loads(proto.value)) def to_runner_api( self, context: 'PipelineContext') -> beam_runner_api_pb2.FunctionSpec: diff --git a/sdks/python/apache_beam/yaml/examples/testing/examples_test.py b/sdks/python/apache_beam/yaml/examples/testing/examples_test.py index ee35e3430766..4b34b735744c 100644 --- a/sdks/python/apache_beam/yaml/examples/testing/examples_test.py +++ b/sdks/python/apache_beam/yaml/examples/testing/examples_test.py @@ -314,7 +314,8 @@ def parse_test_methods(cls, path: str): @classmethod def create_test_suite(cls, name: str, path: str): - return type(name, (unittest.TestCase, ), dict(cls.parse_test_methods(path))) + return type( + name, (unittest.TestCase, ), dict(cls.parse_test_methods(path))) @classmethod def register_test_preprocessor(cls, test_names: Union[str, List]): diff --git a/sdks/python/apache_beam/yaml/generate_yaml_docs.py b/sdks/python/apache_beam/yaml/generate_yaml_docs.py index f66b095efbfc..296fa211f43e 100644 --- a/sdks/python/apache_beam/yaml/generate_yaml_docs.py +++ b/sdks/python/apache_beam/yaml/generate_yaml_docs.py @@ -234,8 +234,7 @@ def transform_docs(transform_base, transforms, providers, extra_docs=''): longest( lambda t: longest( lambda p: add_transform_links( - t, p.description(t), providers.keys()), - providers[t]), + t, p.description(t), providers.keys()), providers[t]), transforms).replace('::\n', '\n\n :::yaml\n'), '', extra_docs, diff --git a/sdks/python/apache_beam/yaml/json_utils.py b/sdks/python/apache_beam/yaml/json_utils.py index ef8bfcc026e5..6b17faec713b 100644 --- a/sdks/python/apache_beam/yaml/json_utils.py +++ b/sdks/python/apache_beam/yaml/json_utils.py @@ -43,8 +43,10 @@ schema_pb2.INT16: 'integer', schema_pb2.INT32: 'integer', schema_pb2.FLOAT: 'number', - **{v: k - for k, v in JSON_ATOMIC_TYPES_TO_BEAM.items()} + **{ + v: k + for k, v in JSON_ATOMIC_TYPES_TO_BEAM.items() + } } diff --git a/sdks/python/apache_beam/yaml/readme_test.py b/sdks/python/apache_beam/yaml/readme_test.py index 4a9fad991c53..ce9d6269e545 100644 --- a/sdks/python/apache_beam/yaml/readme_test.py +++ b/sdks/python/apache_beam/yaml/readme_test.py @@ -259,8 +259,7 @@ def test(self): # in precommits with mock.patch( 'apache_beam.yaml.yaml_provider.ExternalProvider.create_transform', - lambda *args, - **kwargs: _Fakes.SomeTransform(*args, **kwargs)): + lambda *args, **kwargs: _Fakes.SomeTransform(*args, **kwargs)): p = beam.Pipeline(options=PipelineOptions(**options)) yaml_transform.expand_pipeline( p, modified_yaml, yaml_provider.merge_providers([test_provider])) @@ -343,9 +342,8 @@ def extract_name(input_spec): yield ( test_name + '_' + suffix, # The yp=... ts=... is to capture the looped closure values. - lambda _, - yp=yaml_pipeline, - ts=test_spec: yaml_testing.run_test(yp, ts)) + lambda _, yp=yaml_pipeline, ts=test_spec: yaml_testing. 
+ run_test(yp, ts)) code_lines = None elif code_lines is not None: diff --git a/sdks/python/apache_beam/yaml/yaml_io.py b/sdks/python/apache_beam/yaml/yaml_io.py index e0cefe79a7d6..ac1e7941b163 100644 --- a/sdks/python/apache_beam/yaml/yaml_io.py +++ b/sdks/python/apache_beam/yaml/yaml_io.py @@ -78,8 +78,8 @@ def write_to_text(pcoll, path: str): """ try: field_names = [ - name for name, - _ in schemas.named_fields_from_element_type(pcoll.element_type) + name for name, _ in schemas.named_fields_from_element_type( + pcoll.element_type) ] except Exception as exn: raise ValueError( @@ -644,8 +644,8 @@ def write_to_tfrecord( """ try: field_names = [ - name for name, - _ in schemas.named_fields_from_element_type(pcoll.element_type) + name for name, _ in schemas.named_fields_from_element_type( + pcoll.element_type) ] except Exception as exn: raise ValueError( diff --git a/sdks/python/apache_beam/yaml/yaml_io_test.py b/sdks/python/apache_beam/yaml/yaml_io_test.py index 2a6a8f16b085..3ae9f19b9b8d 100644 --- a/sdks/python/apache_beam/yaml/yaml_io_test.py +++ b/sdks/python/apache_beam/yaml/yaml_io_test.py @@ -230,9 +230,10 @@ def test_read_avro(self): ''' % json.dumps(self._avro_schema)) assert_that( result, - equal_to( - [beam.Row(label='37a', rank=1), # linebreak - beam.Row(label='389a', rank=2)])) + equal_to([ + beam.Row(label='37a', rank=1), # linebreak + beam.Row(label='389a', rank=2) + ])) def test_read_json(self): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( diff --git a/sdks/python/apache_beam/yaml/yaml_join_test.py b/sdks/python/apache_beam/yaml/yaml_join_test.py index 5d43b1cdb3ab..342fe72bd74c 100644 --- a/sdks/python/apache_beam/yaml/yaml_join_test.py +++ b/sdks/python/apache_beam/yaml/yaml_join_test.py @@ -51,8 +51,8 @@ def expand(self, pcoll): @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - None, + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is None, 'Do not run this test on precommit suites.') class YamlJoinTest(unittest.TestCase): def test_basic_join(self): diff --git a/sdks/python/apache_beam/yaml/yaml_mapping.py b/sdks/python/apache_beam/yaml/yaml_mapping.py index 9985e9756fd8..1011d45383dc 100644 --- a/sdks/python/apache_beam/yaml/yaml_mapping.py +++ b/sdks/python/apache_beam/yaml/yaml_mapping.py @@ -602,9 +602,10 @@ def explode_zip(base, fields): pcoll | beam.FlatMap( lambda row: - (explode_cross_product if cross_product else explode_zip) - ({name: getattr(row, name) - for name in all_fields}, to_explode))) + (explode_cross_product if cross_product else explode_zip)({ + name: getattr(row, name) + for name in all_fields + }, to_explode))) def infer_output_type(self, input_type): return row_type.RowTypeConstraint.from_fields([( diff --git a/sdks/python/apache_beam/yaml/yaml_provider.py b/sdks/python/apache_beam/yaml/yaml_provider.py index 171f229746a2..1145d6393a52 100755 --- a/sdks/python/apache_beam/yaml/yaml_provider.py +++ b/sdks/python/apache_beam/yaml/yaml_provider.py @@ -303,14 +303,9 @@ def maven_jar( classifier=None, appendix=None): return ExternalJavaProvider( - urns, - lambda: subprocess_server.JavaJarServer.path_to_maven_jar( - artifact_id=artifact_id, - group_id=group_id, - version=version, - repository=repository, - classifier=classifier, - appendix=appendix)) + urns, lambda: subprocess_server.JavaJarServer.path_to_maven_jar( + artifact_id=artifact_id, group_id=group_id, version=version, + repository=repository, classifier=classifier, appendix=appendix)) 
@ExternalProvider.register_provider_type('beamJar') @@ -322,8 +317,7 @@ def beam_jar( version=beam_version, artifact_id=None): return ExternalJavaProvider( - urns, - lambda: subprocess_server.JavaJarServer.path_to_beam_jar( + urns, lambda: subprocess_server.JavaJarServer.path_to_beam_jar( gradle_target=gradle_target, version=version, artifact_id=artifact_id) ) @@ -359,8 +353,7 @@ def cache_artifacts(self): class ExternalJavaProvider(ExternalProvider): def __init__(self, urns, jar_provider, classpath=None): super().__init__( - urns, - lambda: external.JavaJarExpansionService( + urns, lambda: external.JavaJarExpansionService( jar_provider(), classpath=classpath)) self._jar_provider = jar_provider self._classpath = classpath @@ -401,8 +394,8 @@ def python(urns, provider_base_path, packages=()): return ExternalPythonProvider(urns, provider_base_path, packages) else: return InlineProvider({ - name: - python_callable.PythonCallableWithSource.load_from_source(constructor) + name: python_callable.PythonCallableWithSource.load_from_source( + constructor) for (name, constructor) in urns.items() }) @@ -595,9 +588,8 @@ def type_of(p): for param in cls.get_docs(factory).params } - names_and_types = [ - (name, typing_to_runner_api(type_of(p))) for name, p in params.items() - ] + names_and_types = [(name, typing_to_runner_api(type_of(p))) + for name, p in params.items()] return schema_pb2.Schema( fields=[ schema_pb2.Field(name=name, type=type, description=docs.get(name)) diff --git a/sdks/python/apache_beam/yaml/yaml_provider_unit_test.py b/sdks/python/apache_beam/yaml/yaml_provider_unit_test.py index 9cf61acd620a..fe8b6c7b89a3 100644 --- a/sdks/python/apache_beam/yaml/yaml_provider_unit_test.py +++ b/sdks/python/apache_beam/yaml/yaml_provider_unit_test.py @@ -97,8 +97,7 @@ def tearDownClass(cls): @mock.patch( 'apache_beam.yaml.yaml_provider.ExternalProvider.provider_from_spec', - lambda _, - x: x) + lambda _, x: x) def test_include_file(self): flattened = [ SafeLineLoader.strip_metadata(spec) @@ -107,8 +106,7 @@ def test_include_file(self): self.INLINE_PROVIDER, { 'include': self.to_include - }, - ]) + }, ]) ] self.assertEqual([ @@ -119,8 +117,7 @@ def test_include_file(self): @mock.patch( 'apache_beam.yaml.yaml_provider.ExternalProvider.provider_from_spec', - lambda _, - x: x) + lambda _, x: x) def test_include_url(self): flattened = [ SafeLineLoader.strip_metadata(spec) @@ -129,8 +126,7 @@ def test_include_url(self): self.INLINE_PROVIDER, { 'include': 'file:///' + self.to_include - }, - ]) + }, ]) ] self.assertEqual([ @@ -141,8 +137,7 @@ def test_include_url(self): @mock.patch( 'apache_beam.yaml.yaml_provider.ExternalProvider.provider_from_spec', - lambda _, - x: x) + lambda _, x: x) def test_nested_include(self): flattened = [ SafeLineLoader.strip_metadata(spec) @@ -151,8 +146,7 @@ def test_nested_include(self): self.INLINE_PROVIDER, { 'include': self.to_include_nested - }, - ]) + }, ]) ] self.assertEqual([ diff --git a/sdks/python/apache_beam/yaml/yaml_testing.py b/sdks/python/apache_beam/yaml/yaml_testing.py index 689ba208a9ae..9634b58e615d 100644 --- a/sdks/python/apache_beam/yaml/yaml_testing.py +++ b/sdks/python/apache_beam/yaml/yaml_testing.py @@ -224,8 +224,7 @@ def require_output_or_outputs(name_or_names): input_spec = scope.get_transform_spec(transform_id)['input'] return { tag: require_output_or_outputs(input_ref) - for tag, - input_ref in yaml_transform.empty_if_explicitly_empty( + for tag, input_ref in yaml_transform.empty_if_explicitly_empty( input_spec).items() } diff --git 
a/sdks/python/apache_beam/yaml/yaml_transform.py b/sdks/python/apache_beam/yaml/yaml_transform.py index 2c6734dbf292..8710fe379c37 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform.py +++ b/sdks/python/apache_beam/yaml/yaml_transform.py @@ -404,8 +404,7 @@ def expand(pcolls): provider.create_transform( spec['type'], config, - lambda config, - input_pcolls=input_pcolls: self.create_ptransform( + lambda config, input_pcolls=input_pcolls: self.create_ptransform( config, input_pcolls))) # TODO(robertwb): Should we have a better API for adding annotations # than this? @@ -935,8 +934,10 @@ def lift_config(spec): if 'config' not in spec: common_params = 'name', 'type', 'input', 'output', 'transforms' return { - 'config': {k: v - for (k, v) in spec.items() if k not in common_params}, + 'config': { + k: v + for (k, v) in spec.items() if k not in common_params + }, **{ k: v for (k, v) in spec.items() # diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py index 5159dd4409f3..1a99507d76d7 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform_test.py +++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py @@ -783,8 +783,8 @@ class AnnotatingProvider(yaml_provider.InlineProvider): """ def __init__(self, name, transform_names): super().__init__({ - transform_name: - lambda: beam.Map(lambda x: (x if type(x) == tuple else ()) + (name, )) + transform_name: lambda: beam.Map( + lambda x: (x if type(x) == tuple else ()) + (name, )) for transform_name in transform_names.strip().split() }) self._name = name @@ -836,8 +836,7 @@ def test_prefers_same_provider(self): 'provider1', # All of the providers vend A, but since the input was produced # by provider1, we prefer to use that again. - 'provider1', - # Similarly for C. + 'provider1', # Similarly for C. 'provider1')]), label='StartWith1') @@ -857,10 +856,8 @@ def test_prefers_same_provider(self): result2, equal_to([( # provider2 was necessarily chosen for P2 - 'provider2', - # Unlike above, we choose provider2 to implement A. - 'provider2', - # Likewise for C. + 'provider2', # Unlike above, we choose provider2 to implement A. + 'provider2', # Likewise for C. 
'provider2')]), label='StartWith2') diff --git a/sdks/python/apache_beam/yaml/yaml_udf_test.py b/sdks/python/apache_beam/yaml/yaml_udf_test.py index 1a50568c3d20..3d664ab9de41 100644 --- a/sdks/python/apache_beam/yaml/yaml_udf_test.py +++ b/sdks/python/apache_beam/yaml/yaml_udf_test.py @@ -136,8 +136,8 @@ def test_map_to_fields_filter_inline_py(self): @staticmethod @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - None, + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is None, 'Do not run this test on precommit suites.') def test_map_to_fields_sql_reserved_keyword(): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( @@ -168,8 +168,8 @@ def test_map_to_fields_sql_reserved_keyword(): @staticmethod @unittest.skipIf( - TestPipeline().get_pipeline_options().view_as(StandardOptions).runner is - None, + TestPipeline().get_pipeline_options().view_as(StandardOptions).runner + is None, 'Do not run this test on precommit suites.') def test_map_to_fields_sql_reserved_keyword_append(): with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( diff --git a/sdks/python/apache_beam/yaml/yaml_utils.py b/sdks/python/apache_beam/yaml/yaml_utils.py index ce71c2d7a31e..6e8a97c8b4f8 100644 --- a/sdks/python/apache_beam/yaml/yaml_utils.py +++ b/sdks/python/apache_beam/yaml/yaml_utils.py @@ -61,8 +61,8 @@ def create_uuid(cls): def strip_metadata(cls, spec, tagged_str=True): if isinstance(spec, Mapping): return { - cls.strip_metadata(key, tagged_str): - cls.strip_metadata(value, tagged_str) + cls.strip_metadata(key, tagged_str): cls.strip_metadata( + value, tagged_str) for (key, value) in spec.items() if key not in ('__line__', '__uuid__') } diff --git a/sdks/python/setup.cfg b/sdks/python/setup.cfg index 0d1e1c3347e4..a25ee68d9378 100644 --- a/sdks/python/setup.cfg +++ b/sdks/python/setup.cfg @@ -59,6 +59,7 @@ known_standard_library = dataclasses indent_width = 2 continuation_indent_width = 4 column_limit = 80 +allow_multiline_lambdas = False allow_split_before_dict_value = False blank_line_before_module_docstring = True blank_line_before_nested_class_or_def = False From a087bcda824242a7309986a8261d4069c453c6f8 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 1 May 2025 11:27:46 -0400 Subject: [PATCH 3/7] formatting fixes with master --- .../dataframe/pandas_doctests_test.py | 8 ++++---- sdks/python/apache_beam/pipeline_test.py | 18 +++++++++--------- sdks/python/apache_beam/transforms/external.py | 4 ++-- sdks/python/apache_beam/yaml/yaml_provider.py | 6 ++---- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py index aeafc4911ed7..259d1c0253a1 100644 --- a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py +++ b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py @@ -719,10 +719,10 @@ def test_datetime_tests(self): 'pandas.core.indexes.accessors.TimedeltaProperties.to_pytimedelta': [ '*' ], # pylint: enable=line-too-long - # Test uses to_datetime. Beam calls to_datetime element-wise, and - # therefore the .tz attribute is not evaluated on entire Series. - # Hence, .tz becomes None, unless explicitly set. - # See: see test_tz_with_utc_zone_set_explicitly + # Test uses to_datetime. Beam calls to_datetime element-wise, and + # therefore the .tz attribute is not evaluated on entire Series. + # Hence, .tz becomes None, unless explicitly set. 
+ # See: see test_tz_with_utc_zone_set_explicitly 'pandas.core.indexes.accessors.DatetimeProperties.tz': ['*'], }) datetimelike_result = doctests.testmod( diff --git a/sdks/python/apache_beam/pipeline_test.py b/sdks/python/apache_beam/pipeline_test.py index 9cdab3d55aba..b18bc0d668e2 100644 --- a/sdks/python/apache_beam/pipeline_test.py +++ b/sdks/python/apache_beam/pipeline_test.py @@ -1441,32 +1441,32 @@ def file_artifact(path, hash, staged_name): 'e1': beam_runner_api_pb2.Environment( dependencies=[file_artifact('a1', 'x', 'dest')]), 'e2': beam_runner_api_pb2.Environment( - dependencies=[file_artifact('a2', 'x', 'dest') - ]), # Different hash. + dependencies=[file_artifact('a2', 'x', 'dest')]), 'e3': beam_runner_api_pb2.Environment( dependencies=[file_artifact('a3', 'y', 'dest') - ]), # Different destination. + ]), # Different hash. 'e4': beam_runner_api_pb2.Environment( - dependencies=[file_artifact('a4', 'y', 'dest2')]), - # Multiple files with same hash and destinations. + dependencies=[file_artifact('a4', 'y', 'dest2') + ]), # Different destination. 'e5': beam_runner_api_pb2.Environment( dependencies=[ file_artifact('a1', 'x', 'dest'), file_artifact('b1', 'xb', 'destB') - ]), + ]), # Multiple files with same hash and destinations. 'e6': beam_runner_api_pb2.Environment( dependencies=[ file_artifact('a2', 'x', 'dest'), file_artifact('b2', 'xb', 'destB') - ]), # Overlapping, but not identical, files. + ]), 'e7': beam_runner_api_pb2.Environment( dependencies=[ file_artifact('a1', 'x', 'dest'), file_artifact('b2', 'y', 'destB') - ]), # Same files as first, but differing other properties. + ]), # Overlapping, but not identical, files. 'e0': beam_runner_api_pb2.Environment( resource_hints={'hint': b'value'}, - dependencies=[file_artifact('a1', 'x', 'dest')]), + dependencies=[file_artifact('a1', 'x', 'dest')] + ), # Same files as first, but differing other properties. })) Pipeline.merge_compatible_environments(proto) diff --git a/sdks/python/apache_beam/transforms/external.py b/sdks/python/apache_beam/transforms/external.py index 4edfcaf47337..f6d361a9ecac 100644 --- a/sdks/python/apache_beam/transforms/external.py +++ b/sdks/python/apache_beam/transforms/external.py @@ -462,8 +462,8 @@ def __init__( # construction. 
underlying_transform_id = ( self._managed_replacement.underlying_transform_identifier) - if not (underlying_transform_id in - MANAGED_TRANSFORM_URN_TO_JAR_TARGET_MAPPING): + if not (underlying_transform_id + in MANAGED_TRANSFORM_URN_TO_JAR_TARGET_MAPPING): raise ValueError( 'Could not find an expansion service jar for the managed ' + 'transform ' + underlying_transform_id) diff --git a/sdks/python/apache_beam/yaml/yaml_provider.py b/sdks/python/apache_beam/yaml/yaml_provider.py index da184403e25f..0609c4ef8df5 100755 --- a/sdks/python/apache_beam/yaml/yaml_provider.py +++ b/sdks/python/apache_beam/yaml/yaml_provider.py @@ -339,8 +339,7 @@ def beam_jar( version=beam_version, artifact_id=None): return ExternalJavaProvider( - urns, - lambda: subprocess_server.JavaJarServer.path_to_beam_jar( + urns, lambda: subprocess_server.JavaJarServer.path_to_beam_jar( gradle_target=gradle_target, version=version, artifact_id=artifact_id ), managed_replacement=managed_replacement) @@ -378,8 +377,7 @@ class ExternalJavaProvider(ExternalProvider): def __init__( self, urns, jar_provider, managed_replacement=None, classpath=None): super().__init__( - urns, - lambda: external.JavaJarExpansionService( + urns, lambda: external.JavaJarExpansionService( jar_provider(), classpath=classpath), managed_replacement) self._jar_provider = jar_provider From 40937ed96b563b106b3112e43aad2ae4cc63f288 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 1 May 2025 11:30:57 -0400 Subject: [PATCH 4/7] add to changes.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 5187f7df4114..d1ba19c5bf4c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -80,6 +80,7 @@ ## Breaking Changes * X behavior was changed ([#X](https://github.com/apache/beam/issues/X)). +* Yapf version upgraded to 0.43.0 for formatting (Python) ([#34801](https://github.com/apache/beam/pull/34801/)). 
## Deprecations From cc2bd2fb1dca886432103ffabec5c8d219f36d09 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 1 May 2025 11:55:25 -0400 Subject: [PATCH 5/7] lint skips for disabled formatting files --- sdks/python/apache_beam/dataframe/pandas_doctests_test.py | 2 ++ .../apache_beam/runners/dataflow/dataflow_metrics_test.py | 1 + 2 files changed, 3 insertions(+) diff --git a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py index 259d1c0253a1..d32b7858c773 100644 --- a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py +++ b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py @@ -23,6 +23,8 @@ from apache_beam.dataframe.frames import PD_VERSION from apache_beam.dataframe.pandas_top_level_functions import _is_top_level_function +# pylint: skip-file + @unittest.skipIf( sys.platform == 'win32', '[https://github.com/apache/beam/issues/20361]') diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py index be8a08528fe2..ed1cd1500823 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_metrics_test.py @@ -21,6 +21,7 @@ """ # pytype: skip-file +# pylint: skip-file import types import unittest From 4cd88dbf9314fcc15e2e251bc2155db6c0214aaa Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 1 May 2025 12:06:31 -0400 Subject: [PATCH 6/7] disable check F821 in decorators.py --- sdks/python/apache_beam/typehints/decorators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/typehints/decorators.py b/sdks/python/apache_beam/typehints/decorators.py index 04c9c7be48ad..94bce04cb5b7 100644 --- a/sdks/python/apache_beam/typehints/decorators.py +++ b/sdks/python/apache_beam/typehints/decorators.py @@ -352,7 +352,7 @@ def strip_pcoll_helper( my_type: any, has_my_type: Callable[[], bool], my_key: str, - special_containers: List[Union['PBegin', 'PDone', + special_containers: List[Union['PBegin', 'PDone', # noqa: F821 'PCollection']], # noqa: F821 error_str: str, source_str: str) -> 'IOTypeHints': From 8c5844f3a964b2a5093d6c8fb67784ca93d06669 Mon Sep 17 00:00:00 2001 From: Jack McCluskey Date: Thu, 1 May 2025 12:20:00 -0400 Subject: [PATCH 7/7] another noqa statement --- sdks/python/apache_beam/typehints/decorators.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/typehints/decorators.py b/sdks/python/apache_beam/typehints/decorators.py index 94bce04cb5b7..dd650aa2fc0b 100644 --- a/sdks/python/apache_beam/typehints/decorators.py +++ b/sdks/python/apache_beam/typehints/decorators.py @@ -352,8 +352,10 @@ def strip_pcoll_helper( my_type: any, has_my_type: Callable[[], bool], my_key: str, - special_containers: List[Union['PBegin', 'PDone', # noqa: F821 - 'PCollection']], # noqa: F821 + special_containers: List[Union[ + 'PBegin', # noqa: F821 + 'PDone', # noqa: F821 + 'PCollection']], # noqa: F821 error_str: str, source_str: str) -> 'IOTypeHints': from apache_beam.pvalue import PCollection
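
For reference, here is a minimal, self-contained sketch (not part of the patch
series) of previewing the lambda reformatting that dominates the hunks above.
The OLD_STYLE snippet is invented for illustration; pointing style_config at
sdks/python/setup.cfg assumes the repository root as the working directory and
that yapf's FormatCode accepts a config-file path the same way the --style flag
does.

    # Requires yapf==0.43.0. FormatCode returns (formatted_source, changed).
    from yapf.yapflib.yapf_api import FormatCode

    # Invented example in the yapf 0.29.0 style, where lambda parameters were
    # allowed to break across lines inside a call.
    OLD_STYLE = (
        "self.assertReturnType(\n"
        "    typehints.Dict[typehints.Any, typehints.Any],\n"
        "    lambda k,\n"
        "    v: {}, [int, float])\n")

    # Reuse the [yapf] section updated by this patch series (indent_width = 2,
    # column_limit = 80, allow_multiline_lambdas = False, ...).
    formatted, changed = FormatCode(
        OLD_STYLE, style_config='sdks/python/setup.cfg')

    # With allow_multiline_lambdas = False the lambda is expected to collapse
    # onto a single line, matching the rewrites shown in the diffs above.
    print(changed)
    print(formatted)

This only previews the style on a single invented snippet; the diff hunks
themselves are the authoritative output of running yapf 0.43.0 over the SDK.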