Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions sdks/python/apache_beam/typehints/intrinsic_one_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Defines the actions intrinsic bytecodes have on the frame.

Each function here corresponds to a bytecode documented in
https://docs.python.org/3/library/dis.html . The first argument is a (mutable)
FrameState object, the second the integer opcode argument.

Bytecodes with more complicated behavior (e.g. modifying the program counter)
are handled inline rather than here.

For internal use only; no backwards-compatibility guarantees.
"""
# pytype: skip-file

from . import opcodes


def intrinsic_1_invalid(state, arg):
pass


def intrinsic_print(state, arg):
pass


def intrinsic_import_star(state, arg):
pass


def intrinsic_stopiteration_error(state, arg):
pass


def intrinsic_async_gen_wrap(state, arg):
pass


def intrinsic_unary_positive(state, arg):
opcodes.unary_positive(state, arg)
pass


def intrinsic_list_to_tuple(state, arg):
opcodes.list_to_tuple(state, arg)
pass


def intrinsic_typevar(state, arg):
pass


def intrinsic_paramspec(state, arg):
pass


def intrinsic_typevartuple(state, arg):
pass


def intrinsic_subscript_generic(state, arg):
pass


def intrinsic_typealias(state, arg):
pass


# The order of operations in the table of the intrinsic one operations is
# defined in https://docs.python.org/3/library/dis.html#opcode-CALL_INTRINSIC_1
# and may change between minor versions.
INT_ONE_OPS = tuple([
intrinsic_1_invalid,
intrinsic_print,
intrinsic_import_star,
intrinsic_stopiteration_error,
intrinsic_async_gen_wrap,
intrinsic_unary_positive,
intrinsic_list_to_tuple,
intrinsic_typevar,
intrinsic_paramspec,
intrinsic_typevartuple,
intrinsic_subscript_generic,
intrinsic_typealias
])
40 changes: 40 additions & 0 deletions sdks/python/apache_beam/typehints/intrinsic_one_ops_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Tests for apache_beam.typehints.intrinsic_one_ops."""

# pytype: skip-file

import dis
import sys
import unittest

from apache_beam.typehints import intrinsic_one_ops


class IntrinsicOneOpsTest(unittest.TestCase):
def test_unary_intrinsic_ops_are_in_the_same_order_as_in_cpython(self):
if sys.version_info >= (3, 12):
dis_order = dis.__dict__['_intrinsic_1_descs']
beam_ops = [fn.__name_upper() for fn in intrinsic_one_ops.INT_ONE_OPS]
for fn in intrinsic_one_ops.INT_ONE_OPS:
beam_ops.append(fn.__name__.upper())
self.assertListEqual(dis_order, beam_ops)


if __name__ == '__main__':
unittest.main()
65 changes: 65 additions & 0 deletions sdks/python/apache_beam/typehints/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,26 @@ def nop(unused_state, unused_arg):
pass


resume = nop


def pop_top(state, unused_arg):
state.stack.pop()


def end_for(state, unused_arg):
state.stack.pop()
state.stack.pop()


def end_send(state, unused_arg):
del state.stack[-2]


def copy(state, arg):
state.stack.append(state.stack[-arg])


def rot_n(state, n):
state.stack[-n:] = [state.stack[-1]] + state.stack[-n:-1]

Expand Down Expand Up @@ -188,6 +204,31 @@ def store_subscr(unused_state, unused_args):
pass


def binary_slice(state, args):
_ = state.stack.pop()
_ = state.stack.pop()
base = Const.unwrap(state.stack.pop())
if base is str:
out = base
elif isinstance(base, typehints.IndexableTypeConstraint):
out = base
else:
out = element_type(base)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my understanding, what is the interpretation of
out = element_type(base)?

does this say: take a container, and return the type of individual element that lives in the container? Wouldn't a slice always return a container as opposed to individual elements?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm that's actually an implementation interpretation I hadn't thought of. The documentation for the op is here - https://docs.python.org/3/library/dis.html#opcode-BINARY_SLICE and I was adapting the BINARY_SUBSCR code (which is doing similar work with a single index arg instead of a slice.) I think most cases will be caught by the indexable type constraint (so the type of the container is what we return) but that else case might not be handling things correctly. I'll see if I can get a bit more of an understanding on this op code specifically.

state.stack.append(out)


def store_slice(state, args):
"""Clears elements off the stack like it was constructing a
container, but leaves the container type back at stack[-1]
since that's all that is relevant for type checking.
"""
_ = state.stack.pop() # End
_ = state.stack.pop() # Start
container = state.stack.pop() # Container type
_ = state.stack.pop() # Values that would go in container
state.stack.append(container)


print_item = pop_top
print_newline = nop

Expand Down Expand Up @@ -347,6 +388,14 @@ def load_attr(state, arg):
Will replace with Any for builtin methods, but these don't have bytecode in
CPython so that's okay.
"""
if (sys.version_info.major, sys.version_info.minor) >= (3, 12):
# Load attribute's arg was bit-shifted in 3.12 to also allow for
# adding extra information to the stack based on the lower byte,
# so we have to adjust it back.
#
# See https://docs.python.org/3/library/dis.html#opcode-LOAD_ATTR
# for more information.
arg = arg >> 1
o = state.stack.pop()
name = state.get_name(arg)
state.stack.append(_getattr(o, name))
Expand Down Expand Up @@ -417,6 +466,14 @@ def load_fast(state, arg):
state.stack.append(state.vars[arg])


load_fast_check = load_fast


def load_fast_and_clear(state, arg):
state.stack.append(state.vars[arg])
del state.vars[arg]


def store_fast(state, arg):
state.vars[arg] = state.stack.pop()

Expand All @@ -425,6 +482,14 @@ def delete_fast(state, arg):
state.vars[arg] = Any # really an error


def swap(state, arg):
state.stack[-arg], state.stack[-1] = state.stack[-1], state.stack[-arg]


def reraise(state, arg):
pass


# bpo-43683 Adds GEN_START in Python 3.10, but removed in Python 3.11
# https://github.com/python/cpython/pull/25138
def gen_start(state, arg):
Expand Down
30 changes: 29 additions & 1 deletion sdks/python/apache_beam/typehints/trivial_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
dis.dis(f)
from . import opcodes
simple_ops = dict((k.upper(), v) for k, v in opcodes.__dict__.items())
from . import intrinsic_one_ops

co = f.__code__
code = co.co_code
Expand Down Expand Up @@ -432,6 +433,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
elif op in dis.haslocal:
print('(' + co.co_varnames[arg] + ')', end=' ')
elif op in dis.hascompare:
if (sys.version_info.major, sys.version_info.minor) >= (3, 12):
# In 3.12 this arg was bit-shifted. Shifting it back avoids an
# out-of-index.
arg = arg >> 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for my knowledge, where does this come from? Perhaps there is a link you could add in a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is funny in that it was actually an undocumented change as best as I could tell. The comparison operations are similar to the intrinsic functions here in that the arg for a comparison op is an integer index into a list of operations that you index into. The output is always a boolean here anyway so we don't necessarily care what the operation itself is, but for debugging purposes we get the operation to print anyway. The problem was that these indices got bit shifted (a trick this package really likes to do for operations in different versions), so our print statement here was failing with an out-of-range index.

I'll try to add some comments around some of these changes since they're a little arbitrary, but for the most part it really just breaks down to "this argument's representation was changed in the package so we have to change it here for this version"

print('(' + dis.cmp_op[arg] + ')', end=' ')
elif op in dis.hasfree:
if free is None:
Expand Down Expand Up @@ -578,7 +583,13 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
state = None
elif opname in ('POP_JUMP_IF_TRUE', 'POP_JUMP_IF_FALSE'):
state.stack.pop()
jmp = arg * jump_multiplier
# The arg was changed to be a relative delta instead of an absolute
# in 3.11, and became a full instruction instead of a
# pseudo-instruction in 3.12
if (sys.version_info.major, sys.version_info.minor) >= (3, 12):
jmp = pc + arg * jump_multiplier
else:
jmp = arg * jump_multiplier
jmp_state = state.copy()
elif opname in ('POP_JUMP_FORWARD_IF_TRUE', 'POP_JUMP_FORWARD_IF_FALSE'):
state.stack.pop()
Expand Down Expand Up @@ -608,6 +619,10 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
state.stack.pop()
elif opname == 'FOR_ITER':
jmp = pc + arg * jump_multiplier
if sys.version_info >= (3, 12):
# The jump is relative to the next instruction after a cache call,
# so jump 4 more bytes.
jmp += 4
jmp_state = state.copy()
jmp_state.stack.pop()
state.stack.append(element_type(state.stack[-1]))
Expand Down Expand Up @@ -641,6 +656,19 @@ def infer_return_type_func(f, input_types, debug=False, depth=0):
# No-op introduced in 3.11. Without handling this some
# instructions have functionally > 2 byte size.
pass
elif opname == 'RETURN_CONST':
# Introduced in 3.12. Handles returning constants directly
# instead of having a LOAD_CONST before a RETURN_VALUE.
returns.add(state.const_type(arg))
state = None
elif opname == 'CALL_INTRINSIC_1':
# Introduced in 3.12. The arg is an index into a table of
# operations reproduced in INT_ONE_OPS. Not all ops are
# relevant for our type checking infrastructure.
int_op = intrinsic_one_ops.INT_ONE_OPS[arg]
if debug:
print("Executing intrinsic one op", int_op.__name__.upper())
int_op(state, arg)

else:
raise TypeInferenceError('unable to handle %s' % opname)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def testTupleListComprehension(self):
self.assertReturnType(
typehints.List[int],
lambda xs: [x for x in xs], [typehints.Tuple[int, int, int]])

self.assertReturnType(
typehints.List[typehints.Union[int, float]],
lambda xs: [x for x in xs], [typehints.Tuple[int, float]])
Expand Down