apache · damccorm · Jul 30, 2025 · Jul 30, 2025 · Jul 30, 2025 · Jul 30, 2025
diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json
@@ -1,5 +1,5 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run.",
-  "modification": 4
+  "modification": 29
 }
 
diff --git a/.github/workflows/beam_PostCommit_Python.yml b/.github/workflows/beam_PostCommit_Python.yml
@@ -108,8 +108,8 @@ jobs:
             -PuseWheelDistribution \
             -Pposargs="${{
               contains(matrix.os, 'self-hosted') &&
-                '-m ''not require_docker_in_docker''' ||
-                '-m ''require_docker_in_docker'''
+                '-m (not require_docker_in_docker)' ||
+                '-m require_docker_in_docker'
             }}" \
             -PpythonVersion=${{ matrix.python_version }} \
         env:

diff --git a/.github/workflows/beam_PreCommit_Python_ML.yml b/.github/workflows/beam_PreCommit_Python_ML.yml
@@ -105,8 +105,8 @@ jobs:
           arguments: |
             -Pposargs="${{
               contains(matrix.os, 'self-hosted') &&
-                'apache_beam/ml/ -m ''not require_docker_in_docker''' ||
-                'apache_beam/ml/ -m ''require_docker_in_docker'''
+              'apache_beam/ml/ -m (not require_docker_in_docker)' ||
+                'apache_beam/ml/ -m require_docker_in_docker'
             }}" \
             -PpythonVersion=${{ matrix.python_version }}
       - name: Archive Python Test Results

diff --git a/sdks/python/scripts/run_pytest.sh b/sdks/python/scripts/run_pytest.sh
@@ -30,8 +30,15 @@ envname=${1?First argument required: suite base name}
 posargs=$2
 pytest_args=$3
 
-# strip leading/trailing quotes from posargs because it can get double quoted as its passed through.
-posargs=$(sed -e 's/^"//' -e 's/"$//' -e "s/'$//" -e "s/^'//" <<<$posargs)
+# Strip leading/trailing quotes from posargs because it can get double-quoted as
+# it is passed through.
+if [[ $posargs == '"'*'"' ]]; then
+  # If wrapped in double quotes, remove them.
+  posargs="${posargs:1:${#posargs}-2}"
+elif [[ $posargs == "'"*"'" ]]; then
+  # If wrapped in single quotes, remove them.
+  posargs="${posargs:1:${#posargs}-2}"
+fi
 echo "pytest_args: $pytest_args"
 echo "posargs: $posargs"
 
@@ -41,15 +48,26 @@ marker_regex="-m\s+('[^']+'|\"[^\"]+\"|[^ ]+)"
 # Initialize the user_marker variable.
 user_marker=""
 
-# Isolate the user-provided -m argument (matching only).
-if [[ $pytest_args =~ "-m" ]]; then
-  # Extract the marker value using the defined regex.
-  user_marker=$(echo "$pytest_args" | sed -nE "s/.*$marker_regex.*/\1/p")
-fi
+# Define regex pattern for quoted strings
+quotes_regex="^[\"\'](.*)[\"\']$"
 
-# Remove the -m argument from pytest_args (substitution only).
-if [[ -n $user_marker ]]; then
-  pytest_args=$(echo "$pytest_args" | sed -E "s/$marker_regex//")
+# Extract the user markers.
+if [[ $posargs =~ $marker_regex ]]; then
+  # Get the full match including -m and the marker.
+  full_match="${BASH_REMATCH[0]}"
+
+  # Get the marker with quotes (this is the first capture group).
+  quoted_marker="${BASH_REMATCH[1]}"
+
+  # Remove any quotes around the marker.
+  if [[ $quoted_marker =~ $quotes_regex ]]; then
+    user_marker="${BASH_REMATCH[1]}"
+  else
+    user_marker="$quoted_marker"
+  fi
+
+  # Remove the entire -m marker portion from posargs.
+  posargs="${posargs/$full_match/}"
 fi
 
 # Combine user-provided marker with script's internal logic.
@@ -58,20 +76,65 @@ marker_for_sequential_tests="no_xdist"
 
 if [[ -n $user_marker ]]; then
   # Combine user marker with internal markers.
-  marker_for_parallel_tests="($user_marker) and ($marker_for_parallel_tests)"
-  marker_for_sequential_tests="($user_marker) and ($marker_for_sequential_tests)"
+  marker_for_parallel_tests="$user_marker and ($marker_for_parallel_tests)"
+  marker_for_sequential_tests="$user_marker and ($marker_for_sequential_tests)"
+fi
+
+# Parse posargs to separate pytest options from test paths.
+options=""
+test_paths=""
+
+# Safely split the posargs string into individual arguments.
+eval "set -- $posargs"
-eval "set -- $posargs"
+parsed_args=()
+current_arg=""
+in_quote='' # Can be '', "'", or '"'
+
+for (( i=0; i<${#posargs}; i++ )); do
+  char="${posargs:$i:1}"
+
+  if [[ "$in_quote" == '' ]]; then
+    if [[ "$char" == "'" || "$char" == "\"" ]]; then
+      in_quote="$char"
+    elif [[ "$char" == " " ]]; then
+      # Add current_arg to parsed_args if not empty
+      if [[ -n "$current_arg" ]]; then
+        parsed_args+=("$current_arg")
+        current_arg=""
+      fi
+    else
+      current_arg+="$char"
+    fi
+  else # Inside a quote
+    if [[ "$char" == "$in_quote" ]]; then
+      in_quote='' # End of quote
+    else
+      current_arg+="$char"
+    fi
+  fi
+done
+
+# Add the last argument if it's not empty
+if [[ -n "$current_arg" ]]; then
+  parsed_args+=("$current_arg")
+fi
+
+set -- "${parsed_args[@]}"
-eval "set -- $posargs"
+parsed_args=()
+current_arg=""
+in_quote='' # Can be '', "'", or '"'
+
+for (( i=0; i<${#posargs}; i++ )); do
+  char="${posargs:$i:1}"
+
+  if [[ "$in_quote" == '' ]]; then
+    if [[ "$char" == "'" || "$char" == "\"" ]]; then
+      in_quote="$char"
+    elif [[ "$char" == " " ]]; then
+      # Add current_arg to parsed_args if not empty
+      if [[ -n "$current_arg" ]]; then
+        parsed_args+=("$current_arg")
+        current_arg=""
+      fi
+    else
+      current_arg+="$char"
+    fi
+  else # Inside a quote
+    if [[ "$char" == "$in_quote" ]]; then
+      in_quote='' # End of quote
+    else
+      current_arg+="$char"
+    fi
+  fi
+done
+
+# Add the last argument if it's not empty
+if [[ -n "$current_arg" ]]; then
+  parsed_args+=("$current_arg")
+fi
+
+set -- "${parsed_args[@]}"
+
+# Iterate through arguments.
+while [[ $# -gt 0 ]]; do
+  arg="$1"
+  shift
+
+  # If argument starts with dash, it's an option.
+  if [[ "$arg" == -* ]]; then
+    options+=" $arg"
+
+    # Check if there's a next argument and it doesn't start with a dash.
+    # This assumes it's a value for the current option.
+    if [[ $# -gt 0 && "$1" != -* ]]; then
+      # Get the next argument.
+      next_arg="$1"
+
+      # Check if it's quoted and remove quotes if needed.
+      if [[ $next_arg =~ $quotes_regex ]]; then
+        # Extract the content inside quotes.
+        next_arg="${BASH_REMATCH[1]}"
+      fi
+
+      # Add the unquoted value to options.
+      options+=" $next_arg"
+      shift
+    fi
+  else
+    # Otherwise it's a test path.
+    test_paths+=" $arg"
+  fi
+done
+
+# Construct the final pytest command arguments.
+pyargs_section=""
+if [[ -n "$test_paths" ]]; then
+  pyargs_section="--pyargs $test_paths"
 fi
+pytest_command_args="$options $pyargs_section"
 
 # Run tests in parallel.
-echo "Running parallel tests with: pytest -m \"$marker_for_parallel_tests\" $pytest_args"
+echo "Running parallel tests with: pytest -m \"$marker_for_parallel_tests\" $pytest_command_args"
 pytest -v -rs -o junit_suite_name=${envname} \
-  --junitxml=pytest_${envname}.xml -m "$marker_for_parallel_tests" -n 6 --import-mode=importlib ${pytest_args} --pyargs ${posargs}
+  --junitxml=pytest_${envname}.xml -m "$marker_for_parallel_tests" -n 6 --import-mode=importlib ${pytest_args} ${pytest_command_args}
 status1=$?
 
 # Run tests sequentially.
-echo "Running sequential tests with: pytest -m \"$marker_for_sequential_tests\" $pytest_args"
+echo "Running sequential tests with: pytest -m \"$marker_for_sequential_tests\" $pytest_command_args"
 pytest -v -rs -o junit_suite_name=${envname}_no_xdist \
-  --junitxml=pytest_${envname}_no_xdist.xml -m "$marker_for_sequential_tests" --import-mode=importlib ${pytest_args} --pyargs ${posargs}
+  --junitxml=pytest_${envname}_no_xdist.xml -m "$marker_for_sequential_tests" --import-mode=importlib ${pytest_args} ${pytest_command_args}
 status2=$?
 
 # Exit with error if no tests were run in either suite (status code 5).