From c75d7937b7a13fee5b82108c5391406d7f4f4131 Mon Sep 17 00:00:00 2001 From: Ivan Dashchinskiy Date: Thu, 25 Feb 2021 19:07:45 +0300 Subject: [PATCH] IGNITE-14245 Fix infinite loop while trying to get affinity mapping on failed node. --- .travis.yml | 51 +++++++++++++++++ pyignite/cache.py | 1 + tests/affinity/conftest.py | 8 ++- tests/affinity/test_affinity_bad_servers.py | 63 +++++++++++++-------- 4 files changed, 98 insertions(+), 25 deletions(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..7e726be --- /dev/null +++ b/.travis.yml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +language: python +sudo: required + +addons: + apt: + packages: + - openjdk-8-jdk + +env: + global: + - IGNITE_VERSION=2.9.1 + - IGNITE_HOME=/opt/ignite + +before_install: + - curl -fL https://archive.apache.org/dist/ignite/${IGNITE_VERSION}/apache-ignite-slim-${IGNITE_VERSION}-bin.zip > ignite.zip + - unzip ignite.zip -d /opt + - mv /opt/apache-ignite-slim-${IGNITE_VERSION}-bin /opt/ignite + - mv /opt/ignite/libs/optional/ignite-log4j2 /opt/ignite/libs/ + +jobs: + include: + - python: '3.6' + arch: amd64 + env: TOXENV=py36 + - python: '3.7' + arch: amd64 + env: TOXENV=py37 + - python: '3.8' + arch: amd64 + env: TOXENV=py38 + - python: '3.9' + arch: amd64 + env: TOXENV=py39 + +install: pip install tox +script: tox diff --git a/pyignite/cache.py b/pyignite/cache.py index ea672a8..a91a3cf 100644 --- a/pyignite/cache.py +++ b/pyignite/cache.py @@ -264,6 +264,7 @@ def get_best_node( break except connection_errors: # retry if connection failed + conn = self._client.random_node pass except CacheError: # server did not create mapping in time diff --git a/tests/affinity/conftest.py b/tests/affinity/conftest.py index b682d01..7595f25 100644 --- a/tests/affinity/conftest.py +++ b/tests/affinity/conftest.py @@ -19,6 +19,10 @@ from pyignite.api import cache_create, cache_destroy from tests.util import start_ignite_gen +# Sometimes on slow testing servers and unstable topology +# default timeout is not enough for cache ops. 
+CLIENT_SOCKET_TIMEOUT = 20.0 + @pytest.fixture(scope='module', autouse=True) def server1(): @@ -37,7 +41,7 @@ def server3(): @pytest.fixture def client(): - client = Client(partition_aware=True) + client = Client(partition_aware=True, timeout=CLIENT_SOCKET_TIMEOUT) client.connect([('127.0.0.1', 10800 + i) for i in range(1, 4)]) @@ -48,7 +52,7 @@ def client(): @pytest.fixture def client_not_connected(): - client = Client(partition_aware=True) + client = Client(partition_aware=True, timeout=CLIENT_SOCKET_TIMEOUT) yield client client.close() diff --git a/tests/affinity/test_affinity_bad_servers.py b/tests/affinity/test_affinity_bad_servers.py index 8abf4a0..6fd08d5 100644 --- a/tests/affinity/test_affinity_bad_servers.py +++ b/tests/affinity/test_affinity_bad_servers.py @@ -16,45 +16,62 @@ import pytest from pyignite.exceptions import ReconnectError -from tests.util import start_ignite, kill_process_tree +from tests.affinity.conftest import CLIENT_SOCKET_TIMEOUT +from tests.util import start_ignite, kill_process_tree, get_client -def test_client_with_multiple_bad_servers(client_not_connected): +@pytest.fixture(params=['with-partition-awareness', 'without-partition-awareness']) +def with_partition_awareness(request): + yield request.param == 'with-partition-awareness' + + +def test_client_with_multiple_bad_servers(with_partition_awareness): with pytest.raises(ReconnectError) as e_info: - client_not_connected.connect([("127.0.0.1", 10900), ("127.0.0.1", 10901)]) + with get_client(partition_aware=with_partition_awareness) as client: + client.connect([("127.0.0.1", 10900), ("127.0.0.1", 10901)]) assert str(e_info.value) == "Can not connect." 
-def test_client_with_failed_server(request, client_not_connected): +def test_client_with_failed_server(request, with_partition_awareness): srv = start_ignite(idx=4) try: - client_not_connected.connect([("127.0.0.1", 10804)]) - cache = client_not_connected.get_or_create_cache(request.node.name) - cache.put(1, 1) - kill_process_tree(srv.pid) - with pytest.raises(ConnectionResetError): - cache.get(1) + with get_client(partition_aware=with_partition_awareness) as client: + client.connect([("127.0.0.1", 10804)]) + cache = client.get_or_create_cache(request.node.name) + cache.put(1, 1) + kill_process_tree(srv.pid) + + if with_partition_awareness: + ex_class = (ReconnectError, ConnectionResetError) + else: + ex_class = ConnectionResetError + + with pytest.raises(ex_class): + cache.get(1) finally: kill_process_tree(srv.pid) -def test_client_with_recovered_server(request, client_not_connected): +def test_client_with_recovered_server(request, with_partition_awareness): srv = start_ignite(idx=4) try: - client_not_connected.connect([("127.0.0.1", 10804)]) - cache = client_not_connected.get_or_create_cache(request.node.name) - cache.put(1, 1) + with get_client(partition_aware=with_partition_awareness, timeout=CLIENT_SOCKET_TIMEOUT) as client: + client.connect([("127.0.0.1", 10804)]) + cache = client.get_or_create_cache(request.node.name) + cache.put(1, 1) - # Kill and restart server - kill_process_tree(srv.pid) - srv = start_ignite(idx=4) + # Kill and restart server + kill_process_tree(srv.pid) + srv = start_ignite(idx=4) - # First request fails - with pytest.raises(Exception): - cache.put(1, 2) + # First request may fail after the restart; tolerate any error, the retry below must succeed. + try: + cache.put(1, 2) + except Exception: + pass - # Retry succeeds - cache.put(1, 2) - assert cache.get(1) == 2 + # Retry succeeds + cache.put(1, 2) + assert cache.get(1) == 2 finally: kill_process_tree(srv.pid)