From a8c8f7e531f10139431370885aabf89e6fb4531e Mon Sep 17 00:00:00 2001 From: Gajesh Bhat Date: Mon, 25 Aug 2025 17:21:46 -0700 Subject: [PATCH 1/2] fix(agent): handle None response from retry exhaustion to prevent crashes Fix AttributeError when session.post() returns None after retry exhaustion. When the server returns repeated 503 errors, the requests retry mechanism can return None instead of a Response object, causing agents to crash when accessing status_code attribute. Fixed methods: - post_status_update() - post_result() - get_result() - repost_job() - post_artifacts() Resolves agents showing EXITED status in supervisorctl. Fixes #438 (Fixes #CERTTF-474 Internally) Fix AttributeError when session.post() returns None after retry exhaustion. When the server returns repeated 503 errors, the requests retry mechanism can return None instead of a Response object, causing agents to crash when accessing status_code attribute. Fixed methods: - post_status_update() - post_result() - get_result() - save_artifacts() Resolves agents showing EXITED status in supervisorctl. Fixes #438 (Fixes #CERTTF-474 Internally) --- agent/src/testflinger_agent/client.py | 66 +++++++++++++++++++-------- agent/tests/test_client.py | 63 +++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 19 deletions(-) diff --git a/agent/src/testflinger_agent/client.py b/agent/src/testflinger_agent/client.py index b9b5ac375..a35950387 100644 --- a/agent/src/testflinger_agent/client.py +++ b/agent/src/testflinger_agent/client.py @@ -214,12 +214,20 @@ def post_result(self, job_id, data): logger.error(exc) raise TFServerError("other exception") from exc if not job_request: - logger.error( - "Unable to post results to: %s (error: %d)", - result_uri, - job_request.status_code, - ) - raise TFServerError(job_request.status_code) + if job_request is None: + logger.error( + "Unable to post results to: %s (error: %s)", + result_uri, + "No response received", + ) + raise TFServerError("No response received") + else: + logger.error( + "Unable to post results to: %s (error: %d)", + result_uri, + job_request.status_code, + ) + raise TFServerError(job_request.status_code) def get_result(self, job_id): """Get current results data to the testflinger server for this job. @@ -238,11 +246,18 @@ def get_result(self, job_id): logger.error(exc) return {} if not job_request: - logger.error( - "Unable to get results from: %s (error: %d)", - result_uri, - job_request.status_code, - ) + if job_request is None: + logger.error( + "Unable to get results from: %s (error: %s)", + result_uri, + "No response received", + ) + else: + logger.error( + "Unable to get results from: %s (error: %d)", + result_uri, + job_request.status_code, + ) return {} if job_request.content: return job_request.json() @@ -332,12 +347,20 @@ def save_artifacts(self, rundir, job_id): artifact_uri, files=file_upload, timeout=600 ) if not artifact_request: - logger.error( - "Unable to post results to: %s (error: %d)", - artifact_uri, - artifact_request.status_code, - ) - raise TFServerError(artifact_request.status_code) + if artifact_request is None: + logger.error( + "Unable to post results to: %s (error: %s)", + artifact_uri, + "No response received", + ) + raise TFServerError("No response received") + else: + logger.error( + "Unable to post results to: %s (error: %d)", + artifact_uri, + artifact_request.status_code, + ) + raise TFServerError(artifact_request.status_code) else: shutil.rmtree(artifacts_dir) @@ -523,10 +546,15 @@ def post_status_update( ) # Response code is greater than 399 if not job_request: + error_msg = ( + "No response received" + if job_request is None + else f"HTTP {job_request.status_code}" + ) logger.error( - "Unable to post status updates to: %s (error: %d)", + "Unable to post status updates to: %s (error: %s)", status_update_uri, - job_request.status_code, + error_msg, ) except requests.exceptions.RequestException as exc: logger.error( diff --git a/agent/tests/test_client.py b/agent/tests/test_client.py index 1b6a87d2e..2a7833efc 100644 --- a/agent/tests/test_client.py +++ b/agent/tests/test_client.py @@ -716,3 +716,66 @@ def test_update_session_auth_skips_cookie_refresh( and "/v1/agents/data/test_agent" in request.url ] assert len(agent_data_calls) == 0 + + @patch("requests.Session.post", return_value=None) + def test_post_status_update_none_response(self, mock_post, client, caplog): + """ + Test that post_status_update handles None response gracefully. + This simulates the case where retry exhaustion returns None. + """ + job_id = str(uuid.uuid1()) + + client.post_status_update("myjobqueue", "http://foo", [], job_id) + assert "No response received" in caplog.text + + @patch("requests.Session.post", return_value=None) + def test_post_result_none_response(self, mock_post, client, caplog): + """Test that post_result handles None response gracefully.""" + job_id = str(uuid.uuid1()) + + with pytest.raises(TFServerError, match="No response received"): + client.post_result(job_id, {"test": "data"}) + assert "No response received" in caplog.text + + @patch("requests.Session.get", return_value=None) + def test_get_result_none_response(self, mock_get, client, caplog): + """Test that get_result handles None response gracefully.""" + job_id = str(uuid.uuid1()) + + result = client.get_result(job_id) + assert result == {} + assert "No response received" in caplog.text + + def test_save_artifacts_error_response( + self, client, requests_mock, tmp_path, caplog + ): + """Test that save_artifacts handles HTTP error response correctly.""" + job_id = str(uuid.uuid1()) + + # Create artifacts directory with a file + artifacts_dir = tmp_path / "artifacts" + artifacts_dir.mkdir() + (artifacts_dir / "test.txt").write_text("test content") + + artifact_uri = f"http://127.0.0.1:8000/v1/result/{job_id}/artifact" + requests_mock.post(artifact_uri, status_code=500) + with pytest.raises(TFServerError): + client.save_artifacts(tmp_path, job_id) + assert "Unable to post results" in caplog.text + assert "error: 500" in caplog.text + + @patch("requests.Session.post", return_value=None) + def test_save_artifacts_none_response( + self, mock_post, client, tmp_path, caplog + ): + """Test that save_artifacts handles None response gracefully.""" + job_id = str(uuid.uuid1()) + + # Create artifacts directory with a file + artifacts_dir = tmp_path / "artifacts" + artifacts_dir.mkdir() + (artifacts_dir / "test.txt").write_text("test content") + + with pytest.raises(TFServerError, match="No response received"): + client.save_artifacts(tmp_path, job_id) + assert "No response received" in caplog.text From 9f6be42f56442dd96bcf0bd27f25eb301dab9244 Mon Sep 17 00:00:00 2001 From: Gajesh Bhat Date: Mon, 9 Feb 2026 14:09:23 -0800 Subject: [PATCH 2/2] test(agent): add status_code branch tests and use @patch decorators Signed-off-by: Gajesh Bhat --- agent/tests/test_client.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/agent/tests/test_client.py b/agent/tests/test_client.py index 2a7833efc..67b597d5f 100644 --- a/agent/tests/test_client.py +++ b/agent/tests/test_client.py @@ -154,8 +154,9 @@ def test_transmit_job_outcome(self, client, requests_mock, tmp_path): client.transmit_job_outcome(tmp_path) assert requests_mock.last_request.json() == {"job_state": "complete"} + @patch.object(_TestflingerClient, "save_artifacts", side_effect=OSError) def test_transmit_job_outcome_with_error( - self, client, requests_mock, tmp_path, caplog + self, mock_save_artifacts, client, requests_mock, tmp_path, caplog ): """ Test that OSError during save_artifacts results in removing the job @@ -173,9 +174,7 @@ def test_transmit_job_outcome_with_error( status_code=HTTPStatus.OK, ) - # Simulate an error during save_artifacts - with patch.object(client, "save_artifacts", side_effect=OSError): - client.transmit_job_outcome(tmp_path) + client.transmit_job_outcome(tmp_path) assert tmp_path.exists() is False assert "Unable to save artifacts" in caplog.text @@ -191,7 +190,8 @@ def test_result_post_endpoint_error(self, client, requests_mock, caplog): ) with pytest.raises(TFServerError): client.post_result(job_id, {}) - assert "Unable to post results" in caplog.text + assert "Unable to post results" in caplog.text + assert "error: 404" in caplog.text def test_result_get_endpoint_error(self, client, requests_mock, caplog): """ @@ -206,6 +206,7 @@ def test_result_get_endpoint_error(self, client, requests_mock, caplog): response = client.get_result(job_id) assert response == {} assert "Unable to get results" in caplog.text + assert "error: 404" in caplog.text def test_attachment_endpoint_error(self, client, requests_mock, caplog): """ @@ -391,19 +392,20 @@ def test_check_jobs_without_agent_registration_reregisters( assert len(post_requests) == 1 assert post_requests[0].json() == {"job_id": ""} - def test_check_jobs_request_exception(self, client): + @patch("testflinger_agent.client.time.sleep") + @patch("testflinger_agent.client.logger") + @patch("requests.Session.get") + def test_check_jobs_request_exception( + self, mock_get, mock_logger, mock_sleep, client + ): """ Test that check_jobs handles RequestException (network failure) by logging error and sleeping. """ network_error = RequestException("Connection refused") + mock_get.side_effect = network_error - with patch.object(client.session, "get", side_effect=network_error): - with patch("testflinger_agent.client.logger") as mock_logger: - with patch( - "testflinger_agent.client.time.sleep" - ) as mock_sleep: - result = client.check_jobs() + result = client.check_jobs() # Verify logger.error was called with the exception mock_logger.error.assert_called_with(network_error)