I have a series of AWS Lambdas that are fed from SQS queue event triggers. However, sometimes when I try to delete the message from the queue, the attempt times out over and over again until my Lambda timeout occurs.
I enabled DEBUG logging, which confirmed it was a socket timeout, but I don't get any further details beyond that. The failures also appear to be intermittent. At first, I thought it was a Lambda warmup issue, but I've seen the problem both after running the Lambda successfully multiple times and on the first deploy.
What I've tried so far:
- I thought maybe using a Boto client vs a Boto resource was the problem, but I saw the same result with both methods.
- I've raised the connection and read timeouts above the defaults; however, the connection is just retried by the Boto retry logic under the hood.
- I've tried setting the connection timeout lower, but this just means more retries before the Lambda timeout.
- I've tried both standard and FIFO queue types, both have the same problem
A couple of other details:
- Python v3.8.5
- Boto3 v1.16.1
- My SQS settings are set for a 5-second delay and a 120-second visibility timeout
- My lambda timeout is 120 seconds.
Snippet of the code that I'm using:
# Client-side timeouts and retry policy applied to every SQS call below.
# NOTE(review): per the botocore Config docs, total_max_attempts includes the
# initial request, so 1 should mean "no retries" -- confirm this Config is the
# one actually in effect where the timeouts occur.
config = Config(connect_timeout=30, read_timeout=30, retries={'total_max_attempts': 1}, region_name='us-east-1')
sqs_client = boto3.client(service_name='sqs', config=config)
# Only the first record of the triggering event is handled here.
record = event['Records'][0]
receiptHandle = record['receiptHandle']
# Assumes the standard SQS-to-Lambda record shape, where each record carries
# the ARN of its source queue under 'eventSourceARN' -- verify against the event.
eventSourceARN = record['eventSourceARN']
# The queue name is the last colon-separated field of the ARN.
fromQueueName = eventSourceARN.split(':')[-1]
# Resolve the queue URL with a single GetQueueUrl request and reuse the
# response, instead of issuing the same call twice.
fromQueue = sqs_client.get_queue_url(QueueName=fromQueueName)
fromQueueUrl = fromQueue['QueueUrl']
messageDelete = sqs_client.delete_message(QueueUrl=fromQueueUrl, ReceiptHandle=receiptHandle)
And an example of the DEBUG exception I'm seeing:
[DEBUG] 2020-10-29T21:27:28.32Z 3c60cac9-6d99-58c6-84c9-92dc581919fd retry needed, retryable exception caught: Connect timeout on endpoint URL: "https://queue.amazonaws.com/" Traceback (most recent call last): File "/var/task/urllib3/connection.py", line 159, in _new_conn conn = connection.create_connection( File "/var/task/urllib3/util/connection.py", line 84, in create_connection raise err File "/var/task/urllib3/util/connection.py", line 74, in create_connection sock.connect(sa) socket.timeout: timed out During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/opt/python/botocore/httpsession.py", line 254, in send urllib_response = conn.urlopen( File "/var/task/urllib3/connectionpool.py", line 726, in urlopen retries = retries.increment( File "/var/task/urllib3/util/retry.py", line 386, in increment raise six.reraise(type(error), error, _stacktrace) File "/var/task/urllib3/packages/six.py", line 735, in reraise raise value File "/var/task/urllib3/connectionpool.py", line 670, in urlopen httplib_response = self._make_request( File "/var/task/urllib3/connectionpool.py", line 381, in _make_request self._validate_conn(conn) File "/var/task/urllib3/connectionpool.py", line 978, in _validate_conn conn.connect() File "/var/task/urllib3/connection.py", line 309, in connect conn = self._new_conn() File "/var/task/urllib3/connection.py", line 164, in _new_conn raise ConnectTimeoutError( urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPSConnection object at 0x7f27b56b7460>, 'Connection to queue.amazonaws.com timed out. 
(connect timeout=15)') During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/opt/python/utils.py", line 79, in preflight_check fromQueue = sqs_client.get_queue_url(QueueName=fromQueueName) File "/opt/python/botocore/client.py", line 357, in _api_call return self._make_api_call(operation_name, kwargs) File "/opt/python/botocore/client.py", line 662, in _make_api_call http, parsed_response = self._make_request( File "/opt/python/botocore/client.py", line 682, in _make_request return self._endpoint.make_request(operation_model, request_dict) File "/opt/python/botocore/endpoint.py", line 102, in make_request return self._send_request(request_dict, operation_model) File "/opt/python/botocore/endpoint.py", line 136, in _send_request while self._needs_retry(attempts, operation_model, request_dict, File "/opt/python/botocore/endpoint.py", line 253, in _needs_retry responses = self._event_emitter.emit( File "/opt/python/botocore/hooks.py", line 356, in emit return self._emitter.emit(aliased_event_name, **kwargs) File "/opt/python/botocore/hooks.py", line 228, in emit return self._emit(event_name, kwargs) File "/opt/python/botocore/hooks.py", line 211, in _emit response = handler(**kwargs) File "/opt/python/botocore/retryhandler.py", line 183, in __call__ if self._checker(attempts, response, caught_exception): File "/opt/python/botocore/retryhandler.py", line 250, in __call__ should_retry = self._should_retry(attempt_number, response, File "/opt/python/botocore/retryhandler.py", line 277, in _should_retry return self._checker(attempt_number, response, caught_exception) File "/opt/python/botocore/retryhandler.py", line 316, in __call__ checker_response = checker(attempt_number, response, File "/opt/python/botocore/retryhandler.py", line 222, in __call__ return self._check_caught_exception( File "/opt/python/botocore/retryhandler.py", line 359, in _check_caught_exception raise caught_exception File 
"/opt/python/botocore/endpoint.py", line 200, in _do_get_response http_response = self._send(request) File "/opt/python/botocore/endpoint.py", line 269, in _send return self.http_session.send(request) File "/opt/python/botocore/httpsession.py", line 287, in send raise ConnectTimeoutError(endpoint_url=request.url, error=e) botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "https://queue.amazonaws.com/" During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/opt/python/botocore/retryhandler.py", line 269, in _should_retry return self._checker(attempt_number, response, caught_exception) File "/opt/python/botocore/retryhandler.py", line 316, in __call__ checker_response = checker(attempt_number, response, File "/opt/python/botocore/retryhandler.py", line 222, in __call__ return self._check_caught_exception( File "/opt/python/botocore/retryhandler.py", line 359, in _check_caught_exception raise caught_exception File "/opt/python/botocore/endpoint.py", line 200, in _do_get_response http_response = self._send(request) File "/opt/python/botocore/endpoint.py", line 269, in _send return self.http_session.send(request) File "/opt/python/botocore/httpsession.py", line 287, in send raise ConnectTimeoutError(endpoint_url=request.url, error=e) botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "https://queue.amazonaws.com/"