2 votes

I have read When is TCP option SO_LINGER (0) required? and several other related questions and answers, but I am unable to reproduce any of the SO_LINGER behavior explained in those posts. I will share one of my many experiments here.

I am performing this experiment in the following environment.

$ lsb_release -d
Description:    Debian GNU/Linux 9.0 (stretch)
$ gcc -dumpversion
6.3.0

Here is an example of a misbehaving client that connects to the server but never reads any data and keeps the connection open for 90 seconds before closing it.

/* client.c */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>

int main()
{
    int sockfd;
    int ret;
    struct addrinfo hints, *ai;
    char buffer[256];
    ssize_t bytes;

    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    if ((ret = getaddrinfo(NULL, "8000", &hints, &ai)) != 0) {
        fprintf(stderr, "client: getaddrinfo: %s\n", gai_strerror(ret));
        return 1;
    }

    sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
    if (sockfd == -1) {
        perror("client: socket");
        return 1;
    }

    if (connect(sockfd, ai->ai_addr, ai->ai_addrlen) == -1) {
        perror("client: connect");
        close(sockfd);
        return -1;
    }

    printf("client: connected\n");

    /*
    bytes = recv(sockfd, buffer, sizeof buffer, 0);
    if (bytes == -1) {
        perror("client: recv");
        close(sockfd);
        return -1;
    }

    printf("client: received: %.*s\n", (int) bytes, buffer);
    */

    sleep(90);
    freeaddrinfo(ai);

    printf("client: closing socket ...\n");
    close(sockfd);
    printf("client: closed socket!\n");

    return 0;
}

Here is my server code. It sends hello to each client that connects and then closes the connection immediately. For simplicity, this server is not multithreaded. In a real multithreaded server accepting connections from hundreds of clients, many of which could be misbehaving, our goal is to discard useless sockets quickly in order to free the ports those sockets occupy.

To achieve this, we are enabling the SO_LINGER socket option with a linger time-out of 10 seconds.

/* server.c */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>

int main()
{
    int sockfd;
    int ret;
    int yes = 1;

    struct addrinfo hints, *ai;

    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE;

    if ((ret = getaddrinfo(NULL, "8000", &hints, &ai)) != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret));
        return 1;
    }

    sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
    if (sockfd == -1) {
        perror("server: socket");
        return 1;
    }

    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) == -1) {
        perror("server: setsockopt");
        close(sockfd);
        return 1;
    }

    if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) == -1) {
        perror("server: bind");
        close(sockfd);
        return 1;
    }

    freeaddrinfo(ai);

    if (listen(sockfd, 10) == -1) {
        perror("server: listen");
        close(sockfd);
        return 1;
    }

    printf("server: listening ...\n");

    while (1) {
        int client_sockfd;
        struct sockaddr_storage client_addr;
        socklen_t client_addrlen = sizeof client_addr;
        struct linger l_opt;

        printf("server: accepting ...\n");
        client_sockfd = accept(sockfd, (struct sockaddr *) &client_addr,
                               &client_addrlen);

        /* Set SO_LINGER opt for the new client socket. */
        l_opt.l_onoff = 1;
        l_opt.l_linger = 10;
        setsockopt(sockfd, SOL_SOCKET, SO_LINGER, &l_opt, sizeof l_opt);

        if (client_sockfd == -1) {
            perror("server: accept");
            continue;
        }

        if (send(client_sockfd, "hello\n", 6, 0) == -1) {
            perror("server: send");
            continue;
        }

        printf("server: sent: hello\n");
        printf("server: closing client socket ...\n");
        close(client_sockfd);
        printf("server: closed client socket!\n");
    }

    return 0;
}

Here is my experiment runner.

# run.sh
gcc -std=c99 -Wall -Wextra -Wpedantic -D_DEFAULT_SOURCE server.c -o server
gcc -std=c99 -Wall -Wextra -Wpedantic -D_DEFAULT_SOURCE client.c -o client
./server &
sleep 1
./client
pkill ^server$

In another window/terminal, I run this little bash script to monitor the state of the sockets every 10 seconds.

$ for i in {1..10}; do netstat -nopa 2> /dev/null | grep :8000; echo =====; sleep 10; done
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
tcp        0      0 127.0.0.1:8000          127.0.0.1:35536         FIN_WAIT2   -                    timewait (59.84/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
tcp        0      0 127.0.0.1:8000          127.0.0.1:35536         FIN_WAIT2   -                    timewait (49.83/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
tcp        0      0 127.0.0.1:8000          127.0.0.1:35536         FIN_WAIT2   -                    timewait (39.82/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
tcp        0      0 127.0.0.1:8000          127.0.0.1:35536         FIN_WAIT2   -                    timewait (29.81/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
tcp        0      0 127.0.0.1:8000          127.0.0.1:35536         FIN_WAIT2   -                    timewait (19.80/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
tcp        0      0 127.0.0.1:8000          127.0.0.1:35536         FIN_WAIT2   -                    timewait (9.78/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
tcp        0      0 127.0.0.1:8000          127.0.0.1:35536         FIN_WAIT2   -                    timewait (0.00/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
=====
tcp        0      0 0.0.0.0:8000            0.0.0.0:*               LISTEN      16293/./server       off (0.00/0/0)
tcp        7      0 127.0.0.1:35536         127.0.0.1:8000          CLOSE_WAIT  16295/./client       off (0.00/0/0)
=====
=====

The above output shows that the server's connection socket (the third row in each iteration of the output) remains in the FIN_WAIT2 state for 60 seconds (the default FIN_WAIT2 timeout on Linux, net.ipv4.tcp_fin_timeout).

Why did the SO_LINGER option with a timeout of 10 seconds not ensure that the server closed its client socket (i.e. Local Address = 127.0.0.1:8000; Foreign Address = 127.0.0.1:35536) successfully after 10 seconds?

Note: I get the same results even with a 0 timeout, i.e. with the following code, the socket for Local Address = 127.0.0.1:8000 and Foreign Address = 127.0.0.1:35536 remains in FIN_WAIT2 state for 60 seconds.

        /* Set SO_LINGER opt for the new client socket. */
        l_opt.l_onoff = 1;
        l_opt.l_linger = 0;
        setsockopt(sockfd, SOL_SOCKET, SO_LINGER, &l_opt, sizeof l_opt);

If SO_LINGER has no effect on the removal of the socket or the FIN_WAIT2 timeout, then what really is the purpose of SO_LINGER?

See POSIX Use of Options in the section describing POSIX Sockets. – Jonathan Leffler
@JonathanLeffler The POSIX document says nothing about TIME_WAIT, but almost every discussion about SO_LINGER here on StackOverflow, including the question and answers I have linked, seems to talk about TIME_WAIT. If SO_LINGER has no effect on TIME_WAIT, are all these existing answers misguided? – Lone Learner
Yes; the POSIX document does not contain the word TIME_WAIT anywhere. If you want to find out about that, you'll have to search elsewhere for the information. Maybe you could get hold of UNIX® Network Programming, Vol 1: The Sockets Networking API, 3rd Edn by W. Richard Stevens, Bill Fenner, and Andrew M. Rudoff. – Jonathan Leffler
@LoneLearner It eliminates TIME_WAIT if you reset the connection, but your original code didn't do that. You can read about TIME_WAIT in RFC 793. – user207421
@LoneLearner, please read developerweb.net/viewtopic.php?id=2982 for an explanation of how to use the SO_LINGER option, what it means (most important), and when it is appropriate. YOU CAN LOSE THE FINAL CONNECTION DATA if you misuse it. – Luis Colorado

2 Answers

5 votes

You have a basic misunderstanding.

Setting SO_LINGER with a positive timeout does exactly one thing. It enables close() to block for up to that timeout while there is any outbound pending data still in flight. If you don't modify it, the default is for the close() to be asynchronous, which means the application can't tell whether any data still in flight got sent.

So the purpose of doing it is to enable the application to detect a failure to completely send that final pending data.
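
A minimal sketch of that usage, as a fragment for the accept loop in the server.c above (it reuses client_sockfd, and the 10 second linger timeout is only illustrative):

struct linger l_opt;

/* Ask close() to block, for at most 10 seconds, until the pending
   outbound data has been sent and acknowledged by the peer. */
l_opt.l_onoff = 1;
l_opt.l_linger = 10;
if (setsockopt(client_sockfd, SOL_SOCKET, SO_LINGER,
               &l_opt, sizeof l_opt) == -1)
    perror("server: setsockopt(SO_LINGER)");

if (send(client_sockfd, "hello\n", 6, 0) == -1)
    perror("server: send");

/* With lingering enabled, an error from close() (on some systems
   EWOULDBLOCK when the linger time expires first) tells the
   application that the final data may not have been delivered. */
if (close(client_sockfd) == -1)
    perror("server: close");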

It doesn't have anything whatsoever to do with cleaning up dead or useless sockets. Specifically, it doesn't shorten TIME_WAIT or the other TCP timeouts (such as the FIN_WAIT2 timeout) that follow the close.

That can be accomplished in another way by using a different setting, but the effect of that is to reset the connection and lose any data in flight, and possibly cause consternation at the other end, so it is not recommended. At least by me.
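
Presumably that other setting is the l_onoff = 1, l_linger = 0 combination; a sketch of that abortive close, again on the accepted socket from server.c:

struct linger l_opt;

/* Abortive close: any unsent data is discarded and a RST is sent
   instead of a FIN, so this end skips FIN_WAIT2 and TIME_WAIT
   entirely. The peer sees a "connection reset" error. */
l_opt.l_onoff = 1;
l_opt.l_linger = 0;
if (setsockopt(client_sockfd, SOL_SOCKET, SO_LINGER,
               &l_opt, sizeof l_opt) == -1)
    perror("server: setsockopt(SO_LINGER)");

close(client_sockfd);   /* connection is reset immediately */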

Your actual code is behaving exactly as expected. The server has closed, so the client is in CLOSE_WAIT for 90 seconds, and the server is in FIN_WAIT_2 waiting for the client to close. There is nothing here but a misbehaving client. The server will survive it as soon as the timeouts expire.
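
To illustrate the last point, here is a sketch of the client-side fix, replacing the sleep(90) in the client.c above (it reuses sockfd, buffer and bytes from that program): read until the server's FIN is seen, then close right away.

/* Read until recv() returns 0 (the server has closed its side),
   then close immediately instead of sleeping for 90 seconds. */
while ((bytes = recv(sockfd, buffer, sizeof buffer, 0)) > 0)
    printf("client: received: %.*s", (int) bytes, buffer);

if (bytes == -1)
    perror("client: recv");

/* This sends the client's FIN, so the server's socket can leave
   FIN_WAIT2 (it then passes through the normal TIME_WAIT). */
close(sockfd);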

-2 votes

@LoneLearner Instead of using:

l_onoff = 1
l_linger = 0

try this:

l_onoff = 0
l_linger = 0

You will see very different behavior from your app. In the second case, as soon as you close() the socket, you also get rid of it immediately.

This is an extreme action that closes the connection abruptly, and the remote end will see an error (connection reset). Moreover, unsent data will be discarded. Whether this setting of SO_LINGER is appropriate depends on the specific app and situation. Many do not consider it good practice.