0
votes

I have made the following Win32 socket program to browse web pages. I am using MinGW to avoid a dependency on any runtime. To get IP addresses, I ping hostnames such as www.google.com and www.yahoo.com from the command prompt and use those IP addresses in my program. The port is, of course, 80.

I am able to get the default pages of Google, Yahoo, etc. by using "GET /\r\n". I am also able to get non-default pages, even those inside directories, such as http://yasini.com/newsite/index.aspx, by using "GET /newsite/index.aspx". The output of the program is the HTML received from the web server, saved to the hard disk. This file is later opened in Firefox to see how the communication went.

I have made a test webpage, http://a.domaindlx.com/trysite/hello.asp, which I can open in firefox. Then I ping the domain, a.domaindlx.com and get this ipaddress, 66.36.238.30. I try to access the said page by using "GET /trysite/hello.asp" but get this in response, "No web site is configured at this address. No web site is configured at this address."

I know that the said response is sent by the webserver, so I was able to connect to the webserver. The problem is that the webserver is not recognizing the url I am trying to access. I have used different webpages, both htm and asp and none is accessible.

When trying to open website using ipaddress directly in browser, I get the same error, "No website is configured...".

The basic puzzle is: why are these pages accessible through a browser such as Firefox but not through my code, when my code essentially does what a browser does, i.e. opens a connection to the web server on port 80?


    #include windows.h
    #include stdio.h

    // Filled in by the WSAStartup() call in WinMain.
    WSADATA ws;

    // Receives the return code of WSAStartup() and later of connect().
    int d;
    // Shared 1000-byte scratch buffer: used for log messages, then for the
    // outgoing request, then as the receive buffer.
    char aa[1000];
    // Address (family, port, IP) of the web server that WinMain connects to.
    struct sockaddr_in a;
    // The TCP socket used for the HTTP exchange.
    SOCKET s;
    // Return value of the most recent recv(); the receive loop runs until it is 0.
    int li;

    // Appends the NUL-terminated string p, followed by a newline, to
    // c:\data.htm. The file is opened and closed again on every call.
    // NOTE(review): the fopen() result is used without a NULL check.
    void abc(char *p)
    {
        FILE *fp = fopen("c:\\data.htm", "a+");
        fprintf(fp, "%s\n", p);
        fclose(fp);
    }

    // Program entry point: starts Winsock, connects to a hard-coded IP address
    // on port 80, sends a GET request and appends everything it receives to
    // c:\data.htm via abc(). The four parameters are not used.
    // NOTE(review): there is no return statement, and the socket and Winsock
    // are never released with closesocket()/WSACleanup().
    _stdcall WinMain (HINSTANCE i, HINSTANCE j, char * k, int l)
    {
        // Initialise Winsock (0x101 requests version 1.1) and log the result code.
        d = WSAStartup(0x101, &ws);
        sprintf(aa, "WSASTARTUP = %d", d);
        abc(aa);

        // Create a TCP socket and log its handle value.
        s = socket(AF_INET, SOCK_STREAM, 0);
        sprintf(aa, "SOCKET = %d", s);
        abc(aa);

        // Target: port 80 on a literal IP address. The commented-out lines are
        // the other servers that were tried.
        a.sin_family = AF_INET;
        a.sin_port = htons(80);
        //a.sin_addr.s_addr = inet_addr("74.125.236.145");
        a.sin_addr.s_addr = inet_addr("66.36.238.30"); //a.domaindlx.com
        //a.sin_addr.s_addr = inet_addr("206.225.85.18"); //www.domaindlx.com
        //a.sin_addr.s_addr = inet_addr("87.248.122.122"); //www.yahoo.com
        //a.sin_addr.s_addr = inet_addr("72.167.153.9"); //www.yasini.com
        // The connect() result is stored in d but neither checked nor logged.
        d = connect(s, (struct sockaddr *) &a, sizeof(a));

        // Build the request in aa: the request line ends right after the path,
        // and "HTTP 1.0 " follows on a line of its own. No Host header is sent.
        strcpy(aa, "GET /trysite/hello.asp\r\n");
        strcat(aa, "HTTP 1.0 \r\n\r\n");
        // sizeof(aa) is 1000, so the whole buffer is sent, not just the
        // request text that was written into it.
        send(s, aa, sizeof(aa), 0);
        li = 1;

        // Receive until recv() returns 0. A negative (error) return does not
        // end the loop because the condition only tests for 0.
        while(li != 0)
        {
            li = recv(s, aa, 1000, 0);
            // abc() treats aa as a NUL-terminated string; nothing here
            // terminates the buffer after the li received bytes.
            abc(aa);
        }
    }

Note: Please enclose the header file names in the include lines in angle brackets for the code to work. I had to remove them to properly format the HTML.

3
Why not save yourself a lot of bother and use the excellent libcurl library? Portable, solid and free. – rushman

3 Answers

1
votes

The troublesome URL is running on a subdomain. The successful URLs are not. Many webservers host multiple accounts on the same physical IP(s), so they need to know which particular domain/subdomain is being requested in order to access the correct account. You need to include a Host header in your request.

Also note that when you call send() to send the request, you are sending the entire 1000 bytes of the aa buffer, which is wrong. You need to send only what you actually filled in.

Lastly, you are not really managing the socket very well in general. You need better error handling.

Try this:

#include <windows.h>
#include <stdio.h>

// Appends raw bytes to the output file c:\data.htm.
//
//   p - start of the data to write; a NULL pointer is ignored.
//   l - number of bytes to write. When -1 (the default), p is treated as a
//       NUL-terminated string and its whole length is written.
//
// The file is opened in binary append mode so that bytes received from the
// network are stored exactly as they arrived; text mode on Windows would
// rewrite every '\n' as "\r\n". If the file cannot be opened the call does
// nothing.
void abc(const char *p, int l = -1)
{
    if (!p)
        return;

    FILE *fp = fopen("c:\\data.htm", "ab");
    if (fp)
    {
        if (l == -1) l = (int) strlen(p);
        // Any other negative length would wrap to a huge size_t; skip it.
        if (l > 0)
            fwrite(p, 1, (size_t) l, fp);
        fclose(fp);
    }
}

int WINAPI WinMain (HINSTANCE i, HINSTANCE j, char * k, int l)
{
    char aa[1000];

    WSADATA ws;
    int d = WSAStartup(0x101, &ws);
    sprintf(aa, "WSASTARTUP = %d\n", d);
    abc(aa);

    if (d == 0)
    {
        SOCKET s = socket(AF_INET, SOCK_STREAM, 0);
        sprintf(aa, "SOCKET = %d\n", s);
        abc(aa);

        if (s != INVALID_SOCKET)
        {
            char *host = "a.domaindlx.com";
            char *file = "/trysite/hello.asp";

            struct sockaddr_in a;
            memset(&a, 0, sizeof(a));

            a.sin_family = AF_INET;
            a.sin_port = htons(80);

            struct hostent *h = gethostbyname(host);
            if (!h)
            {
                sprintf(aa, "gethostbyname(\"%s\") FAILED\n", host);
                abc(aa);
            }
            else
            {
                sprintf(aa, "gethostbyname(\"%s\") TYPE = %d\n", host, h->h_addrtype);
                abc(aa);

                if (h->h_addrtype == AF_INET)
                {
                    a.sin_addr = * (struct in_addr*) h->h_addr;
                    sprintf(aa, "gethostbyname(\"%s\") IP = %s\n", host, inet_ntoa(a.sin_addr));
                    abc(aa);

                    d = connect(s, (struct sockaddr *) &a, sizeof(a));
                    sprintf(aa, "CONNECT = %d\n", d);
                    abc(aa);

                    if (d == 0)
                    {
                        sprintf(aa,
                            "GET %s HTTP/1.0\r\n"
                            "Host: %s\r\n"
                            "Connection: close\r\n"
                            "\r\n",
                            file, host);

                        char *p = aa;
                        int t = strlen(aa);
                        int li;

                        do
                        {
                            li = send(s, p, t, 0);
                            if (li < 1)
                                break;

                            p += li;
                            t -= li;
                        }
                        while (t > 0);

                        if (t != 0)
                        {
                            abc("SEND FAILED\n");
                        }
                        else
                        {
                            abc("SEND OK\n");

                            do
                            {
                                li = recv(s, aa, sizeof(aa), 0);
                                if (li < 1)
                                    break;

                                abc(aa, li);
                            }
                            while (true);
                        }
                    }
                }
            }

            closesocket(s);
        }

        WSACleanup();
    }

    return 0;
}

I strongly suggest you get a packet sniffer, such as Wireshark. Then you can see EXACTLY what web browsers (or any other socket apps) are actually sending and receiving, and you can match that in your code as needed.

1
votes

There are two problems with your code. The first one is that there should be a space, not \r\n, before HTTP/1.0. Without this you are sending an HTTP 0.9 request.

The second problem is that some IP addresses are used to host multiple sites and require sending a Host header.

The site that tells you "No web site is configured at this address" may work better if you add the Host: header. Your request to that site should look like this:

"GET /trysite/hello.asp HTTP/1.0\r\nHost: a.domaindlx.com\r\n\r\n"

0
votes

You're not following the protocol correctly. You want GET /trysite/hello.asp HTTP/1.0\r\n\r\n See here for the full spec.