0
votes

I run a Varnish 3.0.5 instance in front of an Apache 2.4.10 server, both on the same Ubuntu 12.04.4 machine. Varnish randomly forwards some requests to the backend server even though it already has the page in its cache.

When I do a simple request:

GET http://www.example.com/
Accept: */*
Accept-Encoding: gzip, deflate
Host: www.example.com
User-Agent: runscope/0.1

I sometimes receive a correct response from cache:

Accept-Ranges: bytes
Age: 5973
Cache-Control: private, no-cache, no-store, must-revalidate
Connection: keep-alive
Content-Encoding: gzip
Content-Length: 10015
Content-Type: text/html; charset=utf-8
Date: Fri, 24 Oct 2014 15:26:35 GMT
Expires: -1
Last-Modified: Fri, 24 Oct 2014 13:47:02 GMT
Server: Apache
Vary: Accept-Encoding
Via: 1.1 varnish
X-Varnish: 10531909 10507062

And sometimes (more often) a response directly from Apache:

Cache-Control: no-cache
Connection: keep-alive
Content-Encoding: gzip
Content-Length: 10015
Content-Type: text/html; charset=utf-8
Date: Fri, 24 Oct 2014 15:28:00 GMT
Expires: Thu, 19 Nov 1981 08:52:00 GMT
Last-Modified: Fri, 24 Oct 2014 15:28:00 GMT
Server: Apache
Set-Cookie: [truncated]
Vary: Accept-Encoding

I'm sure the item is still in the cache, because after a few responses directly from Apache I receive a response from Varnish whose Age header contains a higher number than the one in the previous response from Varnish.

My VCL file:

# Origin server that Varnish fetches from on a miss/pass/pipe.
# Here it is the web server listening on the loopback interface, port 8080.
backend default {
    .port = "8080";
    .host = "127.0.0.1";
}

# Clients permitted to issue PURGE requests (checked in vcl_recv):
# the whole loopback network plus whatever "localhost" resolves to.
acl purge {
    "127.0.0.1"/8;
    "localhost";
}

# Request entry point: decides for every client request whether it is looked
# up in the cache (lookup), fetched without caching (pass), or handed over as
# a raw byte stream to the backend (pipe).
sub vcl_recv {
    # Purge an item from the cache. Only clients in the "purge" ACL may do
    # so; the actual purge happens in vcl_hit / vcl_miss after the lookup.
    if (req.request == "PURGE") {
        if (!client.ip ~ purge) {
            error 405 "Not allowed.";
        }

        return (lookup);
    }

    # Set the forwarding header once per request (not again on restarts),
    # appending the client IP to any chain supplied by an upstream proxy.
    if (req.restarts == 0) {
        if (req.http.x-forwarded-for) {
            set req.http.X-Forwarded-For =
                req.http.X-Forwarded-For + ", " + client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }

    ## Pipe AJAX requests, except requests for [truncated]
    if (req.http.X-Requested-With == "XMLHttpRequest" && req.url !~ "^/[truncated]") {
        return (pipe);
    }

    if (req.request != "GET" &&
        req.request != "HEAD" &&
        req.request != "PUT" &&
        req.request != "POST" &&
        req.request != "TRACE" &&
        req.request != "OPTIONS" &&
        req.request != "DELETE") {
        /* Non-RFC2616 or CONNECT which is weird. */
        return (pipe);
    }
    if (req.request != "GET" && req.request != "HEAD") {
        /* We only deal with GET and HEAD by default. This already covers
         * POST, so no separate POST rule is needed further down. */
        return (pass);
    }

    ## Remove the GCLID parameter added by Google ads
    set req.url = regsuball(req.url,"\?gclid=[^&]+$",""); # strips when QS = "?gclid=AAA"
    set req.url = regsuball(req.url,"\?gclid=[^&]+&","?"); # strips when QS = "?gclid=AAA&foo=bar"
    set req.url = regsuball(req.url,"&gclid=[^&]+",""); # strips when QS = "?foo=bar&gclid=AAA" or QS = "?foo=bar&gclid=AAA&bar=baz"

    ### Always cache these items (checked before the URL exceptions below):
    ### JavaScript, images, CSS/HTML, multimedia and XML.
    ### Every extension is anchored with "$" so that ".js" does not also
    ### match ".json" or ".jsp"; this matches the patterns used in vcl_fetch
    ### and vcl_hash.
    if (req.request == "GET" && req.url ~ "\.(js|gif|jpg|jpeg|bmp|png|tiff|tif|ico|img|tga|wmf|css|html|svg|swf|mp3|mp4|m4a|ogg|mov|avi|wmv|xml)$") {
        return (lookup);
    }

    ## Do not cache the [truncated]
    if (req.url ~ "^/[truncated]") {
        return (pipe);
    }

    ## Cache the [truncated]
    if (req.url ~ "^/[truncated]") {
        return (lookup);
    }

    ## Cache the [truncated]
    if (req.url ~ "^/[truncated]") {
        return (lookup);
    }

    ## Do not cache the [truncated]
    if (req.url ~ "^/[truncated]") {
        return (pipe);
    }

    ## Exceptions for [truncated]
    if (req.url ~ "^/[truncated]") {
        return (pass);
    }

    ## Exceptions for [truncated]
    if (req.url ~ "^/[truncated]") {
        return (pipe);
    }

    # Everything else is served from the cache when possible.
    return (lookup);
}

# Once a request is piped, Varnish only shuffles bytes between client and
# backend until one side closes the connection. On a keep-alive connection
# every later request would therefore skip vcl_recv and the cache entirely
# and be answered by the backend directly (no Via / X-Varnish headers).
# Asking the backend to close the connection after the piped response limits
# pipe mode to the single request that asked for it.
# NOTE(review): this breaks protocols that need a long-lived piped
# connection (e.g. WebSocket upgrades) - confirm none of the piped URLs
# rely on that.
sub vcl_pipe {
    set bereq.http.Connection = "close";
    return (pipe);
}

# Builds the cache key: URL, then the Host header (or the receiving IP when
# no Host header was sent), plus a "logged-in" marker for dynamic pages
# requested by users carrying the cache-logged-in cookie.
sub vcl_hash {

    hash_data(req.url);

    if (req.http.host) {
        hash_data(req.http.host);
    } else {
        hash_data(server.ip);
    }

    ## Keep a separate variant of non-static GET pages for logged-in users.
    ## NOTE(review): only GET requests get this marker, so a PURGE request
    ## never hashes to the "logged-in" variant - confirm that is intended.
    if (req.request == "GET") {
        if (req.url !~ "\.(js|css|html|gif|jpg|jpeg|bmp|png|tiff|tif|ico|img|tga|wmf|svg|swf|ico|mp3|mp4|m4a|ogg|mov|avi|wmv|xml)$") {
            if (req.http.cookie ~ "cache-logged-in=1") {
                hash_data("logged-in");
            }
        }
    }

    return (hash);
}

#
# Called when the lookup found the object in the cache.
sub vcl_hit {
    # Ordinary requests are answered straight from the cache.
    if (req.request != "PURGE") {
        return (deliver);
    }

    # A PURGE (already ACL-checked in vcl_recv) evicts the object and
    # reports success to the caller.
    purge;
    error 200 "Purged.";
}

#
# Called when the lookup did not find the object in the cache.
sub vcl_miss {
    # Ordinary requests go to the backend to fetch the object.
    if (req.request != "PURGE") {
        return (fetch);
    }

    # A PURGE (already ACL-checked in vcl_recv) is still issued on a miss
    # and answered with the same success response as on a hit.
    purge;
    error 200 "Purged.";
}

#
# Called after a response has been fetched from the backend; decides whether
# and for how long the response is cached and which headers the client sees.
sub vcl_fetch {

    ## Unset cookies sent from the backend if they are not necessary, so the
    ## response can be shared between users. The cache-logged-in cookie is
    ## kept because vcl_hash keys on it.
    ## NOTE(review): because this sub always returns explicitly, responses
    ## for other URLs that carry Set-Cookie are cached as they are - confirm
    ## that no per-user cookie can end up in a shared cache object.
    if (req.url ~ "[truncated]") {
        if (beresp.http.set-cookie !~ "cache-logged-in") {
            unset beresp.http.set-cookie;
        }
    }

    ## If the backend returns a status other than 200, 301, 302, 303, 403 or
    ## 404, restart the request. If the number of restarts reaches the value
    ## of the parameter max_restarts, the request will be error'ed.
    ## max_restarts defaults to 4. This prevents an eternal loop in the event
    ## that, e.g., the backend keeps failing.
    ## Every 5xx response is restarted here, so no 5xx handling is needed
    ## further down.
    if (beresp.status != 200 && beresp.status != 403 && beresp.status != 404 && beresp.status != 301 && beresp.status != 302 && beresp.status != 303) {
        return (restart);
    }

    # Default lifetime for everything that reaches this point; the grace
    # period lets Varnish hold on to content for another 7200 seconds if the
    # backend cannot be reached.
    set beresp.ttl = 7200s;
    set beresp.grace = 7200s;

    # If the header X-nocache is present, do not cache the item
    if (beresp.http.x-nocache) {
        return (hit_for_pass);
    }

    if (req.request == "GET" && req.url ~ "\.(gif|jpg|jpeg|bmp|png|tiff|tif|ico|img|tga|wmf|css|html|js|xml|svg|swf|mp3|mp4|m4a|ogg|mov|avi|wmv)$") {
        ## Static content (images, CSS/HTML, JavaScript, XML, multimedia)
        ## is kept for a day.
        set beresp.ttl = 86400s;
    }
    else {
        ## Dynamic pages: cached by Varnish with the default TTL above, but
        ## browsers and intermediate proxies are told not to store them.
        set beresp.http.Expires = "-1";
        set beresp.http.Cache-Control = "private, no-cache, no-store, must-revalidate";
    }

    # Must come after the extension-based TTL above; otherwise a 404 for a
    # missing static file would be cached for a full day.
    if (beresp.status == 404) {
        set beresp.ttl = 0s;
    }

    return (deliver);
}
2

2 Answers

0
votes

The requests forwarded to Apache are not identical to the ones already cached in Varnish; that's the simple explanation. As to how they differ, there are several possibilities, and you would need to see the full requests to tell which applies, but they could include:
- cookies (notice that the responses coming from Apache are setting cookies, so that's where I would start);
- a difference in the casing of the URL (your hash is case-sensitive).

0
votes

Unset the request cookie in vcl_recv, i.e. add `unset req.http.cookie;` there.