Backend fail & 503 Service Unavailable

M L ml at tinwong.com
Tue Sep 22 19:22:06 CEST 2009


Hi list


I love varnish and really want to use it :D  Any clue to fix my problem , it
come out alot backend fail ( i guess timeout problem )

my setup

1.Centos 5.3 64bit varnish / webserver
2.nginx backend server (it run over 200+days 2Mil pv/day without any problem
& healthy hardware)
3.varnish connect to nginx in same internal switchs  (3com 5500 Giga layer4)
4.Tested different version nginx was same happen ( nginx-0.6.36 &
nginx-0.7.61)
5.Tested 2 different hardware for varnish same happen
6.Changed nginx different timeout same happen , if changed to keepalive 0
will more backend fail
7.When i changed vcl to director rr x50 times , it didnt show 503 Service
Unavailable on client side but like 2-8 sec. lag  when then Backend fail
number increase


#start

varnishd -p lru_interval=3600 -a 0.0.0.0:80 -T localhost:3500 -p
client_http11=on -f vconf2 -s file,/usr/local/varnish/cache.bin,80G -h
classic,500009 -p listen_depth=4096 -p obj_workspace=32768 -p
sess_workspace=32768 -p send_timeout=327 -p first_byte_timeout=300 -p
connect_timeout=5 -p vcl_trace=on



#varnishlog

  140 ReqStart     c 121.203.78.124 4755 1383283991
  140 RxRequest    c GET
  140 RxURL        c /thread-1131553-1-1.html
  140 RxProtocol   c HTTP/1.1
  140 RxHeader     c Host: www.zoobar.com
  140 RxHeader     c User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1;
zh-TW; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14
  140 RxHeader     c Accept: image/png,image/*;q=0.8,*/*;q=0.5
  140 RxHeader     c Accept-Language: zh-tw,en-us;q=0.7,en;q=0.3
  140 RxHeader     c Accept-Encoding: gzip,deflate
  140 RxHeader     c Accept-Charset: Big5,utf-8;q=0.7,*;q=0.7
  140 RxHeader     c Keep-Alive: 300
  140 RxHeader     c Connection: keep-alive
  140 RxHeader     c Referer: http://www.zoobar.com/thread-1131553-1-1.html
  140 RxHeader     c Cookie: cdb_sid=Nf6FM3;
cdb_oldtopics=D1131553D1131773D1129353D1129581D1130425D1131274D1121699D1122534D1122581D1124932D1125704D1126044D1126641D1126650D1127247D1128227D1128288D1128329D1128640D1129165D1129403D1130965D1131013D1131057D1131460D1131464D1131491D
  140 VCL_call     c recv
  140 VCL_trace    c 1 106.14
  140 VCL_trace    c 2 110.5
  140 VCL_trace    c 3 110.41
  140 VCL_trace    c 9 143.5
  140 VCL_trace    c 14 157.1
  140 VCL_trace    c 15 157.5
  140 VCL_trace    c 17 162.1
  140 VCL_trace    c 18 162.5
  140 VCL_trace    c 26 172.9
  140 VCL_trace    c 27 172.13
  140 VCL_trace    c 30 180.1
  140 VCL_trace    c 31 180.5
  140 VCL_trace    c 33 184.1
  140 VCL_trace    c 34 184.5
  140 VCL_trace    c 36 192.1
  140 VCL_trace    c 37 192.5
  140 VCL_trace    c 39 198.1
  140 VCL_trace    c 40 198.5
  140 VCL_trace    c 41 198.31
  140 VCL_trace    c 42 199.9
  140 VCL_trace    c 44 202.18
  140 VCL_trace    c 45 202.53
  140 VCL_trace    c 49 213.1
  140 VCL_trace    c 84 42.14
  140 VCL_trace    c 85 43.9
  140 VCL_trace    c 93 53.5
  140 VCL_trace    c 94 53.9
  140 VCL_trace    c 97 57.5
  140 VCL_trace    c 98 57.9
  140 VCL_trace    c 99 57.35
  140 VCL_trace    c 100 57.52
  140 VCL_return   c pass
  140 VCL_call     c pass
  140 VCL_trace    c 103 74.14
  140 VCL_return   c pass
  140 VCL_call     c error
  140 VCL_trace    c 124 129.15
  140 VCL_return   c deliver
  140 Length       c 466
  140 VCL_call     c deliver
  140 VCL_trace    c 69 327.17
  140 VCL_trace    c 70 328.21
  140 VCL_trace    c 75 344.1
  140 VCL_trace    c 120 110.17
  140 VCL_return   c deliver
  140 TxProtocol   c HTTP/1.1
  140 TxStatus     c 503
  140 TxResponse   c Service Unavailable
  140 TxHeader     c Server: Varnish
  140 TxHeader     c Retry-After: 0
  140 TxHeader     c Content-Type: text/html; charset=utf-8
  140 TxHeader     c Content-Length: 466
  140 TxHeader     c Date: Tue, 22 Sep 2009 16:15:52 GMT
  140 TxHeader     c X-Varnish: 1383283991
  140 TxHeader     c Age: 0
  140 TxHeader     c Via: 1.1 varnish
  140 TxHeader     c Connection: close
  140 ReqEnd       c 1383283991 1253636152.715658903 1253636152.715944052
0.016985893 0.000265121 0.000020027
  140 SessionClose c error
  140 StatSess     c 121.203.78.124 4755 0 1 1 0 1 0 235 466




  #varnishstat -1

uptime                    266          .   Child uptime
client_conn             13993        52.61 Client connections accepted
client_req              43378       163.08 Client requests received
cache_hit               31219       117.36 Cache hits
cache_hitpass              86         0.32 Cache hits for pass
cache_miss               3523        13.24 Cache misses
backend_conn            12054        45.32 Backend connections success
backend_unhealthy            0         0.00 Backend connections not
attempted
backend_busy                0         0.00 Backend connections too many
backend_fail             5900        22.18 Backend connections failures
backend_reuse            3503        13.17 Backend connections reuses
backend_recycle         11552        43.43 Backend connections recycles
backend_unused              0         0.00 Backend connections unused
n_srcaddr                1246          .   N struct srcaddr
n_srcaddr_act              64          .   N active struct srcaddr
n_sess_mem                974          .   N struct sess_mem
n_sess                     84          .   N struct sess
n_object                 3040          .   N struct object
n_objecthead             1972          .   N struct objecthead
n_smf                    6460          .   N struct smf
n_smf_frag                573          .   N small free smf
n_smf_large                 7          .   N large free smf
n_vbe_conn                119          .   N struct vbe_conn
n_bereq                   240          .   N struct bereq
n_wrk                     261          .   N worker threads
n_wrk_create              261         0.98 N worker threads created
n_wrk_failed                0         0.00 N worker threads not created
n_wrk_max              336496      1265.02 N worker threads limited
n_wrk_queue                 0         0.00 N queued work requests
n_wrk_overflow           4696        17.65 N overflowed work requests
n_wrk_drop                374         1.41 N dropped work requests
n_backend                  60          .   N backends
n_expired                 675          .   N expired objects
n_lru_nuked                 0          .   N LRU nuked objects
n_lru_saved                 0          .   N LRU saved objects
n_lru_moved                 0          .   N LRU moved objects
n_deathrow                  0          .   N objects on deathrow
losthdr                     0         0.00 HTTP header overflows
n_objsendfile               0         0.00 Objects sent with sendfile
n_objwrite              41590       156.35 Objects sent with write
n_objoverflow               0         0.00 Objects overflowing workspace
s_sess                  10425        39.19 Total Sessions
s_req                   43325       162.88 Total Requests
s_pipe                      0         0.00 Total pipe
s_pass                   8542        32.11 Total pass
s_fetch                 11996        45.10 Total fetch
s_hdrbytes           16332373     61399.90 Total header bytes
s_bodybytes         266640005   1002406.03 Total body bytes
sess_closed              2116         7.95 Session Closed
sess_pipeline              54         0.20 Session Pipeline
sess_readahead             34         0.13 Session Read Ahead
sess_linger                 0         0.00 Session Linger
sess_herd               41284       155.20 Session herd
shm_records           3806411     14309.82 SHM records
shm_writes             134677       506.30 SHM writes
shm_flushes                40         0.15 SHM flushes due to overflow
shm_cont                  884         3.32 SHM MTX contention
shm_cycles                  1         0.00 SHM cycles through buffer
sm_nreq                 24335        91.48 allocator requests
sm_nobj                  5880          .   outstanding allocations
sm_balloc           129392640          .   bytes allocated
sm_bfree          85769953280          .   bytes free
sma_nreq                    0         0.00 SMA allocator requests
sma_nobj                    0          .   SMA outstanding allocations
sma_nbytes                  0          .   SMA outstanding bytes
sma_balloc                  0          .   SMA bytes allocated
sma_bfree                   0          .   SMA bytes free
sms_nreq                  111         0.42 SMS allocator requests
sms_nobj                    0          .   SMS outstanding allocations
sms_nbytes                  0          .   SMS outstanding bytes
sms_balloc              50376          .   SMS bytes allocated
sms_bfree               50376          .   SMS bytes freed
backend_req             12054        45.32 Backend requests made
n_vcl                       1         0.00 N vcl total
n_vcl_avail                 1         0.00 N vcl available
n_vcl_discard               0         0.00 N vcl discarded
n_purge                     1          .   N total active purges
n_purge_add                 1         0.00 N new purges added
n_purge_retire              0         0.00 N old purges deleted
n_purge_obj_test            0         0.00 N objects tested
n_purge_re_test             0         0.00 N regexps tested against
n_purge_dups                0         0.00 N duplicate purges removed
hcb_nolock                  0         0.00 HCB Lookups without lock
hcb_lock                    0         0.00 HCB Lookups with lock
hcb_insert                  0         0.00 HCB Inserts
esi_parse                   0         0.00 Objects ESI parsed (unlock)
esi_errors                  0         0.00 ESI parse errors (unlock)


# my vcl

director srv1 round-robin {
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
} }
    }


acl purge {

    "localhost"; "127.0.0.1";
}

#recv
sub vcl_recv {

#set req.grace = 30s;

if (req.http.host ~ "www.zoobar.com") {
    set req.http.host = "www.zoobar.com";
    set req.backend = srv1;


}elseif ( req.http.host ~ "www.voobar.com") {
    set req.http.host = "www.voobar.com";
    set req.backend = srv1;


}elseif ( req.http.host ~ "www.hoobar.com") {
    set req.http.host = "www.hoobar.com";
    set req.backend = srv1;

}else{
    error 401 "Bad Domain";
}

# Add a unique header containing the client address
remove req.http.X-Forwarded-For;
set    req.http.X-Forwarded-For = client.ip;
# [...]


if (req.request == "PURGE") {
    if(!client.ip ~ purge) {
        error 405 "Not Allowed";
    } lookup;}

if (req.http.Expect) {
        pipe;
    }


if (req.request != "GET" &&
                req.request != "HEAD" &&
                req.request != "PUT" &&
                req.request != "POST" &&
                req.request != "TRACE" &&
                req.request != "OPTIONS" &&
                req.request != "DELETE") {
                        /* Non-RFC2616 or CONNECT which is weird. */
                        pipe;
        }
        if (req.request != "GET" && req.request != "HEAD") {
                /* We only deal with GET and HEAD by default */
                pass;
        }




if (req.http.Cache-Control ~ "no-cache") {
        pass;
    }

if (req.http.Authenticate) {
    pass;
  }

#if (req.http.Cookie) {
#    pass;
#  }

if (req.url ~
"\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
    unset req.http.cookie;
    lookup;
#    unset req.http.authenticate;
    }

if (req.http.Accept-Encoding) {
    if (req.url ~
"\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
            # No point in compressing these
            remove req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            # unkown algorithm
            remove req.http.Accept-Encoding;
        }
    }


} #end recv


sub vcl_hash {
set req.hash += req.url;
set req.hash += req.http.host;
#set req.hash += req.http.cookie;
#set req.hash += server.ip;
hash;
}  #end hash


# strip the cookie before the image is inserted into cache.
sub vcl_fetch {

#set obj.grace = 30s;


if(obj.http.Set-Cookie){
                pass;
                }


  if(obj.http.Pragma ~ "no-cache" ||
       obj.http.Cache-Control ~ "no-cache" ||
        obj.http.Cache-Control ~ "private"){
        pass;
     }


if (req.url ~
"\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
        unset obj.http.set-cookie;
        set obj.ttl = 1w;
               }




if (req.url ~
"\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
unset obj.http.expires;
set obj.http.cache-control = "max-age=315360000, public";
set obj.ttl = 1w;
set obj.http.magicmarker = "1";
}


    if (obj.status == 503) {

        restart;
    }


# if (obj.cacheable) {
#                        /* Remove Expires from backend, it's not long
enough */
#                        unset obj.http.expires;

#                        /* Set the clients TTL on this object */
#                        set obj.http.cache-control = "max-age=315360000,
public";

#                        /* Set how long Varnish will keep it */
#                        set obj.ttl = 1w;

#                        /* marker for vcl_deliver to reset Age: */
#                        set obj.http.magicmarker = "1";
#                }


    } #fetch end




sub vcl_deliver {
                if (resp.http.magicmarker) {
                        /* Remove the magic marker */
                        unset resp.http.magicmarker;

                        /* By definition we have a fresh object */
                        set resp.http.age = "0";
if (obj.hits > 0) {
                 set resp.http.X-Cache = "HIT";
        } else {
                 set resp.http.X-Cache = "MISS";
       }

                }


} #deliver end


sub vcl_pipe {
    # http://varnish.projects.linpro.no/ticket/451
    # This forces every pipe request to be the first one.
    set bereq.http.connection = "close";
} #pipe end


sub vcl_hit {
         if (req.request == "PURGE") {
                 set obj.ttl = 0s;
                 error 200 "Purged.";
         }

         if (!obj.cacheable) {
                 pass;
         }
         deliver;
 }




Thank you

TW
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://projects.linpro.no/pipermail/varnish-misc/attachments/20090923/ab1497b1/attachment-0001.htm 


More information about the varnish-misc mailing list