Backend fail & 503 Service Unavailable

pub crawler pubcrawler.com at gmail.com
Tue Sep 22 22:45:09 CEST 2009


Well I might be a bit off on this, but 503's often involve ulimit
ceiling maximums being hit and exceeded.

ulimit -a

What is that limit set at for open files?

In our experience many high performance web related software products
suffer from low open file limit. I've seen 256 in SunOS, 1024 in
Ubuntu.  We bump our settings in startup scripts for key servers to
8096.  We run on dedicated servers - so no resource contention with
other users.

-Paul

On 9/22/09, M L <ml at tinwong.com> wrote:
> Hi list
>
>
> I love varnish and really want to use it :D  Any clue to fix my problem , it
> come out alot backend fail ( i guess timeout problem )
>
> my setup
>
> 1.Centos 5.3 64bit varnish / webserver
> 2.nginx backend server (it run over 200+days 2Mil pv/day without any problem
> & healthy hardware)
> 3.varnish connect to nginx in same internal switchs  (3com 5500 Giga layer4)
> 4.Tested different version nginx was same happen ( nginx-0.6.36 &
> nginx-0.7.61)
> 5.Tested 2 different hardware for varnish same happen
> 6.Changed nginx different timeout same happen , if changed to keepalive 0
> will more backend fail
> 7.When i changed vcl to director rr x50 times , it didnt show 503 Service
> Unavailable on client side but like 2-8 sec. lag  when then Backend fail
> number increase
>
>
> #start
>
> varnishd -p lru_interval=3600 -a 0.0.0.0:80 -T localhost:3500 -p
> client_http11=on -f vconf2 -s file,/usr/local/varnish/cache.bin,80G -h
> classic,500009 -p listen_depth=4096 -p obj_workspace=32768 -p
> sess_workspace=32768 -p send_timeout=327 -p first_byte_timeout=300 -p
> connect_timeout=5 -p vcl_trace=on
>
>
>
> #varnishlog
>
>   140 ReqStart     c 121.203.78.124 4755 1383283991
>   140 RxRequest    c GET
>   140 RxURL        c /thread-1131553-1-1.html
>   140 RxProtocol   c HTTP/1.1
>   140 RxHeader     c Host: www.zoobar.com
>   140 RxHeader     c User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1;
> zh-TW; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14
>   140 RxHeader     c Accept: image/png,image/*;q=0.8,*/*;q=0.5
>   140 RxHeader     c Accept-Language: zh-tw,en-us;q=0.7,en;q=0.3
>   140 RxHeader     c Accept-Encoding: gzip,deflate
>   140 RxHeader     c Accept-Charset: Big5,utf-8;q=0.7,*;q=0.7
>   140 RxHeader     c Keep-Alive: 300
>   140 RxHeader     c Connection: keep-alive
>   140 RxHeader     c Referer: http://www.zoobar.com/thread-1131553-1-1.html
>   140 RxHeader     c Cookie: cdb_sid=Nf6FM3;
> cdb_oldtopics=D1131553D1131773D1129353D1129581D1130425D1131274D1121699D1122534D1122581D1124932D1125704D1126044D1126641D1126650D1127247D1128227D1128288D1128329D1128640D1129165D1129403D1130965D1131013D1131057D1131460D1131464D1131491D
>   140 VCL_call     c recv
>   140 VCL_trace    c 1 106.14
>   140 VCL_trace    c 2 110.5
>   140 VCL_trace    c 3 110.41
>   140 VCL_trace    c 9 143.5
>   140 VCL_trace    c 14 157.1
>   140 VCL_trace    c 15 157.5
>   140 VCL_trace    c 17 162.1
>   140 VCL_trace    c 18 162.5
>   140 VCL_trace    c 26 172.9
>   140 VCL_trace    c 27 172.13
>   140 VCL_trace    c 30 180.1
>   140 VCL_trace    c 31 180.5
>   140 VCL_trace    c 33 184.1
>   140 VCL_trace    c 34 184.5
>   140 VCL_trace    c 36 192.1
>   140 VCL_trace    c 37 192.5
>   140 VCL_trace    c 39 198.1
>   140 VCL_trace    c 40 198.5
>   140 VCL_trace    c 41 198.31
>   140 VCL_trace    c 42 199.9
>   140 VCL_trace    c 44 202.18
>   140 VCL_trace    c 45 202.53
>   140 VCL_trace    c 49 213.1
>   140 VCL_trace    c 84 42.14
>   140 VCL_trace    c 85 43.9
>   140 VCL_trace    c 93 53.5
>   140 VCL_trace    c 94 53.9
>   140 VCL_trace    c 97 57.5
>   140 VCL_trace    c 98 57.9
>   140 VCL_trace    c 99 57.35
>   140 VCL_trace    c 100 57.52
>   140 VCL_return   c pass
>   140 VCL_call     c pass
>   140 VCL_trace    c 103 74.14
>   140 VCL_return   c pass
>   140 VCL_call     c error
>   140 VCL_trace    c 124 129.15
>   140 VCL_return   c deliver
>   140 Length       c 466
>   140 VCL_call     c deliver
>   140 VCL_trace    c 69 327.17
>   140 VCL_trace    c 70 328.21
>   140 VCL_trace    c 75 344.1
>   140 VCL_trace    c 120 110.17
>   140 VCL_return   c deliver
>   140 TxProtocol   c HTTP/1.1
>   140 TxStatus     c 503
>   140 TxResponse   c Service Unavailable
>   140 TxHeader     c Server: Varnish
>   140 TxHeader     c Retry-After: 0
>   140 TxHeader     c Content-Type: text/html; charset=utf-8
>   140 TxHeader     c Content-Length: 466
>   140 TxHeader     c Date: Tue, 22 Sep 2009 16:15:52 GMT
>   140 TxHeader     c X-Varnish: 1383283991
>   140 TxHeader     c Age: 0
>   140 TxHeader     c Via: 1.1 varnish
>   140 TxHeader     c Connection: close
>   140 ReqEnd       c 1383283991 1253636152.715658903 1253636152.715944052
> 0.016985893 0.000265121 0.000020027
>   140 SessionClose c error
>   140 StatSess     c 121.203.78.124 4755 0 1 1 0 1 0 235 466
>
>
>
>
>   #varnishstat -1
>
> uptime                    266          .   Child uptime
> client_conn             13993        52.61 Client connections accepted
> client_req              43378       163.08 Client requests received
> cache_hit               31219       117.36 Cache hits
> cache_hitpass              86         0.32 Cache hits for pass
> cache_miss               3523        13.24 Cache misses
> backend_conn            12054        45.32 Backend connections success
> backend_unhealthy            0         0.00 Backend connections not
> attempted
> backend_busy                0         0.00 Backend connections too many
> backend_fail             5900        22.18 Backend connections failures
> backend_reuse            3503        13.17 Backend connections reuses
> backend_recycle         11552        43.43 Backend connections recycles
> backend_unused              0         0.00 Backend connections unused
> n_srcaddr                1246          .   N struct srcaddr
> n_srcaddr_act              64          .   N active struct srcaddr
> n_sess_mem                974          .   N struct sess_mem
> n_sess                     84          .   N struct sess
> n_object                 3040          .   N struct object
> n_objecthead             1972          .   N struct objecthead
> n_smf                    6460          .   N struct smf
> n_smf_frag                573          .   N small free smf
> n_smf_large                 7          .   N large free smf
> n_vbe_conn                119          .   N struct vbe_conn
> n_bereq                   240          .   N struct bereq
> n_wrk                     261          .   N worker threads
> n_wrk_create              261         0.98 N worker threads created
> n_wrk_failed                0         0.00 N worker threads not created
> n_wrk_max              336496      1265.02 N worker threads limited
> n_wrk_queue                 0         0.00 N queued work requests
> n_wrk_overflow           4696        17.65 N overflowed work requests
> n_wrk_drop                374         1.41 N dropped work requests
> n_backend                  60          .   N backends
> n_expired                 675          .   N expired objects
> n_lru_nuked                 0          .   N LRU nuked objects
> n_lru_saved                 0          .   N LRU saved objects
> n_lru_moved                 0          .   N LRU moved objects
> n_deathrow                  0          .   N objects on deathrow
> losthdr                     0         0.00 HTTP header overflows
> n_objsendfile               0         0.00 Objects sent with sendfile
> n_objwrite              41590       156.35 Objects sent with write
> n_objoverflow               0         0.00 Objects overflowing workspace
> s_sess                  10425        39.19 Total Sessions
> s_req                   43325       162.88 Total Requests
> s_pipe                      0         0.00 Total pipe
> s_pass                   8542        32.11 Total pass
> s_fetch                 11996        45.10 Total fetch
> s_hdrbytes           16332373     61399.90 Total header bytes
> s_bodybytes         266640005   1002406.03 Total body bytes
> sess_closed              2116         7.95 Session Closed
> sess_pipeline              54         0.20 Session Pipeline
> sess_readahead             34         0.13 Session Read Ahead
> sess_linger                 0         0.00 Session Linger
> sess_herd               41284       155.20 Session herd
> shm_records           3806411     14309.82 SHM records
> shm_writes             134677       506.30 SHM writes
> shm_flushes                40         0.15 SHM flushes due to overflow
> shm_cont                  884         3.32 SHM MTX contention
> shm_cycles                  1         0.00 SHM cycles through buffer
> sm_nreq                 24335        91.48 allocator requests
> sm_nobj                  5880          .   outstanding allocations
> sm_balloc           129392640          .   bytes allocated
> sm_bfree          85769953280          .   bytes free
> sma_nreq                    0         0.00 SMA allocator requests
> sma_nobj                    0          .   SMA outstanding allocations
> sma_nbytes                  0          .   SMA outstanding bytes
> sma_balloc                  0          .   SMA bytes allocated
> sma_bfree                   0          .   SMA bytes free
> sms_nreq                  111         0.42 SMS allocator requests
> sms_nobj                    0          .   SMS outstanding allocations
> sms_nbytes                  0          .   SMS outstanding bytes
> sms_balloc              50376          .   SMS bytes allocated
> sms_bfree               50376          .   SMS bytes freed
> backend_req             12054        45.32 Backend requests made
> n_vcl                       1         0.00 N vcl total
> n_vcl_avail                 1         0.00 N vcl available
> n_vcl_discard               0         0.00 N vcl discarded
> n_purge                     1          .   N total active purges
> n_purge_add                 1         0.00 N new purges added
> n_purge_retire              0         0.00 N old purges deleted
> n_purge_obj_test            0         0.00 N objects tested
> n_purge_re_test             0         0.00 N regexps tested against
> n_purge_dups                0         0.00 N duplicate purges removed
> hcb_nolock                  0         0.00 HCB Lookups without lock
> hcb_lock                    0         0.00 HCB Lookups with lock
> hcb_insert                  0         0.00 HCB Inserts
> esi_parse                   0         0.00 Objects ESI parsed (unlock)
> esi_errors                  0         0.00 ESI parse errors (unlock)
>
>
> # my vcl
>
> director srv1 round-robin {
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     { .backend = { .connect_timeout = 2s; .host = "10.0.0.5"; .port = "80";
> } }
>     }
>
>
> acl purge {
>
>     "localhost"; "127.0.0.1";
> }
>
> #recv
> sub vcl_recv {
>
> #set req.grace = 30s;
>
> if (req.http.host ~ "www.zoobar.com") {
>     set req.http.host = "www.zoobar.com";
>     set req.backend = srv1;
>
>
> }elseif ( req.http.host ~ "www.voobar.com") {
>     set req.http.host = "www.voobar.com";
>     set req.backend = srv1;
>
>
> }elseif ( req.http.host ~ "www.hoobar.com") {
>     set req.http.host = "www.hoobar.com";
>     set req.backend = srv1;
>
> }else{
>     error 401 "Bad Domain";
> }
>
> # Add a unique header containing the client address
> remove req.http.X-Forwarded-For;
> set    req.http.X-Forwarded-For = client.ip;
> # [...]
>
>
> if (req.request == "PURGE") {
>     if(!client.ip ~ purge) {
>         error 405 "Not Allowed";
>     } lookup;}
>
> if (req.http.Expect) {
>         pipe;
>     }
>
>
> if (req.request != "GET" &&
>                 req.request != "HEAD" &&
>                 req.request != "PUT" &&
>                 req.request != "POST" &&
>                 req.request != "TRACE" &&
>                 req.request != "OPTIONS" &&
>                 req.request != "DELETE") {
>                         /* Non-RFC2616 or CONNECT which is weird. */
>                         pipe;
>         }
>         if (req.request != "GET" && req.request != "HEAD") {
>                 /* We only deal with GET and HEAD by default */
>                 pass;
>         }
>
>
>
>
> if (req.http.Cache-Control ~ "no-cache") {
>         pass;
>     }
>
> if (req.http.Authenticate) {
>     pass;
>   }
>
> #if (req.http.Cookie) {
> #    pass;
> #  }
>
> if (req.url ~
> "\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
>     unset req.http.cookie;
>     lookup;
> #    unset req.http.authenticate;
>     }
>
> if (req.http.Accept-Encoding) {
>     if (req.url ~
> "\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
>             # No point in compressing these
>             remove req.http.Accept-Encoding;
>         } elsif (req.http.Accept-Encoding ~ "gzip") {
>             set req.http.Accept-Encoding = "gzip";
>         } elsif (req.http.Accept-Encoding ~ "deflate") {
>             set req.http.Accept-Encoding = "deflate";
>         } else {
>             # unkown algorithm
>             remove req.http.Accept-Encoding;
>         }
>     }
>
>
> } #end recv
>
>
> sub vcl_hash {
> set req.hash += req.url;
> set req.hash += req.http.host;
> #set req.hash += req.http.cookie;
> #set req.hash += server.ip;
> hash;
> }  #end hash
>
>
> # strip the cookie before the image is inserted into cache.
> sub vcl_fetch {
>
> #set obj.grace = 30s;
>
>
> if(obj.http.Set-Cookie){
>                 pass;
>                 }
>
>
>   if(obj.http.Pragma ~ "no-cache" ||
>        obj.http.Cache-Control ~ "no-cache" ||
>         obj.http.Cache-Control ~ "private"){
>         pass;
>      }
>
>
> if (req.url ~
> "\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
>         unset obj.http.set-cookie;
>         set obj.ttl = 1w;
>                }
>
>
>
>
> if (req.url ~
> "\.(zip|ico|dat|torrent|png|gif|jpg|swf|css|js|bmp|bz2|tbz|mp3|ogg)$") {
> unset obj.http.expires;
> set obj.http.cache-control = "max-age=315360000, public";
> set obj.ttl = 1w;
> set obj.http.magicmarker = "1";
> }
>
>
>     if (obj.status == 503) {
>
>         restart;
>     }
>
>
> # if (obj.cacheable) {
> #                        /* Remove Expires from backend, it's not long
> enough */
> #                        unset obj.http.expires;
>
> #                        /* Set the clients TTL on this object */
> #                        set obj.http.cache-control = "max-age=315360000,
> public";
>
> #                        /* Set how long Varnish will keep it */
> #                        set obj.ttl = 1w;
>
> #                        /* marker for vcl_deliver to reset Age: */
> #                        set obj.http.magicmarker = "1";
> #                }
>
>
>     } #fetch end
>
>
>
>
> sub vcl_deliver {
>                 if (resp.http.magicmarker) {
>                         /* Remove the magic marker */
>                         unset resp.http.magicmarker;
>
>                         /* By definition we have a fresh object */
>                         set resp.http.age = "0";
> if (obj.hits > 0) {
>                  set resp.http.X-Cache = "HIT";
>         } else {
>                  set resp.http.X-Cache = "MISS";
>        }
>
>                 }
>
>
> } #deliver end
>
>
> sub vcl_pipe {
>     # http://varnish.projects.linpro.no/ticket/451
>     # This forces every pipe request to be the first one.
>     set bereq.http.connection = "close";
> } #pipe end
>
>
> sub vcl_hit {
>          if (req.request == "PURGE") {
>                  set obj.ttl = 0s;
>                  error 200 "Purged.";
>          }
>
>          if (!obj.cacheable) {
>                  pass;
>          }
>          deliver;
>  }
>
>
>
>
> Thank you
>
> TW
>


More information about the varnish-misc mailing list