Hi,
We've been running an nginx 1.8 instance on one of our media servers to
serve big static .mp4 files as well as small files such as .jpeg. Nginx
serves well at under 13K connections/sec with 800Mbps of outgoing network
load, but whenever requests exceed 15K connections, nginx hangs and the
'D' (uninterruptible sleep) status shows up across the nginx workers. The
network load also drops to 400Mbps, which causes video streaming to get
stuck. After 5-10 minutes the load starts dropping, nginx stabilizes
again, and the network load goes back to 800Mbps. We've been encountering
this fluctuation roughly every 15 minutes.
We know that the 'D' status is most likely due to high disk I/O. To check
whether disk I/O really is the problem at 15K connections, we enabled
Apache on port 8080 and tested the same video stream through it during
high load. The video buffered on Apache; the stream fluctuated a bit, but
it never got stuck for 5-10 minutes. Meanwhile, the same video was at its
worst on nginx and got stuck for 5 minutes while buffering.
We suspect this is related to something other than disk I/O, because the
same video buffers better on Apache (on port 8080) under high load. Also,
if it were only a matter of high disk I/O, the video should not get stuck
for 5-10 minutes.
It looks to us like nginx hangs when concurrent connections exceed 15K.
We also tried tuning the backlog setting, which slightly improved
performance, but there must be something more in nginx optimization that
we are missing. I have included our nginx.conf, sysctl settings and vhost
file below to give a better understanding of our tweaks.
# --- Main (global) context -------------------------------------------------

# Unprivileged account the worker processes run as.
user                 nginx;

# Number of worker processes, and the per-worker open-file limit.
# The author budgets two file handles per connection.
worker_processes     48;
worker_rlimit_nofile 600000;

# Only messages of severity "error" and above are logged.
error_log            /var/log/nginx/error.log error;

# Alternatives kept for reference (disabled):
#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log /dev/null;
#pid logs/nginx.pid;
# Connection-processing settings shared by every worker process.
events {
    # Maximum simultaneous connections per worker process.
    worker_connections 2048;

    # `use` may appear only once; a second one is rejected as a duplicate
    # directive. epoll is the Linux method, and this host is Linux (see the
    # net.ipv4.* sysctl settings), so epoll is the one kept.
    use epoll;
    #use kqueue;    # FreeBSD/macOS method, not available on Linux
}
# HTTP-level defaults inherited by every vhost included at the bottom.
http {
    include       mime.types;
    default_type  application/octet-stream;

    # --- Request limits and buffers ---
    client_max_body_size         800M;
    client_body_buffer_size      128K;
    client_header_buffer_size    256k;
    large_client_header_buffers  4 256k;

    # --- Response / upstream buffers ---
    output_buffers        1 512k;
    sendfile_max_chunk    128k;    # only relevant while sendfile is on
    fastcgi_buffers       512 8k;
    proxy_buffers         512 8k;
    fastcgi_read_timeout  300s;

    server_tokens off;    # conceals the nginx version

    # Access logging is disabled. An `access_log <path>` at the same level as
    # `access_log off` never writes anything, so the path form is kept only
    # as a commented-out alternative.
    access_log off;
    #access_log /var/log/nginx/access.log;

    # --- File I/O ---
    # The paste contained `sendfile off;` followed by a second, argument-less
    # `sendfile ;`, which is both a syntax error and a duplicate directive.
    # Only the complete directive is kept.
    # NOTE(review): tcp_nopush and sendfile_max_chunk have no effect while
    # sendfile is off; confirm which setting was actually intended.
    sendfile    off;
    #sendfile   on;
    tcp_nodelay on;
    aio         on;
    directio    512;    # files >= 512 bytes are read with O_DIRECT
    tcp_nopush  on;

    # --- Timeouts ---
    client_header_timeout 120s;
    client_body_timeout   120s;
    send_timeout          120s;
    keepalive_timeout     15;

    # --- Compression ---
    gzip              on;
    gzip_vary         on;
    # ASCII quotes are required: with typographic quotes the value was split
    # at the space into two patterns containing literal quote characters.
    gzip_disable      "MSIE [1-6]\.";
    gzip_proxied      any;
    gzip_http_version 1.0;
    gzip_min_length   1280;
    gzip_comp_level   6;
    gzip_buffers      16 8k;
    # "text/javascript" was split in two by a stray space in the paste.
    # NOTE(review): the image/* types are already compressed formats;
    # consider dropping them to save CPU.
    gzip_types        text/plain text/xml text/css application/x-javascript
                      image/png image/x-icon image/gif image/jpeg image/jpg
                      application/xml application/xml+rss text/javascript
                      application/atom+xml;

    include /usr/local/nginx/conf/vhosts/*.conf;

    # --- Open file descriptor / metadata cache ---
    open_file_cache          max=2000 inactive=20s;
    open_file_cache_valid    60s;
    open_file_cache_min_uses 5;
    open_file_cache_errors   off;
}
sysctl.conf main config :
# --- File descriptors ---
fs.file-max = 700000

# --- Socket buffer ceilings and TCP buffer ranges (min default max) ---
net.core.rmem_max = 6291456
net.core.wmem_max = 6291456
net.ipv4.tcp_rmem = 10240 87380 6291456
net.ipv4.tcp_wmem = 10240 87380 6291456

# --- TCP feature switches ---
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_sack = 1
net.ipv4.tcp_no_metrics_save = 1

# --- Connection setup / teardown ---
net.ipv4.tcp_syn_retries = 2
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_fin_timeout = 30
net.ipv4.tcp_tw_reuse = 1
net.ipv4.ip_local_port_range = 1024 65000

# --- Accept and device queues ---
net.core.somaxconn = 10000
net.core.netdev_max_backlog = 10000

# --- Connection tracking (netfilter) ---
net.netfilter.nf_conntrack_tcp_timeout_established = 54000
net.ipv4.netfilter.ip_conntrack_generic_timeout = 120
net.ipv4.netfilter.ip_conntrack_max = 90536

# --- Dirty-page writeback thresholds (percent of memory) ---
vm.dirty_background_ratio = 50
vm.dirty_ratio = 80

# --- IPv6 disabled globally and per interface ---
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
net.ipv6.conf.lo.disable_ipv6 = 1
net.ipv6.conf.eth0.disable_ipv6 = 1
net.ipv6.conf.eth1.disable_ipv6 = 1
net.ipv6.conf.ppp0.disable_ipv6 = 1
net.ipv6.conf.tun0.disable_ipv6 = 1
Vhost :
# Media vhost: static files, FLV/MP4 pseudo-streaming with hotlink
# protection, and PHP handed to a local FastCGI backend.
server {
    listen       80 backlog=10000;
    # Each name is listed once; the paste repeated every entry.
    server_name  archive3.domain.com www.archive3.domain.com;
    access_log   off;

    # Referer whitelist used by the .flv and .mp4 locations below (they
    # inherit it). The paste repeated this list in both locations, with
    # "*.facebook.com" broken into "*." and "facebook.com" by line wrapping
    # and several entries duplicated.
    valid_referers none blocked
                   domain.com *.domain.com
                   *.facebook.com *.twitter.com *.gear3rd.net
                   tunemedia.tv www.tunemedia.tv embed.tunemedia.tv;

    location / {
        root      /content/archive;
        index     index.html index.htm index.php;
        autoindex off;
    }

    # Thumbnails come from a separate root and are tagged with X-Cache: SSD.
    location /files/thumbs/ {
        root       /data/nginx/archive;
        add_header X-Cache SSD;
        expires    max;
    }

    # FLV pseudo-streaming. The dot is escaped so that only a real ".flv"
    # extension matches; an unescaped "." matches any character.
    location ~ \.flv$ {
        flv;
        root           /content/archive;
        aio            on;
        directio       512;
        output_buffers 1 2m;
        expires        7d;

        # Reject requests whose Referer is not in the whitelist above.
        if ($invalid_referer) {
            return 403;
        }
    }

    # MP4 pseudo-streaming; same escaping and referer check as for FLV.
    location ~ \.mp4$ {
        mp4;
        mp4_buffer_size     4M;
        mp4_max_buffer_size 10M;
        expires             7d;
        root                /content/archive;

        if ($invalid_referer) {
            return 403;
        }
    }

    # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
    # (this line had lost its leading "#" in the paste, which is a syntax error)
    location ~ \.php$ {
        root                 /content/archive;
        fastcgi_pass         127.0.0.1:9000;
        fastcgi_index        index.php;
        fastcgi_param        SCRIPT_FILENAME $document_root$fastcgi_script_name;
        include              fastcgi_params;
        fastcgi_read_timeout 10000;
    }

    # Never serve .ht* files.
    location ~ /\.ht {
        deny all;
    }

    # /status and /ping are passed to the FastCGI backend; localhost only.
    location ~ ^/(status|ping)$ {
        access_log    off;
        allow         127.0.0.1;
        deny          all;
        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
        include       fastcgi_params;
        fastcgi_pass  127.0.0.1:9000;
    }
}
Server Specs :
L5630 (8cores, 16threads)
RAM 64GB
12 x 3TB @ SATA Hardware Raid-6
Here’s the screenshot of server load during 15K connections:
Regards.
Shahzaib