Below are a few TCP tunables that I ran into when looking into TCP performance tuning for CEPH.
Note that there are two separate sections for 10GE connectivity, so you will want to test with both to find what works best for your environment.
To implement, we just add what is below to /etc/sysctl.d/99-sysctl.conf and run “sysctl -p“. Changes are persistent across reboots. Ideally these TCP tunables should be deployed to all CEPH nodes (OSD most importantly).
[code language=”css”]
## Increase Linux autotuning TCP buffer limits
## Set max to 16MB (16777216) for 1GE
## 32MB (33554432) or 54MB (56623104) for 10GE
# 1GE/16MB (16777216)
#net.core.rmem_max = 16777216
#net.core.wmem_max = 16777216
#net.core.rmem_default = 16777216
#net.core.wmem_default = 16777216
#net.core.optmem_max = 40960
#net.ipv4.tcp_rmem = 4096 87380 16777216
#net.ipv4.tcp_wmem = 4096 65536 16777216
# 10GE/32MB (33554432)
#net.core.rmem_max = 33554432
#net.core.wmem_max = 33554432
#net.core.rmem_default = 33554432
#net.core.wmem_default = 33554432
#net.core.optmem_max = 40960
#net.ipv4.tcp_rmem = 4096 87380 33554432
#net.ipv4.tcp_wmem = 4096 65536 33554432
# 10GB/54MB (56623104)
net.core.rmem_max = 56623104
net.core.wmem_max = 56623104
net.core.rmem_default = 56623104
net.core.wmem_default = 56623104
net.core.optmem_max = 40960
net.ipv4.tcp_rmem = 4096 87380 56623104
net.ipv4.tcp_wmem = 4096 65536 56623104
## Increase number of incoming connections. The value can be raised to bursts of request, default is 128
net.core.somaxconn = 1024
## Increase number of incoming connections backlog, default is 1000
net.core.netdev_max_backlog = 50000
## Maximum number of remembered connection requests, default is 128
net.ipv4.tcp_max_syn_backlog = 30000
## Increase the tcp-time-wait buckets pool size to prevent simple DOS attacks, default is 8192
net.ipv4.tcp_max_tw_buckets = 2000000
# Recycle and Reuse TIME_WAIT sockets faster, default is 0 for both
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_tw_reuse = 1
## Decrease TIME_WAIT seconds, default is 30 seconds
net.ipv4.tcp_fin_timeout = 10
## Tells the system whether it should start at the default window size only for TCP connections
## that have been idle for too long, default is 1
net.ipv4.tcp_slow_start_after_idle = 0
#If your servers talk UDP, also up these limits, default is 4096
net.ipv4.udp_rmem_min = 8192
net.ipv4.udp_wmem_min = 8192
## Disable source redirects
## Default is 1
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.all.accept_redirects = 0
## Disable source routing, default is 0
net.ipv4.conf.all.accept_source_route = 0
[/code]
Reference here