Skip to content
Snippets Groups Projects
Commit b34b4eec authored by Peter Cai's avatar Peter Cai
Browse files

add support for memcached F-Stack

parent e31a8db5
No related branches found
No related tags found
No related merge requests found
...@@ -2,6 +2,7 @@ IS_CUSTOM_KERNEL=false ...@@ -2,6 +2,7 @@ IS_CUSTOM_KERNEL=false
CPUPOWER=cpupower CPUPOWER=cpupower
X86_ENERGY_PERF_POLICY=x86_energy_perf_policy X86_ENERGY_PERF_POLICY=x86_energy_perf_policy
PERF=perf PERF=perf
FSTACK_SRC=/home/p5cai/workspace/f-stack
# Execute all SSH commands under this user # Execute all SSH commands under this user
# so that we don't need ssh keys for root # so that we don't need ssh keys for root
......
MEMCACHED=/home/p5cai/workspace/memcached/memcached MEMCACHED=/home/p5cai/workspace/memcached/memcached
MEMCACHED_FSTACK=/home/p5cai/workspace/memcached-fstack/memcached
...@@ -4,6 +4,8 @@ CONNS=160 # per agent thread ...@@ -4,6 +4,8 @@ CONNS=160 # per agent thread
NO_RUNS=0 NO_RUNS=0
REINIT_RUNS=4
load_config() { load_config() {
source "$EXP_ROOT/configs/memcached.sh" source "$EXP_ROOT/configs/memcached.sh"
source "$EXP_ROOT/configs/mutilate.sh" source "$EXP_ROOT/configs/mutilate.sh"
...@@ -23,7 +25,7 @@ cleanup_memcached() { ...@@ -23,7 +25,7 @@ cleanup_memcached() {
start_memcached() { start_memcached() {
echo "Starting memcached" echo "Starting memcached"
taskset -c 0-$((THREADS - 1)) $MEMCACHED -u root -t $THREADS -b 16384 -c 32768 -m 10240 -o hashpower=24,no_lru_maintainer,no_lru_crawler $MEMCACHED_ARGS 2>&1 > /dev/null & taskset -c 0-$((THREADS - 1)) $MEMCACHED -u root -t $THREADS -b 16384 -c 32768 -m 10240 -o hashpower=24,no_lru_maintainer,no_lru_crawler $MEMCACHED_ARGS 2>&1 > /tmp/memcached.lastexp.log &
MEMCACHED_PID=$! MEMCACHED_PID=$!
} }
...@@ -40,7 +42,7 @@ warmup() { ...@@ -40,7 +42,7 @@ warmup() {
} }
run_mutilate() { run_mutilate() {
if [ "$NO_RUNS" -ge 4 ]; then if [ "$NO_RUNS" -ge "$REINIT_RUNS" ]; then
echo "Re-intializing Memcached server" echo "Re-intializing Memcached server"
memcached_shutdown memcached_shutdown
memcached_startup memcached_startup
...@@ -56,6 +58,15 @@ run_mutilate() { ...@@ -56,6 +58,15 @@ run_mutilate() {
memcached_pre_start() { memcached_pre_start() {
load_config load_config
if [ "$FSTACK" == "true" ] && [ $THREADS -gt 1 ]; then
echo "Cannot run F-Stack memcached in multi-threaded mode"
exit 1
fi
if [ "$FSTACK" == "true" ]; then
MEMCACHED="$MEMCACHED_FSTACK"
fi
EXPERIMENT_NAME_EXT="t$THREADS.c$CONNS" EXPERIMENT_NAME_EXT="t$THREADS.c$CONNS"
if [ "$NAPI_LOCALITY" == "true" ]; then if [ "$NAPI_LOCALITY" == "true" ]; then
...@@ -75,6 +86,7 @@ memcached_init() { ...@@ -75,6 +86,7 @@ memcached_init() {
memcached_startup() { memcached_startup() {
start_memcached start_memcached
start_mutilate start_mutilate
sleep 15
warmup warmup
} }
......
...@@ -3,6 +3,9 @@ source "$EXP_ROOT/experiments/memcached.shared.sh" ...@@ -3,6 +3,9 @@ source "$EXP_ROOT/experiments/memcached.shared.sh"
pre_start() { pre_start() {
memcached_pre_start memcached_pre_start
EXPERIMENT_NAME_EXT="t$THREADS" EXPERIMENT_NAME_EXT="t$THREADS"
if [ "$FSTACK" == "true" ]; then
REINIT_RUNS=2
fi
} }
run_exp() { run_exp() {
...@@ -12,6 +15,7 @@ run_exp() { ...@@ -12,6 +15,7 @@ run_exp() {
for CONNS in 10 20 40 80 160 240 320; do for CONNS in 10 20 40 80 160 240 320; do
export CONNS export CONNS
run_mutilate c$CONNS run_mutilate c$CONNS
sleep 10
done done
memcached_deinit memcached_deinit
......
...@@ -18,6 +18,7 @@ run_exp() { ...@@ -18,6 +18,7 @@ run_exp() {
for QPS in $(seq $QPS_START $QPS_STEP $QPS_END); do for QPS in $(seq $QPS_START $QPS_STEP $QPS_END); do
export QPS export QPS
run_mutilate $QPS run_mutilate $QPS
sleep 15
done done
memcached_deinit memcached_deinit
......
[dpdk]
# Hexadecimal bitmask of cores to run on.
lcore_mask=1
# Number of memory channels.
channel=4
# Specify base virtual address to map.
#base_virtaddr=0x7f0000000000
# Promiscuous mode of the NIC, default: enabled.
promiscuous=1
numa_on=1
# TX checksum offload skip, default: disabled.
# We need this switch enabled in the following cases:
# -> The application wants to enforce a wrong checksum for testing purposes.
# -> Some cards advertise the offload capability but do not actually calculate the checksum.
tx_csum_offoad_skip=0
# TCP segment offload, default: disabled.
tso=0
# HW vlan strip, default: enabled.
vlan_strip=1
# Sleep when there are no incoming packets.
# unit: microseconds
idle_sleep=0
# Delay time (0-100) before sending packets when fewer than 32 packets are being sent.
# Default: 100 us.
# If set to 0, packets are sent immediately.
# If set to >100, the delay will be 100 us.
# unit: microseconds
pkt_tx_delay=100
# use symmetric Receive-side Scaling(RSS) key, default: disabled.
symmetric_rss=0
# PCI device enable list.
# And driver options
#pci_whitelist=02:00.0
# for multiple PCI devices
#pci_whitelist=02:00.0,03:00.0
# enabled port list
#
# EBNF grammar:
#
# exp ::= num_list {"," num_list}
# num_list ::= <num> | <range>
# range ::= <num>"-"<num>
# num ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
#
# examples
# 0-3 ports 0, 1,2,3 are enabled
# 1-3,4,7 ports 1,2,3,4,7 are enabled
#
# If bonding is used, configure the bonding port id in port_list
# and do not configure the slave port ids in port_list.
# For example, if port 0 and port 1 are trunked into bonding port 2,
# set `port_list=2` and configure the `[port2]` section.
port_list=1
# Number of vdev.
nb_vdev=0
# Number of bond.
nb_bond=0
# log level for dpdk, optional
log_level=1
# Each core writes into its own pcap file, which is opened once and closed once it is large enough.
# Supports dumping the first snaplen bytes of each packet.
# If a pcap file grows larger than savelen bytes, it is closed and dumping continues into the next file.
[pcap]
enable=0
snaplen=96
savelen=16777216
savepath=.
# Port config section
# Corresponds to dpdk.port_list's index: port0, port1...
[port1]
addr=192.168.199.1
netmask=255.255.255.0
broadcast=192.168.199.255
gateway=192.168.199.99
# set interface name, Optional parameter.
# if_name=eno3d1
# IPv6 net addr, Optional parameters.
#addr6=ff::02
#prefix_len=64
#gateway6=ff::01
# Multi virtual IPv4/IPv6 net addr, Optional parameters.
# `vip_ifname`: default `f-stack-x`
# `vip_addr`: Separated by semicolons, MAX number 64;
# Only support netmask 255.255.255.255, broadcast x.x.x.255 now, hard code in `ff_veth_setvaddr`.
# `vip_addr6`: Separated by semicolons, MAX number 64.
# `vip_prefix_len`: All addr6 use the same prefix now, default 64.
#vip_ifname=lo0
#vip_addr=192.168.1.3;192.168.1.4;192.168.1.5;192.168.1.6
#vip_addr6=ff::03;ff::04;ff::05;ff::06;ff::07
#vip_prefix_len=64
# lcore list used to handle this port
# the format is same as port_list
#lcore_list=0
# bonding slave port list used to handle this port
# need to config while this port is a bonding port
# the format is same as port_list
#slave_port_list=0,1
# Vdev config section
# Corresponds to dpdk.nb_vdev's index: vdev0, vdev1...
# iface : Shouldn't set always.
# path : The vuser device path in container. Required.
# queues : The max queues of vuser. Optional, default 1, greater or equal to the number of processes.
# queue_size : Queue size.Optional, default 256.
# mac : The mac address of vuser. Optional, default random, if vhost use phy NIC, it should be set to the phy NIC's mac.
# cq : Optional, if queues = 1, default 0; if queues > 1 default 1.
#[vdev0]
##iface=/usr/local/var/run/openvswitch/vhost-user0
#path=/var/run/openvswitch/vhost-user0
#queues=1
#queue_size=256
#mac=00:00:00:00:00:01
#cq=0
# bond config section
# See http://doc.dpdk.org/guides/prog_guide/link_bonding_poll_mode_drv_lib.html
#[bond0]
#mode=4
#slave=0000:0a:00.0,slave=0000:0a:00.1
#primary=0000:0a:00.0
#mac=f0:98:38:xx:xx:xx
## opt argument
#socket_id=0
#xmit_policy=l23
#lsc_poll_period_ms=100
#up_delay=10
#down_delay=50
# Kni config: if enabled and method=reject,
# all packets that do not belong to the following tcp_port and udp_port
# will transmit to kernel; if method=accept, all packets that belong to
# the following tcp_port and udp_port will transmit to kernel.
#[kni]
#enable=1
#method=reject
# The format is same as port_list
#tcp_port=80,443
#udp_port=53
# FreeBSD network performance tuning configurations.
# Most native FreeBSD configurations are supported.
[freebsd.boot]
# If you use rack/bbr, which depend on HPTS, you should set a greater value of hz; e.g. 100000 means a tick is 10us.
hz=100000
# Block out a range of descriptors to avoid overlap
# with the kernel's descriptor space.
# You can increase this value according to your app.
fd_reserve=1024
kern.ipc.maxsockets=262144
net.inet.tcp.syncache.hashsize=4096
net.inet.tcp.syncache.bucketlimit=100
net.inet.tcp.tcbhashsize=4096
kern.ncallout=262144
kern.features.inet6=1
net.inet6.ip6.auto_linklocal=1
net.inet6.ip6.accept_rtadv=2
net.inet6.icmp6.rediraccept=1
net.inet6.ip6.forwarding=0
kern.ipc.soacceptqueue=65536
kern.maxfiles=65536
[freebsd.sysctl]
kern.ipc.somaxconn=65536
kern.ipc.maxsockbuf=16777216
kern.ipc.soacceptqueue=65536
kern.maxfiles=65536
net.link.ether.inet.maxhold=5
net.inet.tcp.fast_finwait2_recycle=1
net.inet.tcp.sendspace=16384
net.inet.tcp.recvspace=8192
#net.inet.tcp.nolocaltimewait=1
net.inet.tcp.cc.algorithm=cubic
net.inet.tcp.sendbuf_max=16777216
net.inet.tcp.recvbuf_max=16777216
net.inet.tcp.sendbuf_auto=1
net.inet.tcp.recvbuf_auto=1
net.inet.tcp.sendbuf_inc=16384
#net.inet.tcp.recvbuf_inc=524288
net.inet.tcp.sack.enable=1
net.inet.tcp.blackhole=1
net.inet.tcp.msl=2000
net.inet.tcp.delayed_ack=1
net.inet.tcp.rfc1323=1
net.inet.udp.blackhole=1
net.inet.ip.redirect=0
net.inet.ip.forwarding=0
# Set the default TCP stack: freebsd, rack or bbr. You may need to increase the value of parameter 'freebsd.boot.hz' when using rack or bbr.
net.inet.tcp.functions_default=freebsd
# Needed by bbr; should be enabled.
net.inet.tcp.hpts.skip_swi=1
...@@ -36,6 +36,11 @@ while [[ $# -gt 0 ]]; do ...@@ -36,6 +36,11 @@ while [[ $# -gt 0 ]]; do
shift shift
shift shift
;; ;;
--fstack)
FSTACK=true
USERSTACK=true
shift
;;
*) *)
echo "Unknown option $1" echo "Unknown option $1"
exit 1 exit 1
...@@ -53,8 +58,27 @@ if [ ! -f "./experiments/$EXPERIMENT.sh" ]; then ...@@ -53,8 +58,27 @@ if [ ! -f "./experiments/$EXPERIMENT.sh" ]; then
exit 1 exit 1
fi fi
source ./tunings/normalize_queues.sh if [ "$USERSTACK" == "true" ] && [ ! -z "$EXTRA_TUNING" ]; then
[ ! -z "$EXTRA_TUNING" ] && source ./tunings/"$EXTRA_TUNING".sh echo "Cannot apply extra tuning when using user stacks"
exit 1
fi
if [ "$USERSTACK" != "true" ]; then
source ./tunings/normalize_queues.sh
[ ! -z "$EXTRA_TUNING" ] && source ./tunings/"$EXTRA_TUNING".sh
fi
# Initialization for DPDK
if [ "$USERSTACK" == "true" ]; then
# mlx4 supports bifurcation, and we only need to work around a kernel driver bug by re-loading the module
modprobe -r mlx4_ib mlx4_en mlx4_core
modprobe mlx4_en mlx4_ib
echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
echo 1024 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
echo 1024 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages
mkdir /mnt/huge || true
mount -t hugetlbfs nodev /mnt/huge || true
fi
source "./experiments/$EXPERIMENT.sh" source "./experiments/$EXPERIMENT.sh"
...@@ -64,6 +88,9 @@ EXPERIMENT_NAME="$EXPERIMENT.$(uname -r)" ...@@ -64,6 +88,9 @@ EXPERIMENT_NAME="$EXPERIMENT.$(uname -r)"
if [ ! -z "$EXTRA_TUNING" ]; then if [ ! -z "$EXTRA_TUNING" ]; then
EXPERIMENT_NAME="$EXPERIMENT_NAME.$EXTRA_TUNING" EXPERIMENT_NAME="$EXPERIMENT_NAME.$EXTRA_TUNING"
fi fi
if [ "$FSTACK" == "true" ]; then
EXPERIMENT_NAME="$EXPERIMENT_NAME.fstack"
fi
if [ ! -z "$EXPERIMENT_NAME_EXT" ]; then if [ ! -z "$EXPERIMENT_NAME_EXT" ]; then
EXPERIMENT_NAME="$EXPERIMENT_NAME.$EXPERIMENT_NAME_EXT" EXPERIMENT_NAME="$EXPERIMENT_NAME.$EXPERIMENT_NAME_EXT"
fi fi
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment