mirror of https://github.com/torvalds/linux.git
Arnaldo and I agreed it could be applied now, because I have other
pending patches depending on this one (Thank you Arnaldo)
(The other important patch moves skc_refcnt in a separate cache line,
so that the SMP/NUMA performance doesnt suffer from cache line ping pongs)
1) First some performance data :
--------------------------------
tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established()
The most time critical code is :
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
The sk_for_each() does use prefetch() hints but only the begining of
"struct sock" is prefetched.
As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far
away from the begining of "struct sock", it has to bring into CPU
cache cold cache line. Each iteration has to use at least 2 cache
lines.
This can be problematic if some chains are very long.
2) The goal
-----------
The idea I had is to change things so that INET_MATCH() may return
FALSE in 99% of cases only using the data already in the CPU cache,
using one cache line per iteration.
3) Description of the patch
---------------------------
Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
filling a 32 bits hole on 64 bits platform.
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse;
int skc_bound_dev_if;
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ unsigned int skc_hash;
struct proto *skc_prot;
};
Store in this 32 bits field the full hash, not masked by (ehash_size -
1) Using this full hash as the first comparison done in INET_MATCH
permits us immediatly skip the element without touching a second cache
line in case of a miss.
Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
sk_hash and tw_hash) already contains the slot number if we mask with
(ehash_size - 1)
File include/net/inet_hashtables.h
64 bits platforms :
#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash))
((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
32bits platforms:
#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
(inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
- Adds a prefetch(head->chain.first) in
__inet_lookup_established()/__tcp_v4_check_established() and
__inet6_lookup_established()/__tcp_v6_check_established() and
__dccp_v4_check_established() to bring into cache the first element of the
list, before the {read|write}_lock(&head->lock);
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
|
||
|---|---|---|
| .. | ||
| bluetooth | ||
| irda | ||
| sctp | ||
| tc_act | ||
| act_api.h | ||
| act_generic.h | ||
| addrconf.h | ||
| af_unix.h | ||
| ah.h | ||
| arp.h | ||
| atmclip.h | ||
| ax25.h | ||
| checksum.h | ||
| compat.h | ||
| datalink.h | ||
| dn.h | ||
| dn_dev.h | ||
| dn_fib.h | ||
| dn_neigh.h | ||
| dn_nsp.h | ||
| dn_route.h | ||
| dsfield.h | ||
| dst.h | ||
| esp.h | ||
| flow.h | ||
| gen_stats.h | ||
| icmp.h | ||
| ieee80211.h | ||
| ieee80211_crypt.h | ||
| if_inet6.h | ||
| inet6_hashtables.h | ||
| inet_common.h | ||
| inet_connection_sock.h | ||
| inet_ecn.h | ||
| inet_hashtables.h | ||
| inet_timewait_sock.h | ||
| inetpeer.h | ||
| ip.h | ||
| ip6_checksum.h | ||
| ip6_fib.h | ||
| ip6_route.h | ||
| ip6_tunnel.h | ||
| ip_fib.h | ||
| ip_mp_alg.h | ||
| ip_vs.h | ||
| ipcomp.h | ||
| ipconfig.h | ||
| ipip.h | ||
| ipv6.h | ||
| ipx.h | ||
| iw_handler.h | ||
| lapb.h | ||
| llc.h | ||
| llc_c_ac.h | ||
| llc_c_ev.h | ||
| llc_c_st.h | ||
| llc_conn.h | ||
| llc_if.h | ||
| llc_pdu.h | ||
| llc_s_ac.h | ||
| llc_s_ev.h | ||
| llc_s_st.h | ||
| llc_sap.h | ||
| ndisc.h | ||
| neighbour.h | ||
| netrom.h | ||
| p8022.h | ||
| pkt_act.h | ||
| pkt_cls.h | ||
| pkt_sched.h | ||
| protocol.h | ||
| psnap.h | ||
| raw.h | ||
| rawv6.h | ||
| request_sock.h | ||
| rose.h | ||
| route.h | ||
| sch_generic.h | ||
| scm.h | ||
| slhc_vj.h | ||
| snmp.h | ||
| sock.h | ||
| syncppp.h | ||
| tcp.h | ||
| tcp_ecn.h | ||
| tcp_states.h | ||
| transp_v6.h | ||
| udp.h | ||
| x25.h | ||
| x25device.h | ||
| xfrm.h | ||