mirror of https://github.com/torvalds/linux.git
There is a slightly faster way (in terms of the number of instructions
used) to calculate the position of the middle element while still
remaining safe against integer overflow.
./scripts/bloat-o-meter lib/bsearch.o.old lib/bsearch.o.new
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-24 (-24)
function old new delta
bsearch 122 98 -24
TEST
An int array of size 100001, elements [0..100000]. Built with gcc 7.1, -Os, x86_64.
a) bsearch() for an existing key, "100001 - 2":
BASE
====
$ perf stat ./a.out
Performance counter stats for './a.out':
619.445196 task-clock:u (msec) # 0.999 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
133 page-faults:u # 0.215 K/sec
1,949,517,279 cycles:u # 3.147 GHz (83.06%)
181,017,938 stalled-cycles-frontend:u # 9.29% frontend cycles idle (83.05%)
82,959,265 stalled-cycles-backend:u # 4.26% backend cycles idle (67.02%)
4,355,706,383 instructions:u # 2.23 insn per cycle
# 0.04 stalled cycles per insn (83.54%)
1,051,539,242 branches:u # 1697.550 M/sec (83.54%)
15,263,381 branch-misses:u # 1.45% of all branches (83.43%)
0.620082548 seconds time elapsed
PATCHED
=======
$ perf stat ./a.out
Performance counter stats for './a.out':
475.097316 task-clock:u (msec) # 0.999 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
135 page-faults:u # 0.284 K/sec
1,487,467,717 cycles:u # 3.131 GHz (82.95%)
186,537,162 stalled-cycles-frontend:u # 12.54% frontend cycles idle (82.93%)
28,797,869 stalled-cycles-backend:u # 1.94% backend cycles idle (67.10%)
3,807,564,203 instructions:u # 2.56 insn per cycle
# 0.05 stalled cycles per insn (83.57%)
1,049,344,291 branches:u # 2208.693 M/sec (83.60%)
5,485 branch-misses:u # 0.00% of all branches (83.58%)
0.475760235 seconds time elapsed
b) bsearch() for a non-existent key, "100001 + 2":
BASE
====
$ perf stat ./a.out
Performance counter stats for './a.out':
499.244480 task-clock:u (msec) # 0.999 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
132 page-faults:u # 0.264 K/sec
1,571,194,855 cycles:u # 3.147 GHz (83.18%)
13,450,980 stalled-cycles-frontend:u # 0.86% frontend cycles idle (83.18%)
21,256,072 stalled-cycles-backend:u # 1.35% backend cycles idle (66.78%)
4,171,197,909 instructions:u # 2.65 insn per cycle
# 0.01 stalled cycles per insn (83.68%)
1,009,175,281 branches:u # 2021.405 M/sec (83.79%)
3,122 branch-misses:u # 0.00% of all branches (83.37%)
0.499871144 seconds time elapsed
PATCHED
=======
$ perf stat ./a.out
Performance counter stats for './a.out':
399.023499 task-clock:u (msec) # 0.998 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
134 page-faults:u # 0.336 K/sec
1,245,793,991 cycles:u # 3.122 GHz (83.39%)
11,529,273 stalled-cycles-frontend:u # 0.93% frontend cycles idle (83.46%)
12,116,311 stalled-cycles-backend:u # 0.97% backend cycles idle (66.92%)
3,679,710,005 instructions:u # 2.95 insn per cycle
# 0.00 stalled cycles per insn (83.47%)
1,009,792,625 branches:u # 2530.660 M/sec (83.46%)
2,590 branch-misses:u # 0.00% of all branches (83.12%)
0.399733539 seconds time elapsed
Link: http://lkml.kernel.org/r/20170607150457.5905-1-sergey.senozhatsky@gmail.com
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| | | |
|---|---|---|
| .. | ||
| 842 | ||
| fonts | ||
| lz4 | ||
| lzo | ||
| mpi | ||
| raid6 | ||
| reed_solomon | ||
| xz | ||
| zlib_deflate | ||
| zlib_inflate | ||
| .gitignore | ||
| Kconfig | ||
| Kconfig.debug | ||
| Kconfig.kasan | ||
| Kconfig.kgdb | ||
| Kconfig.kmemcheck | ||
| Kconfig.ubsan | ||
| Makefile | ||
| argv_split.c | ||
| asn1_decoder.c | ||
| assoc_array.c | ||
| atomic64.c | ||
| atomic64_test.c | ||
| audit.c | ||
| bcd.c | ||
| bch.c | ||
| bitmap.c | ||
| bitrev.c | ||
| bsearch.c | ||
| btree.c | ||
| bug.c | ||
| build_OID_registry | ||
| bust_spinlocks.c | ||
| chacha20.c | ||
| check_signature.c | ||
| checksum.c | ||
| clz_ctz.c | ||
| clz_tab.c | ||
| cmdline.c | ||
| compat_audit.c | ||
| cordic.c | ||
| cpu_rmap.c | ||
| cpumask.c | ||
| crc-ccitt.c | ||
| crc-itu-t.c | ||
| crc-t10dif.c | ||
| crc4.c | ||
| crc7.c | ||
| crc8.c | ||
| crc16.c | ||
| crc32.c | ||
| crc32defs.h | ||
| crc32test.c | ||
| ctype.c | ||
| debug_info.c | ||
| debug_locks.c | ||
| debugobjects.c | ||
| dec_and_lock.c | ||
| decompress.c | ||
| decompress_bunzip2.c | ||
| decompress_inflate.c | ||
| decompress_unlz4.c | ||
| decompress_unlzma.c | ||
| decompress_unlzo.c | ||
| decompress_unxz.c | ||
| devres.c | ||
| digsig.c | ||
| div64.c | ||
| dma-debug.c | ||
| dma-noop.c | ||
| dma-virt.c | ||
| dump_stack.c | ||
| dynamic_debug.c | ||
| dynamic_queue_limits.c | ||
| earlycpio.c | ||
| errseq.c | ||
| extable.c | ||
| fault-inject.c | ||
| fdt.c | ||
| fdt_empty_tree.c | ||
| fdt_ro.c | ||
| fdt_rw.c | ||
| fdt_strerror.c | ||
| fdt_sw.c | ||
| fdt_wip.c | ||
| find_bit.c | ||
| flex_array.c | ||
| flex_proportions.c | ||
| gcd.c | ||
| gen_crc32table.c | ||
| genalloc.c | ||
| glob.c | ||
| globtest.c | ||
| hexdump.c | ||
| hweight.c | ||
| idr.c | ||
| inflate.c | ||
| int_sqrt.c | ||
| interval_tree.c | ||
| interval_tree_test.c | ||
| iomap.c | ||
| iomap_copy.c | ||
| iommu-common.c | ||
| iommu-helper.c | ||
| ioremap.c | ||
| iov_iter.c | ||
| irq_poll.c | ||
| irq_regs.c | ||
| is_single_threaded.c | ||
| jedec_ddr_data.c | ||
| kasprintf.c | ||
| kfifo.c | ||
| klist.c | ||
| kobject.c | ||
| kobject_uevent.c | ||
| kstrtox.c | ||
| kstrtox.h | ||
| lcm.c | ||
| libcrc32c.c | ||
| list_debug.c | ||
| list_sort.c | ||
| llist.c | ||
| locking-selftest-hardirq.h | ||
| locking-selftest-mutex.h | ||
| locking-selftest-rlock-hardirq.h | ||
| locking-selftest-rlock-softirq.h | ||
| locking-selftest-rlock.h | ||
| locking-selftest-rsem.h | ||
| locking-selftest-rtmutex.h | ||
| locking-selftest-softirq.h | ||
| locking-selftest-spin-hardirq.h | ||
| locking-selftest-spin-softirq.h | ||
| locking-selftest-spin.h | ||
| locking-selftest-wlock-hardirq.h | ||
| locking-selftest-wlock-softirq.h | ||
| locking-selftest-wlock.h | ||
| locking-selftest-wsem.h | ||
| locking-selftest.c | ||
| lockref.c | ||
| lru_cache.c | ||
| memory-notifier-error-inject.c | ||
| memweight.c | ||
| net_utils.c | ||
| netdev-notifier-error-inject.c | ||
| nlattr.c | ||
| nmi_backtrace.c | ||
| nodemask.c | ||
| notifier-error-inject.c | ||
| notifier-error-inject.h | ||
| of-reconfig-notifier-error-inject.c | ||
| oid_registry.c | ||
| once.c | ||
| parman.c | ||
| parser.c | ||
| pci_iomap.c | ||
| percpu-refcount.c | ||
| percpu_counter.c | ||
| percpu_ida.c | ||
| percpu_test.c | ||
| plist.c | ||
| pm-notifier-error-inject.c | ||
| prime_numbers.c | ||
| radix-tree.c | ||
| random32.c | ||
| ratelimit.c | ||
| rational.c | ||
| rbtree.c | ||
| rbtree_test.c | ||
| reciprocal_div.c | ||
| refcount.c | ||
| rhashtable.c | ||
| sbitmap.c | ||
| scatterlist.c | ||
| seq_buf.c | ||
| sg_pool.c | ||
| sg_split.c | ||
| sha1.c | ||
| show_mem.c | ||
| siphash.c | ||
| smp_processor_id.c | ||
| sort.c | ||
| stackdepot.c | ||
| stmp_device.c | ||
| string.c | ||
| string_helpers.c | ||
| strncpy_from_user.c | ||
| strnlen_user.c | ||
| swiotlb.c | ||
| syscall.c | ||
| test-kstrtox.c | ||
| test-string_helpers.c | ||
| test_bitmap.c | ||
| test_bpf.c | ||
| test_firmware.c | ||
| test_hash.c | ||
| test_hexdump.c | ||
| test_kasan.c | ||
| test_list_sort.c | ||
| test_module.c | ||
| test_parman.c | ||
| test_printf.c | ||
| test_rhashtable.c | ||
| test_siphash.c | ||
| test_sort.c | ||
| test_static_key_base.c | ||
| test_static_keys.c | ||
| test_user_copy.c | ||
| test_uuid.c | ||
| textsearch.c | ||
| timerqueue.c | ||
| ts_bm.c | ||
| ts_fsm.c | ||
| ts_kmp.c | ||
| ubsan.c | ||
| ubsan.h | ||
| ucs2_string.c | ||
| usercopy.c | ||
| uuid.c | ||
| vsprintf.c | ||
| win_minmax.c | ||