mirror of https://github.com/torvalds/linux.git
397 lines
13 KiB
Python
Executable File
397 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# SPDX-License-Identifier: GPL-2.0
|
|
# Author: Breno Leitao <leitao@debian.org>
|
|
"""
|
|
This test aims to evaluate the netpoll polling mechanism (as in
|
|
netpoll_poll_dev()). It presents a complex scenario where the network
|
|
attempts to send a packet but fails, prompting it to poll the NIC from within
|
|
the netpoll TX side.
|
|
|
|
This has been a crucial path in netpoll that was previously untested. Jakub
|
|
suggested using a single RX/TX queue, pushing traffic to the NIC, and then
|
|
sending netpoll messages (via netconsole) to trigger the poll.
|
|
|
|
In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
|
|
so, the test passes; otherwise it is reported as an expected failure (xfail). This test is very dependent on
|
|
the driver and environment, given we are trying to trigger a tricky scenario.
|
|
"""
|
|
|
|
import errno
|
|
import logging
|
|
import os
|
|
import random
|
|
import string
|
|
import threading
|
|
import time
|
|
from typing import Optional
|
|
|
|
from lib.py import (
|
|
bpftrace,
|
|
CmdExitFailure,
|
|
defer,
|
|
ethtool,
|
|
GenerateTraffic,
|
|
ksft_exit,
|
|
ksft_pr,
|
|
ksft_run,
|
|
KsftFailEx,
|
|
KsftSkipEx,
|
|
NetDrvEpEnv,
|
|
KsftXfailEx,
|
|
)
|
|
|
|
# Root logger setup: timestamped records, INFO and above
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# configfs directory under which the netconsole targets are created
NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
# Values written to the target's "remote_port" and "local_port" attributes
NETCONS_REMOTE_PORT: int = 6666
NETCONS_LOCAL_PORT: int = 1514

# Number of rounds. Each round sends MAX_WRITES messages and then
# re-creates the netconsole target
ITERATIONS: int = 20
# Number of messages written to /dev/kmsg in each round
MAX_WRITES: int = 40
# Result of the bpftrace run. It is expected to hold a single key, "hits":
# the number of times netpoll_poll_dev() was called
MAPS: dict[str, int] = {}
# Background thread running bpftrace while the messages are being sent
BPF_THREAD: Optional[threading.Thread] = None
# Timeout handed to bpftrace, i.e. how long it keeps tracing in parallel
BPFTRACE_TIMEOUT: int = 10
|
|
|
|
|
|
def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
    """
    Return the current (rx, tx) ring sizes of the interface, as reported by
    `ethtool -g`. The caller uses them to restore the rings after the test.

    Raises KsftSkipEx when the ethtool output has no entry or lacks the
    "rx"/"tx" fields.
    """
    try:
        rings = ethtool(f"-g {interface_name}", json=True)[0]
        return rings["rx"], rings["tx"]
    except (KeyError, IndexError) as err:
        raise KsftSkipEx(
            f"Failed to read RX/TX ringsize: {err}. Not going to mess with them."
        ) from err
|
|
|
|
|
|
def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
    """
    Try to set the RX and TX ring sizes of the interface with `ethtool -G`.

    ring_size is an (rx, tx) pair. Returns True when the command succeeded
    and False when ethtool exited with a failure (CmdExitFailure).
    """
    rx_size, tx_size = ring_size[0], ring_size[1]

    logging.debug("Setting ring size to %d/%d", rx_size, tx_size)
    try:
        ethtool(f"-G {interface_name} rx {rx_size} tx {tx_size}")
    except CmdExitFailure:
        # A real device may refuse this size. Report it so the caller can
        # try another value or, worst case, leave the rings untouched.
        return False
    else:
        return True
|
|
|
|
|
|
def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
    """
    Return the (rx, tx, combined) queue counts reported by `ethtool -l`.

    A count that is absent from the ethtool output is returned as -1.
    Raises KsftSkipEx when ethtool returned no entry at all.
    """
    try:
        channels = ethtool(f"-l {interface_name}", json=True)[0]
        counts = (
            channels.get("rx", -1),
            channels.get("tx", -1),
            channels.get("combined", -1),
        )
    except IndexError as err:
        raise KsftSkipEx(
            f"Failed to read queues numbers: {err}. Not going to mess with them."
        ) from err

    return counts
|
|
|
|
|
|
def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
    """
    Apply (rx, tx, combined) queue counts to the interface with `ethtool -L`.

    Entries equal to -1 are left out of the command line.
    Raises KsftSkipEx when the ethtool command fails.
    """
    rxq, txq, combined = queues

    requested = (("rx", rxq), ("tx", txq), ("combined", combined))
    options = "".join(f" {kind} {count}" for kind, count in requested if count != -1)
    cmdline = f"-L {interface_name}{options}"

    logging.debug("calling: ethtool %s", cmdline)

    try:
        ethtool(cmdline)
    except CmdExitFailure as err:
        raise KsftSkipEx(
            f"Failed to configure RX/TX queues: {err}. Ethtool not available?"
        ) from err
|
|
|
|
|
|
def netcons_generate_random_target_name() -> str:
    """
    Return "netcons_" followed by 8 random characters drawn from lowercase
    ASCII letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix_chars = random.choices(alphabet, k=8)
    return "netcons_" + "".join(suffix_chars)
|
|
|
|
|
|
def netcons_create_target(
    config_data: dict[str, str],
    target_name: str,
) -> None:
    """
    Create the netconsole target directory and write its attributes.

    Every key of config_data is written, in dict order, to the file of the
    same name inside the target directory; values are converted with str().
    Afterwards each attribute is read back and logged at debug level.

    Raises KsftFailEx when the directory cannot be created (except for
    EEXIST) or when writing/reading any attribute fails.
    """
    target_dir = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}"

    logging.debug("Using netconsole name: %s", target_name)
    try:
        os.makedirs(target_dir, exist_ok=True)
        logging.debug(
            "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
        )
    except OSError as err:
        # An already existing target is fine, anything else is fatal
        if err.errno != errno.EEXIST:
            raise KsftFailEx(
                f"Failed to create netconsole target directory: {err}"
            ) from err

    try:
        for attr_name, attr_value in config_data.items():
            attr_path = f"{target_dir}/{attr_name}"
            logging.debug("Writing %s to %s", attr_name, attr_path)
            with open(attr_path, "w", encoding="utf-8") as attr_file:
                # Values may not be strings, so always convert before writing
                attr_file.write(str(attr_value))

        # Dump what the target ended up with, for debugging purposes
        for attr_name in config_data:
            with open(f"{target_dir}/{attr_name}", "r", encoding="utf-8") as attr_file:
                current = attr_file.read()
                logging.debug(
                    "%s/%s/%s : %s",
                    NETCONSOLE_CONFIGFS_PATH,
                    target_name,
                    attr_name,
                    current.strip(),
                )

    except Exception as err:
        raise KsftFailEx(f"Failed to configure netconsole target: {err}") from err
|
|
|
|
|
|
def netcons_configure_target(
    cfg: NetDrvEpEnv, interface_name: str, target_name: str
) -> None:
    """
    Build the attribute set for a netconsole target bound to interface_name
    and create the target through netcons_create_target().

    Local and remote addresses come from cfg.addr and cfg.remote_addr.
    "enabled" is listed last, so it is the last attribute written.
    """
    settings = dict(
        extended="1",
        dev_name=interface_name,
        local_port=NETCONS_LOCAL_PORT,
        remote_port=NETCONS_REMOTE_PORT,
        local_ip=cfg.addr,
        remote_ip=cfg.remote_addr,
        remote_mac="00:00:00:00:00:00",  # Not important for this test
        enabled="1",
    )

    netcons_create_target(settings, target_name)
    logging.debug(
        "Created netconsole target: %s on interface %s", target_name, interface_name
    )
|
|
|
|
|
|
def netcons_delete_target(name: str) -> None:
    """
    Remove the netconsole target directory called `name`, if it exists.

    Raises KsftFailEx when the removal fails with an OSError.
    """
    target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
    try:
        if not os.path.exists(target_path):
            # Nothing to clean up
            return
        os.rmdir(target_path)
    except OSError as err:
        raise KsftFailEx(f"Failed to delete netconsole target: {err}") from err
|
|
|
|
|
|
def netcons_load_module() -> None:
    """
    Try to load the netconsole module (best effort).

    A non-zero status from modprobe is only logged at debug level and never
    raised: main() runs test_check_dependencies() right after this call,
    which skips the test when the netconsole configfs directory is missing.
    """
    status = os.system("modprobe netconsole")
    if status != 0:
        logging.debug("modprobe netconsole returned status %d", status)
|
|
|
|
|
|
def bpftrace_call() -> None:
    """
    Run bpftrace with a kprobe that counts calls to netpoll_poll_dev().

    The result is stored in the global variable `MAPS`, where the main
    thread picks it up.
    """
    # Rebinding the module-level name is intentional: this is how the result
    # is handed back to the main function
    global MAPS  # pylint: disable=W0603

    # Program handed to bpftrace, as in bpftrace -e "expr"
    probe_program = "kprobe:netpoll_poll_dev { @hits = count(); }"

    MAPS = bpftrace(probe_program, timeout=BPFTRACE_TIMEOUT, json=True)
    logging.debug("BPFtrace output: %s", MAPS)
|
|
|
|
|
|
def bpftrace_start() -> None:
    """
    Start `bpftrace_call` in a parallel thread.

    The thread object is stored in the global `BPF_THREAD` so the other
    bpftrace_* helpers can join it and inspect its state.

    Raises KsftSkipEx when the thread is not alive right after being started.
    """
    global BPF_THREAD  # pylint: disable=W0603

    BPF_THREAD = threading.Thread(target=bpftrace_call)
    BPF_THREAD.start()
    if not BPF_THREAD.is_alive():
        raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
|
|
|
|
|
|
def bpftrace_stop() -> None:
    """Wait for the bpftrace thread to finish, if one was ever started"""
    if not BPF_THREAD:
        return
    BPF_THREAD.join()
|
|
|
|
|
|
def bpftrace_any_hit(join: bool) -> bool:
    """
    Tell whether netpoll_poll_dev() was called, based on the global `MAPS`.

    While the bpftrace thread is still running, the answer depends on `join`:
    with join=True the call waits for the thread to finish first, with
    join=False it returns False right away without looking at the result.

    Raises KsftFailEx when bpftrace was never started or when its result has
    no "hits" entry.
    """
    if not BPF_THREAD:
        raise KsftFailEx("BPFtrace didn't start")

    if BPF_THREAD.is_alive():
        if not join:
            # bpftrace is still running, so there is no result to check yet
            return False
        # Wait for bpftrace to finish
        BPF_THREAD.join()

    logging.debug("MAPS coming from bpftrace = %s", MAPS)
    if "hits" not in MAPS:
        raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")

    hits = MAPS["hits"]
    logging.debug("Got a total of %d hits", hits)
    return hits > 0
|
|
|
|
|
|
def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Send netconsole messages while bpftrace watches netpoll_poll_dev().

    Raises KsftXfailEx when bpftrace saw no call to netpoll_poll_dev().
    """
    # bpftrace must already be watching netpoll_poll_dev() when the
    # netconsole messages start flowing, so start it first
    bpftrace_start()
    defer(bpftrace_stop)

    do_netpoll_flush(cfg, ifname, target_name)

    if not bpftrace_any_hit(join=True):
        raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")

    ksft_pr("netpoll_poll_dev() was called. Success")
|
|
|
|
|
|
def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Print messages to the console, trying to trigger a netpoll poll.

    The netconsole target is configured, then up to ITERATIONS rounds are
    run. Each round writes MAX_WRITES messages to /dev/kmsg, re-creates the
    target and sleeps briefly. The rounds stop early once the bpftrace thread
    has finished or has already recorded a hit.

    Raises KsftFailEx when bpftrace was never started, or when writing to
    /dev/kmsg has failed 5 times in total.
    """
    netcons_configure_target(cfg, ifname, target_name)
    # Write failures are counted across all rounds, not per round
    retry = 0

    for i in range(int(ITERATIONS)):
        if BPF_THREAD is None:
            # Same failure bpftrace_any_hit() reports, rather than an
            # AttributeError on the is_alive() call below
            raise KsftFailEx("BPFtrace didn't start")
        if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
            # bpftrace is done, stop sending messages
            break

        msg = f"netcons test #{i}"
        # Unbuffered binary mode: every message is handed to the kernel by
        # its own write call, so a failure is raised right here, inside the
        # try block. A buffered text file would only fill a Python buffer
        # and report errors when the file gets flushed or closed.
        with open("/dev/kmsg", "wb", buffering=0) as kmsg:
            for j in range(MAX_WRITES):
                try:
                    kmsg.write(f"{msg}-{j}\n".encode("utf-8"))
                except OSError as exception:
                    # in some cases, kmsg can be busy, so back off a little
                    time.sleep(1)
                    retry += 1
                    if retry < 5:
                        logging.info("Failed to write to kmsg. Retrying")
                        # The failed message is not re-sent; carry on with
                        # the next one
                        continue
                    raise KsftFailEx(
                        f"Failed to write to kmsg: {exception}"
                    ) from exception

        netcons_delete_target(target_name)
        netcons_configure_target(cfg, ifname, target_name)
        # Sleeping here gives a better chance of triggering the poll. The
        # value was found empirically while this test was being developed
        time.sleep(0.4)
|
|
|
|
|
|
def configure_network(ifname: str) -> None:
    """
    Shrink the interface to a single queue and a small ring, registering
    deferred calls that put the previous values back.
    """

    # One queue of each kind is used to force congestion
    prev_queues = ethtool_get_queues_cnt(ifname)
    logging.debug("RX/TX/combined queues: %s", prev_queues)
    # Queue kinds that are not > 0 keep their value, so only the kinds that
    # exist in the device are set to 1
    single_queue = tuple(1 if count > 0 else count for count in prev_queues)
    ethtool_set_queues_cnt(ifname, single_queue)
    defer(ethtool_set_queues_cnt, ifname, prev_queues)

    # Try increasingly larger ring sizes, starting from the smallest one.
    # It is not an error if the hardware accepts none of them.
    prev_ring_size = ethtool_get_ringsize(ifname)
    candidates = [(1, 1), (128, 128), (256, 256)]
    accepted = next(
        (size for size in candidates if ethtool_set_ringsize(ifname, size)), None
    )
    if accepted is not None:
        # hardware accepted the desired ringsize
        logging.debug("Set RX/TX ringsize to: %s from %s", accepted, prev_ring_size)
    defer(ethtool_set_ringsize, ifname, prev_ring_size)
|
|
|
|
|
|
def test_netpoll(cfg: NetDrvEpEnv) -> None:
    """
    Test netpoll by sending traffic to the interface and then sending
    netconsole messages to trigger a poll.

    The traffic generator is stopped and the netconsole target is deleted
    when the test ends, whether it passed or raised.
    """

    ifname = cfg.ifname
    configure_network(ifname)
    target_name = netcons_generate_random_target_name()
    traffic = None

    try:
        traffic = GenerateTraffic(cfg)
        do_netpoll_flush_monitored(cfg, ifname, target_name)
    finally:
        try:
            # traffic stays None when GenerateTraffic() itself raised
            if traffic is not None:
                traffic.stop()
        finally:
            # Remove the netconsole target even if stopping the traffic
            # failed, so it is not left behind
            netcons_delete_target(target_name)
|
|
|
|
|
|
def test_check_dependencies() -> None:
    """
    Make sure the netconsole configfs directory exists.

    Raises KsftSkipEx when it does not.
    """
    if os.path.exists(NETCONSOLE_CONFIGFS_PATH):
        return

    raise KsftSkipEx(
        f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set."  # pylint: disable=C0301
    )
|
|
|
|
|
|
def main() -> None:
    """
    Entry point: load netconsole, check the dependencies and run
    test_netpoll inside a NetDrvEpEnv environment.
    """
    netcons_load_module()
    test_check_dependencies()
    with NetDrvEpEnv(__file__) as cfg:
        ksft_run([test_netpoll], args=(cfg,))
    ksft_exit()


# Run the test only when executed as a script
if __name__ == "__main__":
    main()
|