From 524ab614cb5eb54977c5d46d856339af7ea18750 Mon Sep 17 00:00:00 2001 From: Motomu Utsumi Date: Sun, 15 Oct 2017 16:56:26 +0900 Subject: [PATCH] lkl: Support multiple routing tables This change allows lkl to have multiple routing tables. Multiple routing tables are used for mptcp configuration, for example. Also rewrite route modification code with netlink instead of ioctl. Reviewed-by: Hajime Tazaki Signed-off-by: Motomu Utsumi --- arch/lkl/Kconfig | 8 + arch/lkl/include/uapi/asm/syscalls.h | 1 + tools/lkl/include/lkl.h | 61 ++++++++ tools/lkl/lib/hijack/init.c | 43 +++++- tools/lkl/lib/net.c | 222 +++++++++++++++++++++++---- tools/lkl/tests/hijack-test.sh | 22 +++ 6 files changed, 321 insertions(+), 36 deletions(-) diff --git a/arch/lkl/Kconfig b/arch/lkl/Kconfig index e5bdd4ef5e7e49..d92c5c9512d4bb 100644 --- a/arch/lkl/Kconfig +++ b/arch/lkl/Kconfig @@ -26,6 +26,14 @@ config LKL select HIGH_RES_TIMERS select NET_SCHED select NET_SCH_FQ + select IP_MULTICAST + select IPV6_MULTICAST + select IP_MULTIPLE_TABLES + select IPV6_MULTIPLE_TABLES + select IP_ROUTE_MULTIPATH + select IPV6_ROUTE_MULTIPATH + select IP_ADVANCED_ROUTER + select IPV6_ADVANCED_ROUTER config OUTPUTFORMAT string diff --git a/arch/lkl/include/uapi/asm/syscalls.h b/arch/lkl/include/uapi/asm/syscalls.h index 731cc1ab67b42d..d6e40723b6090e 100644 --- a/arch/lkl/include/uapi/asm/syscalls.h +++ b/arch/lkl/include/uapi/asm/syscalls.h @@ -122,6 +122,7 @@ struct sockaddr { #include #include #include +#include #include #include diff --git a/tools/lkl/include/lkl.h b/tools/lkl/include/lkl.h index 4c70c96d9e8506..432fef90e3c0c7 100644 --- a/tools/lkl/include/lkl.h +++ b/tools/lkl/include/lkl.h @@ -307,6 +307,18 @@ int lkl_if_set_ipv4(int ifindex, unsigned int addr, unsigned int netmask_len); */ int lkl_set_ipv4_gateway(unsigned int addr); +/** + * lkl_if_set_ipv4_gateway - add an IPv4 default route in rule table + * + * @ifindex - the ifindex of the interface, used for tableid calculation + * @addr - 
4-byte IP address of the interface + * @netmask_len - prefix length of the @addr + * @gw_addr - 4-byte IP address of the gateway + * @returns - 0 on success, otherwise a negative error value + */ +int lkl_if_set_ipv4_gateway(int ifindex, unsigned int addr, + unsigned int netmask_len, unsigned int gw_addr); + /** * lkl_if_set_ipv6 - set IPv6 address on interface * must be called after interface is up. @@ -326,6 +338,18 @@ int lkl_if_set_ipv6(int ifindex, void* addr, unsigned int netprefix_len); */ int lkl_set_ipv6_gateway(void* addr); +/** + * lkl_if_set_ipv6_gateway - add an IPv6 default route in rule table + * + * @ifindex - the ifindex of the interface, used for table id calculation + * @addr - 16-byte IP address of the interface + * @netmask_len - prefix length of the @addr + * @gw_addr - 16-byte IP address of the gateway (i.e., struct in6_addr) + * @returns - 0 on success, otherwise a negative error value + */ +int lkl_if_set_ipv6_gateway(int ifindex, void *addr, + unsigned int netmask_len, void *gw_addr); + /** * lkl_netdev - host network device handle, defined in lkl_host.h. */ @@ -471,6 +495,43 @@ int lkl_if_add_ip(int ifindex, int af, void *addr, unsigned int netprefix_len); */ int lkl_if_del_ip(int ifindex, int af, void *addr, unsigned int netprefix_len); +/** + * lkl_add_gateway - add a default gateway to the main routing table + * @af - address family of the ip address. Must be LKL_AF_INET or LKL_AF_INET6 + * @gwaddr - gateway address: struct in_addr or struct in6_addr, matching @af + */ +int lkl_add_gateway(int af, void *gwaddr); + +/** + * XXX Should I use OIF selector? + * temporary table idx = ifindex * 2 + 0 <- ipv4 + * temporary table idx = ifindex * 2 + 1 <- ipv6 + */ +/** + * lkl_if_add_rule_from_saddr - create an ip rule with a "from" selector + * @ifindex - the ifindex of the interface, used for table id calculation + * @af - address family of the ip address. 
Must be LKL_AF_INET or LKL_AF_INET6 + * @saddr - network byte order ip address, "from" selector address of this rule + */ +int lkl_if_add_rule_from_saddr(int ifindex, int af, void *saddr); + +/** + * lkl_if_add_gateway - add gateway to rule table + * @ifindex - the ifindex of the interface, used for table id calculation + * @af - address family of the ip address. Must be LKL_AF_INET or LKL_AF_INET6 + * @gwaddr - gateway address: struct in_addr or struct in6_addr, matching @af + */ +int lkl_if_add_gateway(int ifindex, int af, void *gwaddr); + +/** + * lkl_if_add_linklocal - add linklocal route to rule table + * @ifindex - the ifindex of the interface, used for table id calculation + * @af - address family of the ip address. Must be LKL_AF_INET or LKL_AF_INET6 + * @addr - ip address of the entry in network byte order + * @netprefix_len - prefix length of the @addr + */ +int lkl_if_add_linklocal(int ifindex, int af, void *addr, int netprefix_len); + /** * lkl_if_wait_ipv6_dad - wait for DAD to be done for a ipv6 address * must be called after interface is up diff --git a/tools/lkl/lib/hijack/init.c b/tools/lkl/lib/hijack/init.c index 1717f0301de92b..b008a949d35ca3 100644 --- a/tools/lkl/lib/hijack/init.c +++ b/tools/lkl/lib/hijack/init.c @@ -196,6 +196,8 @@ hijack_init(void) char *mac_str = getenv("LKL_HIJACK_NET_MAC"); char *netmask_len = getenv("LKL_HIJACK_NET_NETMASK_LEN"); char *netmask6_len = getenv("LKL_HIJACK_NET_NETMASK6_LEN"); + char *ifgateway = getenv("LKL_HIJACK_NET_IFGATEWAY"); + char *ifgateway6 = getenv("LKL_HIJACK_NET_IFGATEWAY6"); char *gateway = getenv("LKL_HIJACK_NET_GATEWAY"); char *gateway6 = getenv("LKL_HIJACK_NET_GATEWAY6"); char *debug = getenv("LKL_HIJACK_DEBUG"); @@ -398,13 +400,25 @@ hijack_init(void) fprintf(stderr, "failed to set IPv4 address: %s\n", lkl_strerror(ret)); } + if (ifgateway) { + unsigned int gwaddr = inet_addr(ifgateway); + + if (gwaddr != INADDR_NONE) { + ret = lkl_if_set_ipv4_gateway(nd_ifindex, + addr, nmlen, gwaddr); + if (ret < 
0) + fprintf(stderr, + "failed to set v4 if gw: %s\n", + lkl_strerror(ret)); + } + } } if (nd_ifindex >= 0 && gateway) { - unsigned int addr = inet_addr(gateway); + unsigned int gwaddr = inet_addr(gateway); - if (addr != INADDR_NONE) { - ret = lkl_set_ipv4_gateway(addr); + if (gwaddr != INADDR_NONE) { + ret = lkl_set_ipv4_gateway(gwaddr); if (ret< 0) fprintf(stderr, "failed to set IPv4 gateway: %s\n", lkl_strerror(ret)); @@ -423,16 +437,31 @@ hijack_init(void) fprintf(stderr, "failed to set IPv6address: %s\n", lkl_strerror(ret)); } + if (ifgateway6) { + char gwaddr[16]; + + if (inet_pton(AF_INET6, ifgateway6, gwaddr) != 1) { + fprintf(stderr, "Invalid ipv6 gateway: %s\n", + ifgateway6); + } else { + ret = lkl_if_set_ipv6_gateway(nd_ifindex, + &addr, pflen, gwaddr); + if (ret < 0) + fprintf(stderr, + "failed to set v6 if gw: %s\n", + lkl_strerror(ret)); + } + } } if (nd_ifindex >= 0 && gateway6) { - char gw[16]; + char gwaddr[16]; - if (inet_pton(AF_INET6, gateway6, gw) != 1) { + if (inet_pton(AF_INET6, gateway6, gwaddr) != 1) { fprintf(stderr, "Invalid ipv6 gateway: %s\n", gateway6); } else { - ret = lkl_set_ipv6_gateway(gw); - if (ret< 0) + ret = lkl_set_ipv6_gateway(gwaddr); + if (ret < 0) fprintf(stderr, "failed to set IPv6 gateway: %s\n", lkl_strerror(ret)); } diff --git a/tools/lkl/lib/net.c b/tools/lkl/lib/net.c index ffd1f43ac9522e..f9faeb57509426 100644 --- a/tools/lkl/lib/net.c +++ b/tools/lkl/lib/net.c @@ -90,40 +90,24 @@ int lkl_if_set_ipv4(int ifindex, unsigned int addr, unsigned int netmask_len) return lkl_if_add_ip(ifindex, LKL_AF_INET, &addr, netmask_len); } -int lkl_set_ipv4_gateway(unsigned int addr) +int lkl_if_set_ipv4_gateway(int ifindex, unsigned int src_addr, + unsigned int src_masklen, unsigned int via_addr) { - struct lkl_rtentry re; - int err, sock = lkl_sys_socket(LKL_AF_INET, LKL_SOCK_DGRAM, 0); - - if (sock < 0) - return sock; - - memset(&re, 0, sizeof(re)); - set_sockaddr((struct lkl_sockaddr_in *) &re.rt_dst, 0, 0); - 
set_sockaddr((struct lkl_sockaddr_in *) &re.rt_genmask, 0, 0); - set_sockaddr((struct lkl_sockaddr_in *) &re.rt_gateway, addr, 0); - re.rt_flags = LKL_RTF_UP | LKL_RTF_GATEWAY; - err = lkl_sys_ioctl(sock, LKL_SIOCADDRT, (long)&re); - lkl_sys_close(sock); + int err; - return err; + err = lkl_if_add_rule_from_saddr(ifindex, LKL_AF_INET, &src_addr); + if (err) + return err; + err = lkl_if_add_linklocal(ifindex, LKL_AF_INET, + &src_addr, src_masklen); + if (err) + return err; + return lkl_if_add_gateway(ifindex, LKL_AF_INET, &via_addr); } -int lkl_set_ipv6_gateway(void* addr) +int lkl_set_ipv4_gateway(unsigned int addr) { - int err, sock; - struct lkl_in6_rtmsg route; - - sock = lkl_sys_socket(LKL_AF_INET6, LKL_SOCK_DGRAM, 0); - if (sock < 0) - return sock; - memset(&route, 0, sizeof(route)); - memcpy(&route.rtmsg_gateway, addr, sizeof(struct lkl_in6_addr)); - route.rtmsg_flags = LKL_RTF_UP | LKL_RTF_GATEWAY; - - err = lkl_sys_ioctl(sock, LKL_SIOCADDRT, (long)&route); - lkl_sys_close(sock); - return err; + return lkl_add_gateway(LKL_AF_INET, &addr); } int lkl_netdev_get_ifindex(int id) @@ -355,6 +339,26 @@ int lkl_if_set_ipv6(int ifindex, void *addr, unsigned int netprefix_len) return lkl_if_wait_ipv6_dad(ifindex, addr); } +int lkl_if_set_ipv6_gateway(int ifindex, void *src_addr, + unsigned int src_masklen, void *via_addr) +{ + int err; + + err = lkl_if_add_rule_from_saddr(ifindex, LKL_AF_INET6, src_addr); + if (err) + return err; + err = lkl_if_add_linklocal(ifindex, LKL_AF_INET6, + src_addr, src_masklen); + if (err) + return err; + return lkl_if_add_gateway(ifindex, LKL_AF_INET6, via_addr); +} + +int lkl_set_ipv6_gateway(void *addr) +{ + return lkl_add_gateway(LKL_AF_INET6, addr); +} + /* returns: * 0 - succeed. * < 0 - error number. 
@@ -530,6 +534,219 @@ int lkl_if_del_ip(int ifindex, int af, void *addr, unsigned int netprefix_len)
 			    addr, netprefix_len);
 }
 
+/*
+ * Size in bytes of an address of family @af, or -1 (after logging) when @af
+ * is neither LKL_AF_INET nor LKL_AF_INET6.
+ */
+static int rt_addr_size(int af)
+{
+	if (af == LKL_AF_INET)
+		return 4;
+	if (af == LKL_AF_INET6)
+		return 16;
+	lkl_printf("Bad address family: %d\n", af);
+	return -1;
+}
+
+/*
+ * Per-interface routing table id: ifindex * 2 for IPv4, ifindex * 2 + 1 for
+ * IPv6.  rtm_table is a single byte and ids from LKL_RT_TABLE_DEFAULT upwards
+ * are reserved, so an ifindex that does not map below that range is rejected
+ * with -1 instead of being silently truncated onto an unrelated table.
+ */
+static int rt_if_table_id(int ifindex, int af)
+{
+	if (ifindex > 0 && ifindex < LKL_RT_TABLE_DEFAULT) {
+		int table = ifindex * 2 + (af == LKL_AF_INET6);
+
+		if (table < LKL_RT_TABLE_DEFAULT)
+			return table;
+	}
+	lkl_printf("Bad ifindex for rule table: %d\n", ifindex);
+	return -1;
+}
+
+/*
+ * Clear the host bits of a network byte order address in place, keeping only
+ * the leading @prefix_len bits.  Working byte-wise handles 4-byte and 16-byte
+ * addresses alike and never shifts by the full width of a type.
+ * The caller guarantees 0 <= @prefix_len <= @addr_sz * 8.
+ */
+static void rt_mask_prefix(unsigned char *addr, int addr_sz, int prefix_len)
+{
+	int i = prefix_len / 8;
+	int rem = prefix_len % 8;
+
+	if (rem)
+		addr[i++] &= (unsigned char)(0xff << (8 - rem));
+	for (; i < addr_sz; i++)
+		addr[i] = 0;
+}
+
+/*
+ * iproute_modify - build and send one route request over netlink
+ * @cmd - netlink message type (LKL_RTM_NEWROUTE, LKL_RTM_DELROUTE)
+ * @flags - extra LKL_NLM_F_* flags or'ed into LKL_NLM_F_REQUEST
+ * @ifindex - interface whose private table gets the route; the value
+ *	      LKL_RT_TABLE_MAIN selects the main table with no output interface
+ * @af - LKL_AF_INET or LKL_AF_INET6
+ * @route_addr - destination in network byte order, NULL for a default route;
+ *		 it is masked on a private copy, the caller's buffer is untouched
+ * @route_masklen - prefix length of @route_addr
+ * @gwaddr - gateway in network byte order, or NULL for none
+ * @returns - the result of rtnl_talk(), or a negative value when an argument
+ *	      is invalid or the netlink socket cannot be opened
+ */
+static int iproute_modify(int cmd, unsigned int flags, int ifindex, int af,
+		void *route_addr, int route_masklen, void *gwaddr)
+{
+	struct {
+		struct lkl_nlmsghdr n;
+		struct lkl_rtmsg r;
+		char buf[1024];
+	} req = {
+		.n.nlmsg_len = LKL_NLMSG_LENGTH(sizeof(struct lkl_rtmsg)),
+		.n.nlmsg_flags = LKL_NLM_F_REQUEST | flags,
+		.n.nlmsg_type = cmd,
+		.r.rtm_family = af,
+		.r.rtm_table = LKL_RT_TABLE_MAIN,
+		.r.rtm_scope = LKL_RT_SCOPE_UNIVERSE,
+	};
+	unsigned char dst[16];
+	int err, addr_sz, fd;
+
+	/* validate before opening the socket so no error path leaks the fd */
+	addr_sz = rt_addr_size(af);
+	if (addr_sz < 0)
+		return -1;
+
+	if (route_addr &&
+	    (route_masklen < 0 || route_masklen > addr_sz * 8)) {
+		lkl_printf("Bad prefix length: %d\n", route_masklen);
+		return -1;
+	}
+
+	if (ifindex != LKL_RT_TABLE_MAIN) {
+		int table = rt_if_table_id(ifindex, af);
+
+		if (table < 0)
+			return -1;
+		req.r.rtm_table = table;
+	}
+
+	if (cmd != LKL_RTM_DELROUTE) {
+		req.r.rtm_protocol = LKL_RTPROT_BOOT;
+		req.r.rtm_type = LKL_RTN_UNICAST;
+	}
+
+	if (gwaddr)
+		addattr_l(&req.n, sizeof(req),
+			  LKL_RTA_GATEWAY, gwaddr, addr_sz);
+
+	if (route_addr) {
+		memcpy(dst, route_addr, addr_sz);
+		rt_mask_prefix(dst, addr_sz, route_masklen);
+		req.r.rtm_dst_len = route_masklen;
+		addattr_l(&req.n, sizeof(req), LKL_RTA_DST, dst, addr_sz);
+	}
+
+	/* LKL_RTA_OIF carries an int ifindex, whatever the address family */
+	if (ifindex != LKL_RT_TABLE_MAIN)
+		addattr_l(&req.n, sizeof(req), LKL_RTA_OIF,
+			  &ifindex, sizeof(ifindex));
+
+	fd = netlink_sock(0);
+	if (fd < 0) {
+		lkl_printf("netlink_sock error: %d\n", fd);
+		return fd;
+	}
+
+	err = rtnl_talk(fd, &req.n);
+	lkl_sys_close(fd);
+	return err;
+}
+
+int lkl_if_add_linklocal(int ifindex, int af, void *addr, int netprefix_len)
+{
+	return iproute_modify(LKL_RTM_NEWROUTE, LKL_NLM_F_CREATE|LKL_NLM_F_EXCL,
+			      ifindex, af, addr, netprefix_len, NULL);
+}
+
+int lkl_if_add_gateway(int ifindex, int af, void *gwaddr)
+{
+	return iproute_modify(LKL_RTM_NEWROUTE, LKL_NLM_F_CREATE|LKL_NLM_F_EXCL,
+			      ifindex, af, NULL, 0, gwaddr);
+}
+
+int lkl_add_gateway(int af, void *gwaddr)
+{
+	return iproute_modify(LKL_RTM_NEWROUTE, LKL_NLM_F_CREATE|LKL_NLM_F_EXCL,
+			      LKL_RT_TABLE_MAIN, af, NULL, 0, gwaddr);
+}
+
+/*
+ * iprule_modify - build and send one policy rule request over netlink
+ * @cmd - netlink message type (LKL_RTM_NEWRULE adds create/exclusive flags)
+ * @ifindex - interface whose private table the rule points at
+ * @af - LKL_AF_INET or LKL_AF_INET6
+ * @saddr - source address to match, in network byte order
+ * @returns - the result of rtnl_talk(), or a negative value when an argument
+ *	      is invalid or the netlink socket cannot be opened
+ */
+static int iprule_modify(int cmd, int ifindex, int af, void *saddr)
+{
+	struct {
+		struct lkl_nlmsghdr n;
+		struct lkl_rtmsg r;
+		char buf[1024];
+	} req = {
+		.n.nlmsg_type = cmd,
+		.n.nlmsg_len = LKL_NLMSG_LENGTH(sizeof(struct lkl_rtmsg)),
+		.n.nlmsg_flags = LKL_NLM_F_REQUEST,
+		.r.rtm_protocol = LKL_RTPROT_BOOT,
+		.r.rtm_scope = LKL_RT_SCOPE_UNIVERSE,
+		.r.rtm_family = af,
+		.r.rtm_type = LKL_RTN_UNSPEC,
+	};
+	int fd, err;
+	int addr_sz, table;
+
+	if (!saddr)
+		return -1;
+
+	addr_sz = rt_addr_size(af);
+	if (addr_sz < 0)
+		return -1;
+
+	/* use ifindex as table id */
+	table = rt_if_table_id(ifindex, af);
+	if (table < 0)
+		return -1;
+	req.r.rtm_table = table;
+
+	if (cmd == LKL_RTM_NEWRULE) {
+		req.n.nlmsg_flags |= LKL_NLM_F_CREATE|LKL_NLM_F_EXCL;
+		req.r.rtm_type = LKL_RTN_UNICAST;
+	}
+
+	/* set from address: match the whole address, not a prefix */
+	req.r.rtm_src_len = 8 * addr_sz;
+	addattr_l(&req.n, sizeof(req), LKL_FRA_SRC, saddr, addr_sz);
+
+	fd = netlink_sock(0);
+	if (fd < 0)
+		return fd;
+
+	err = rtnl_talk(fd, &req.n);
+	lkl_sys_close(fd);
+	return err;
+}
+
+int lkl_if_add_rule_from_saddr(int ifindex, int af, void *saddr)
+{
+	return iprule_modify(LKL_RTM_NEWRULE, ifindex, af, saddr);
+}
+
 static int qdisc_add(int cmd, int flags, int ifindex,
 		     char *root, char *type)
 {
diff --git a/tools/lkl/tests/hijack-test.sh b/tools/lkl/tests/hijack-test.sh
index 0e0bb389bf6fdd..7ac8b8fe941656 100755
--- a/tools/lkl/tests/hijack-test.sh
+++ b/tools/lkl/tests/hijack-test.sh
@@ -150,6 +150,28 @@ echo "$qdisc"
 echo "$qdisc" | grep "qdisc fq" > /dev/null
 echo "$qdisc" | grep throttled > /dev/null
 
+# Make sure our device has ipv4 rule we expect
+addr=$(LKL_HIJACK_NET_IFGATEWAY=192.168.13.5 \
+       ${hijack_script} ip rule show)
+echo "$addr" | grep 192.168.13.2
+
+# Make sure our device has ipv6 rule we expect
+addr=$(LKL_HIJACK_NET_IFGATEWAY6=fc03::5 \
+       ${hijack_script} ip -6 rule show)
+echo "$addr" | grep fc03::2
+
+# Make sure our device has ipv4 rule table
+addr=$(LKL_HIJACK_NET_IFGATEWAY=192.168.13.5 \
+       ${hijack_script} ip route show table 4)
+echo "$addr" | grep 192.168.13.5
+echo "$addr" | grep 192.168.13.0
+
+# Make sure our device has ipv6 rule table
+addr=$(LKL_HIJACK_NET_IFGATEWAY6=fc03::5 \
+       ${hijack_script} ip -6 route show table 5)
+echo "$addr" | grep fc03::5
+echo "$addr" | grep fc03::
+
 if [ -z "`printenv CONFIG_AUTO_LKL_VIRTIO_NET_VDE`" ]; then
     exit 0
 fi