From 93459a94976fc07f6d9fbde99c12940a8132a365 Mon Sep 17 00:00:00 2001 From: Michal Opala Date: Thu, 26 Sep 2024 18:19:48 +0200 Subject: [PATCH] F OpenNebula/one#6641: Add the "string-phone" transparent proxy (#3221) This feature lets cloud admins proxy VM connections to any service through the hypervisor. VMs use a link-local IP that is forwarded to a local proxy. This simplifies VM network requirements, e.g. to access gateways, vaults, configuration services, etc. Implementation: - Use network namespaces to isolate VNET networking. The ip netns command is executed through a wrapper to limit sudo access to commands. - Add tproxy.rb app to manage a group of daemons on HV nodes. - Use unix sockets for communication between proxy peers. "Inner" proxy runs in the netns without any network access. "Outer" proxy handles HV connections to services. - Use OpenNebulaNetwork.conf + 'onehost sync -f' for configuration. Proxy can be defined per network. NOTE: This commit does not include modifications to OpenNebulaNetwork.conf (cherry picked from commit b1ef4332e54cf0d9c762906743f20f30d38289a5) --- install.sh | 5 +- share/pkgs/sudoers/centos/opennebula | 4 +- share/pkgs/sudoers/debian/opennebula | 4 +- share/pkgs/sudoers/opennebula-node-kvm | 1 + share/pkgs/sudoers/opennebula-node-lxc | 1 + share/sudoers/sudoers.rb | 25 +- src/vnm_mad/remotes/lib/command.rb | 68 +++- src/vnm_mad/remotes/lib/ip_netns_exec | 3 + src/vnm_mad/remotes/lib/no_vlan.rb | 20 +- src/vnm_mad/remotes/lib/tproxy | 477 +++++++++++++++++++++++++ src/vnm_mad/remotes/lib/tproxy.rb | 250 +++++++++++++ src/vnm_mad/remotes/lib/vlan.rb | 20 +- src/vnm_mad/remotes/lib/vnmmad.rb | 6 +- 13 files changed, 845 insertions(+), 39 deletions(-) create mode 100755 src/vnm_mad/remotes/lib/ip_netns_exec create mode 100755 src/vnm_mad/remotes/lib/tproxy create mode 100644 src/vnm_mad/remotes/lib/tproxy.rb diff --git a/install.sh b/install.sh index c8b3b3ef50..ce5ebb68ee 100755 --- a/install.sh +++ b/install.sh @@ -1589,7
+1589,10 @@ NETWORK_FILES="src/vnm_mad/remotes/lib/vnm_driver.rb \ src/vnm_mad/remotes/lib/no_vlan.rb \ src/vnm_mad/remotes/lib/security_groups.rb \ src/vnm_mad/remotes/lib/security_groups_iptables.rb \ - src/vnm_mad/remotes/lib/nic.rb" + src/vnm_mad/remotes/lib/nic.rb \ + src/vnm_mad/remotes/lib/tproxy \ + src/vnm_mad/remotes/lib/tproxy.rb \ + src/vnm_mad/remotes/lib/ip_netns_exec" NETWORK_8021Q_FILES="src/vnm_mad/remotes/802.1Q/clean \ src/vnm_mad/remotes/802.1Q/post \ diff --git a/share/pkgs/sudoers/centos/opennebula b/share/pkgs/sudoers/centos/opennebula index 54a01e83bc..96776b1f41 100644 --- a/share/pkgs/sudoers/centos/opennebula +++ b/share/pkgs/sudoers/centos/opennebula @@ -6,7 +6,8 @@ Cmnd_Alias ONE_HA = /usr/bin/systemctl start opennebula-flow, /usr/bin/systemctl Cmnd_Alias ONE_LVM = /usr/sbin/lvcreate, /usr/sbin/lvremove, /usr/sbin/lvs, /usr/sbin/vgdisplay, /usr/sbin/lvchange, /usr/sbin/lvscan, /usr/sbin/lvextend Cmnd_Alias ONE_LXC = /usr/bin/mount, /usr/bin/umount, /usr/bin/bindfs, /usr/sbin/losetup, /usr/bin/qemu-nbd, /usr/bin/lxc-attach, /usr/bin/lxc-config, /usr/bin/lxc-create, /usr/bin/lxc-destroy, /usr/bin/lxc-info, /usr/bin/lxc-ls, /usr/bin/lxc-start, /usr/bin/lxc-stop, /usr/bin/lxc-console, /usr/sbin/e2fsck, /usr/sbin/resize2fs, /usr/sbin/xfs_growfs, /usr/bin/rbd-nbd Cmnd_Alias ONE_MARKET = /usr/lib/one/sh/create_container_image.sh -Cmnd_Alias ONE_NET = /usr/sbin/ebtables, /usr/sbin/iptables, /usr/sbin/ip6tables, /usr/sbin/ipset, /usr/sbin/ip link *, /usr/sbin/ip neighbour *, /usr/sbin/ip route *, /usr/sbin/ip rule *, /usr/sbin/ip tuntap * +Cmnd_Alias ONE_NET = /usr/sbin/ebtables, /usr/sbin/iptables, /usr/sbin/ip6tables, /usr/sbin/ipset, /usr/sbin/ip link *, /usr/sbin/ip neighbour *, /usr/sbin/ip route *, /usr/sbin/ip rule *, /usr/sbin/ip tuntap *, /usr/sbin/nft, /var/tmp/one/vnm/tproxy +Cmnd_Alias ONE_NETNS = /usr/sbin/ip netns add *, /usr/sbin/ip netns delete *, /usr/sbin/ip netns pids *, /var/tmp/one/vnm/ip_netns_exec ip address *, 
/var/tmp/one/vnm/ip_netns_exec ip link *, /var/tmp/one/vnm/ip_netns_exec ip -j link show *, /var/tmp/one/vnm/ip_netns_exec ip route * Cmnd_Alias ONE_OVS = /usr/bin/ovs-ofctl, /usr/bin/ovs-vsctl, /usr/bin/ovs-appctl Cmnd_Alias ONE_MEM = /usr/sbin/sysctl vm.drop_caches=3 vm.compact_memory=1 Cmnd_Alias ONE_VGPU = /var/tmp/one/vgpu @@ -14,3 +15,4 @@ Cmnd_Alias ONE_VGPU = /var/tmp/one/vgpu ## Command aliases are enabled individually in dedicated ## sudoers files by each OpenNebula component (server, node). # oneadmin ALL=(ALL) NOPASSWD: ONE_CEPH, ONE_HA, ONE_LVM, ONE_LXC, ONE_MARKET, ONE_NET, ONE_OVS, ONE_MEM +# oneadmin ALL=(ALL) NOPASSWD:SETENV: ONE_NETNS diff --git a/share/pkgs/sudoers/debian/opennebula b/share/pkgs/sudoers/debian/opennebula index 29f0fa9798..5533a939af 100644 --- a/share/pkgs/sudoers/debian/opennebula +++ b/share/pkgs/sudoers/debian/opennebula @@ -6,7 +6,8 @@ Cmnd_Alias ONE_HA = /usr/bin/systemctl start opennebula-flow, /usr/bin/systemctl Cmnd_Alias ONE_LVM = /usr/sbin/lvcreate, /usr/sbin/lvremove, /usr/sbin/lvs, /usr/sbin/vgdisplay, /usr/sbin/lvchange, /usr/sbin/lvscan, /usr/sbin/lvextend Cmnd_Alias ONE_LXC = /usr/bin/mount, /usr/bin/umount, /usr/bin/bindfs, /usr/sbin/losetup, /usr/bin/qemu-nbd, /usr/bin/lxc-attach, /usr/bin/lxc-config, /usr/bin/lxc-create, /usr/bin/lxc-destroy, /usr/bin/lxc-info, /usr/bin/lxc-ls, /usr/bin/lxc-start, /usr/bin/lxc-stop, /usr/bin/lxc-console, /usr/sbin/e2fsck, /usr/sbin/resize2fs, /usr/sbin/xfs_growfs, /usr/bin/rbd-nbd Cmnd_Alias ONE_MARKET = /usr/lib/one/sh/create_container_image.sh -Cmnd_Alias ONE_NET = /usr/sbin/ebtables, /usr/sbin/iptables, /usr/sbin/ip6tables, /usr/sbin/ipset, /usr/sbin/ip link *, /usr/sbin/ip neighbour *, /usr/sbin/ip route *, /usr/sbin/ip rule *, /usr/sbin/ip tuntap * +Cmnd_Alias ONE_NET = /usr/sbin/ebtables, /usr/sbin/iptables, /usr/sbin/ip6tables, /usr/sbin/ipset, /usr/sbin/ip link *, /usr/sbin/ip neighbour *, /usr/sbin/ip route *, /usr/sbin/ip rule *, /usr/sbin/ip tuntap *, /usr/sbin/nft, 
/var/tmp/one/vnm/tproxy +Cmnd_Alias ONE_NETNS = /usr/sbin/ip netns add *, /usr/sbin/ip netns delete *, /usr/sbin/ip netns pids *, /var/tmp/one/vnm/ip_netns_exec ip address *, /var/tmp/one/vnm/ip_netns_exec ip link *, /var/tmp/one/vnm/ip_netns_exec ip -j link show *, /var/tmp/one/vnm/ip_netns_exec ip route * Cmnd_Alias ONE_OVS = /usr/bin/ovs-ofctl, /usr/bin/ovs-vsctl, /usr/bin/ovs-appctl Cmnd_Alias ONE_MEM = /usr/sbin/sysctl vm.drop_caches=3 vm.compact_memory=1 Cmnd_Alias ONE_VGPU = /var/tmp/one/vgpu @@ -14,3 +15,4 @@ Cmnd_Alias ONE_VGPU = /var/tmp/one/vgpu ## Command aliases are enabled individually in dedicated ## sudoers files by each OpenNebula component (server, node). # oneadmin ALL=(ALL) NOPASSWD: ONE_CEPH, ONE_HA, ONE_LVM, ONE_LXC, ONE_MARKET, ONE_NET, ONE_OVS, ONE_MEM +# oneadmin ALL=(ALL) NOPASSWD:SETENV: ONE_NETNS diff --git a/share/pkgs/sudoers/opennebula-node-kvm b/share/pkgs/sudoers/opennebula-node-kvm index 86f9b016ce..f1a0c1b022 100644 --- a/share/pkgs/sudoers/opennebula-node-kvm +++ b/share/pkgs/sudoers/opennebula-node-kvm @@ -1 +1,2 @@ oneadmin ALL=(ALL:ALL) NOPASSWD: ONE_CEPH, ONE_NET, ONE_OVS, ONE_LVM, ONE_MEM, ONE_VGPU +oneadmin ALL=(ALL:ALL) NOPASSWD:SETENV: ONE_NETNS diff --git a/share/pkgs/sudoers/opennebula-node-lxc b/share/pkgs/sudoers/opennebula-node-lxc index bbb87079c2..fc2c5a6eea 100644 --- a/share/pkgs/sudoers/opennebula-node-lxc +++ b/share/pkgs/sudoers/opennebula-node-lxc @@ -1 +1,2 @@ oneadmin ALL=(ALL:ALL) NOPASSWD: ONE_LXC, ONE_NET, ONE_OVS, ONE_CEPH, ONE_LVM +oneadmin ALL=(ALL:ALL) NOPASSWD:SETENV: ONE_NETNS diff --git a/share/sudoers/sudoers.rb b/share/sudoers/sudoers.rb index 1c0fde1382..d2be3686ba 100644 --- a/share/sudoers/sudoers.rb +++ b/share/sudoers/sudoers.rb @@ -17,7 +17,7 @@ # Holds configuration about sudoers requirements for OpeNebula class Sudoers - NODECMDS = [:NET, :OVS, :LVM, :LXC, :MEM, :VGPU] + NODECMDS = [:NET, :NETNS, :OVS, :LVM, :LXC, :MEM, :VGPU] attr_accessor :cmds @@ -33,13 +33,24 @@ class Sudoers 'ip 
neighbour *', 'ip route *', 'ip rule *', - 'ip tuntap *' + 'ip tuntap *', + 'nft', + '/var/tmp/one/vnm/tproxy' ], - :LVM => [ + :NETNS => [ + 'ip netns add *', + 'ip netns delete *', + 'ip netns pids *', + '/var/tmp/one/vnm/ip_netns_exec ip address *', + '/var/tmp/one/vnm/ip_netns_exec ip link *', + '/var/tmp/one/vnm/ip_netns_exec ip -j link show *', + '/var/tmp/one/vnm/ip_netns_exec ip route *' + ], + :LVM => [ 'lvcreate', 'lvremove', 'lvs', 'vgdisplay', 'lvchange', 'lvscan', 'lvextend' ], - :OVS => ['ovs-ofctl', 'ovs-vsctl'], - :CEPH => ['rbd'], + :OVS => ['ovs-ofctl', 'ovs-vsctl'], + :CEPH => ['rbd'], :HA => [ 'systemctl start opennebula-flow', 'systemctl stop opennebula-flow', @@ -64,8 +75,8 @@ class Sudoers 'lxc-console', 'e2fsck', 'resize2fs', 'xfs_growfs', 'rbd-nbd' ], :MARKET => ["#{lib_location}/sh/create_container_image.sh"], - :MEM => ['sysctl vm.drop_caches=3 vm.compact_memory=1'], - :VGPU => ['sudo', '/var/tmp/one/vgpu'] + :MEM => ['sysctl vm.drop_caches=3 vm.compact_memory=1'], + :VGPU => ['sudo', '/var/tmp/one/vgpu'] } end diff --git a/src/vnm_mad/remotes/lib/command.rb b/src/vnm_mad/remotes/lib/command.rb index 4686972ebc..549afe230b 100644 --- a/src/vnm_mad/remotes/lib/command.rb +++ b/src/vnm_mad/remotes/lib/command.rb @@ -25,17 +25,20 @@ module VNMMAD # to local installations. Any modification requires to sync the hosts # with onehost sync command. 
COMMANDS = { - :ebtables => 'sudo -n ebtables --concurrent', - :iptables => 'sudo -n iptables -w 3 -W 20000', - :ip6tables => 'sudo -n ip6tables -w 3 -W 20000', - :ip => 'sudo -n ip', - :ip_unpriv => 'ip', - :virsh => 'virsh -c qemu:///system', - :ovs_vsctl => 'sudo -n ovs-vsctl', - :ovs_ofctl => 'sudo -n ovs-ofctl', - :ovs_appctl => 'sudo -n ovs-appctl', - :lsmod => 'lsmod', - :ipset => 'sudo -n ipset' + :ebtables => 'sudo -n ebtables --concurrent', + :iptables => 'sudo -n iptables -w 3 -W 20000', + :ip6tables => 'sudo -n ip6tables -w 3 -W 20000', + :ip => 'sudo -n ip', + :ip_unpriv => 'ip', + :virsh => 'virsh -c qemu:///system', + :ovs_vsctl => 'sudo -n ovs-vsctl', + :ovs_ofctl => 'sudo -n ovs-ofctl', + :ovs_appctl => 'sudo -n ovs-appctl', + :lsmod => 'lsmod', + :ipset => 'sudo -n ipset', + :nft => 'sudo -n nft', + :tproxy => 'sudo -n /var/tmp/one/vnm/tproxy', + :ip_netns_exec => 'sudo -nE /var/tmp/one/vnm/ip_netns_exec' } # Adjust :ip[6]tables commands to work with legacy versions @@ -129,6 +132,49 @@ module VNMMAD Open3.capture3(cmd_str) end + # Executes a command (paranoid version) + # @return [String, String, Process::Status] the standard output, + # standard error and + # status returned by + # Open3.capture3 + def self.no_shell(sym, *args, **opts) + terminate = (t = opts.delete(:term)).nil? ? true : t + + if args[0].is_a?(Hash) + env = args[0] + cmd = COMMANDS[sym].split(' ') + args[1..(-1)].to_a + else + env = {} + cmd = COMMANDS[sym].split(' ') + args[0..(-1)].to_a + end + + o, e, s = Open3.capture3(env, *cmd, **opts) + + env = env.empty? ? '' : env.map {|k, v| "#{k}='#{v}' " }.join + cmd = cmd.join(' ') + + if s.success? + OpenNebula.log_info "Executed \"#{env}#{cmd}\"." + OpenNebula.log_info Base64.strict_encode64(opts[:stdin_data]) \ + unless opts[:stdin_data].nil? + else + if terminate + OpenNebula.log_error "Command \"#{env}#{cmd}\" failed." + OpenNebula.log_error Base64.strict_encode64(opts[:stdin_data]) \ + unless opts[:stdin_data].nil? 
+ OpenNebula.log_error e + exit(s.exitstatus) + else + OpenNebula.log_error "Command \"#{env}#{cmd}\" failed (recovered)." + OpenNebula.log_error Base64.strict_encode64(opts[:stdin_data]) \ + unless opts[:stdin_data].nil? + OpenNebula.log_error e + end + end + + [o, e, s] + end + end end diff --git a/src/vnm_mad/remotes/lib/ip_netns_exec b/src/vnm_mad/remotes/lib/ip_netns_exec new file mode 100755 index 0000000000..4c92dbd98b --- /dev/null +++ b/src/vnm_mad/remotes/lib/ip_netns_exec @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -eu +exec /usr/sbin/ip netns exec "$NETNS" "$@" diff --git a/src/vnm_mad/remotes/lib/no_vlan.rb b/src/vnm_mad/remotes/lib/no_vlan.rb index b3e41b919e..6706ccd07f 100644 --- a/src/vnm_mad/remotes/lib/no_vlan.rb +++ b/src/vnm_mad/remotes/lib/no_vlan.rb @@ -42,6 +42,9 @@ module VNMMAD # Create the bridge. create_bridge(@nic) + # Setup transparent proxies. + TProxy.setup_tproxy(@nic, :up) + # Skip if vlan device is already in the bridge. next if !@nic[:phydev] || @nic[:phydev].empty? || @bridges[@nic[:bridge]].include?(@nic[:phydev]) @@ -82,19 +85,14 @@ module VNMMAD # vlan) next unless @bridges.include? @nic[:bridge] - # Skip if we want to keep the empty bridge - next if @nic[:conf][:keep_empty_bridge] + guests = @bridges[@nic[:bridge]] \ + - [@nic[:phydev], "#{@nic[:bridge]}b"] - # Skip if the phydev device is not the only left device in - # the bridge.A - if @nic[:phydev].nil? - keep = !@bridges[@nic[:bridge]].empty? - else - keep = @bridges[@nic[:bridge]].length > 1 || - !@bridges[@nic[:bridge]].include?(@nic[:phydev]) - end + # Setup transparent proxies. + TProxy.setup_tproxy(@nic, :down) if guests.count < 1 - next if keep + # Skip the bridge removal (on demand or when still in use). + next if @nic[:conf][:keep_empty_bridge] || guests.count > 0 # Delete the bridge. 
OpenNebula.exec_and_log("#{command(:ip)} link delete #{@nic[:bridge]}") diff --git a/src/vnm_mad/remotes/lib/tproxy b/src/vnm_mad/remotes/lib/tproxy new file mode 100755 index 0000000000..d554c4fb43 --- /dev/null +++ b/src/vnm_mad/remotes/lib/tproxy @@ -0,0 +1,477 @@ +#!/usr/bin/env ruby +# -------------------------------------------------------------------------- # +# Copyright 2002-2024, OpenNebula Project, OpenNebula Systems # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); you may # +# not use this file except in compliance with the License. You may obtain # +# a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +#--------------------------------------------------------------------------- # + +# frozen_string_literal: true + +# rubocop:disable Lint/MissingCopEnableDirective +# rubocop:disable Metrics/ParameterLists +# rubocop:disable Style/Documentation +# rubocop:disable Style/GlobalVars +# rubocop:disable Style/ParallelAssignment +# rubocop:disable Style/RegexpLiteral +# rubocop:disable Style/GuardClause + +RUBY_LIB_LOCATION = '/usr/lib/one/ruby' +GEMS_LOCATION = '/usr/share/one/gems' +LOG_LOCATION = '/var/log' +RUN_LOCATION = '/var/run' +REMOTES_LOCATION = '/var/tmp/one' +CONFIGURATION_FILE = REMOTES_LOCATION + '/etc/vnm/OpenNebulaNetwork.conf' + +# %%RUBYGEMS_SETUP_BEGIN%% +if File.directory?(GEMS_LOCATION) + real_gems_path = File.realpath(GEMS_LOCATION) + if !defined?(Gem) || Gem.path != [real_gems_path] + $LOAD_PATH.reject! 
{|p| p =~ /vendor_ruby/ } + + # Suppress warnings from Rubygems + # https://github.com/OpenNebula/one/issues/5379 + begin + verb = $VERBOSE + $VERBOSE = nil + require 'rubygems' + Gem.use_paths(real_gems_path) + ensure + $VERBOSE = verb + end + end +end +# %%RUBYGEMS_SETUP_END%% + +$LOAD_PATH << RUBY_LIB_LOCATION + +require 'async/io' +require 'async/io/stream' +require 'async/io/trap' +require 'async/io/unix_endpoint' +require 'console' +require 'ffi' +require 'json' +require 'open3' +require 'socket' +require 'yaml' + +SERVICE_ADDR = '169.254.16.9' + +DEFAULT_CONFIG = { + :tproxy_debug_level => 2 # 0 = ERROR, 1 = WARNING, 2 = INFO, 3 = DEBUG +}.freeze + +LOG_LEVEL_MAP = { + 0 => 3, # ERROR + 1 => 2, # WARN + 2 => 1, # INFO + 3 => 0 # DEBUG +}.freeze + +$config = DEFAULT_CONFIG.dup +$logger = nil + +module VNMMAD + + module TProxy + + extend FFI::Library + + ffi_lib FFI::Library::LIBC + + attach_function :setns, [:int, :int], :int + + class ProxyPeer + + def initialize(daddr, dport) + @daddr, @dport = daddr, dport + end + + def run + Async do |task| + glue_peers(@task = task) + end + end + + def stop + @socket.close + @task.stop + end + + private + + def glue_peers(task) + @local_ep.accept do |client_peer| + $logger.debug(self) do + "Accept #{client_peer.remote_address.inspect}" + end + + begin + if @remote_peer_type == :unix + daddr = VNMMAD::TProxy.to_socket_path(@dport) + remote_ep = Async::IO::Endpoint.unix(daddr) + else + remote_ep = Async::IO::Endpoint.tcp(@daddr, @dport) + end + + remote_ep.connect do |remote_peer| + client_stream, remote_stream = Async::IO::Stream.new(client_peer), + Async::IO::Stream.new(remote_peer) + + glue_streams(client_stream, remote_stream, task).wait + ensure + $logger.debug(self) do + "Close #{remote_peer.remote_address.inspect}" + end + + remote_peer.close + end + rescue Errno::ECONNREFUSED, + Errno::ECONNRESET, + Errno::EHOSTUNREACH, + Errno::ETIMEDOUT => e + $logger.error(self) do + e.message + end + end + ensure + 
$logger.debug(self) do + "Close #{client_peer.remote_address.inspect}" + end + + client_peer.close + end + end + + def glue_streams(stream1, stream2, task) + task.async do |subtask| + concurrent = [] + concurrent << subtask.async do + while (chunk = stream1.read_partial) + stream2.write chunk + stream2.flush + end + end + concurrent << subtask.async do + while (chunk = stream2.read_partial) + stream1.write chunk + stream1.flush + end + end + concurrent.each(&:wait) + end + end + + end + + class InnerPeer < ProxyPeer + + def initialize(baddr, bport, daddr, dport) + super(daddr, dport) + + @remote_peer_type = :unix + + @local_ep = Async::IO::Endpoint.tcp(baddr, bport, :reuse_address => true) + @local_ep.bind do |sock| + (@socket = sock).listen Socket::SOMAXCONN + $logger.info(self) do + "Bind #{Addrinfo.tcp(baddr, bport).inspect}" + end + end + end + + end + + class InnerProxy + + def initialize(brdev = nil) + @brdev = brdev + @peers = {} + @sighup = Async::IO::Trap.new :HUP + @sighup.ignore! + @sighup.install! + end + + def run + Async do + reload + @sighup.wait { reload } + end + end + + private + + def reload + endpoints = VNMMAD::TProxy.load_peer_config(:brdev => @brdev)[:endpoints] + + return if endpoints.empty? + + # Stop and remove cancelled proxies. + @peers.keys.each do |k| + brdev, service_port = k + + next unless endpoints.dig(service_port, :brdev)&.include?(brdev) + + @peers.delete(k)&.stop + end + + # Create and start missing proxies. + endpoints.each do |service_port, v| + v[:brdev].each do |brdev| + next unless @peers[k = [brdev, service_port]].nil? 
+ + (@peers[k] = InnerPeer.new(SERVICE_ADDR, service_port, + SERVICE_ADDR, service_port)).run + end + rescue StandardError => e + $logger.error(self) do + e.message + end + end + end + + end + + class OuterPeer < ProxyPeer + + def initialize(bport, daddr, dport) + super(daddr, dport) + + @remote_peer_type = :tcp + + baddr = VNMMAD::TProxy.to_socket_path(bport) + + @local_ep = Async::IO::Endpoint.unix(baddr) + @local_ep.bind do |sock| + @socket = sock + $logger.info(self) do + "Bind #{baddr}" + end + end + end + + end + + class OuterProxy + + def initialize + @peers = {} + @sighup = Async::IO::Trap.new :HUP + @sighup.ignore! + @sighup.install! + end + + def run + Async do + reload + @sighup.wait { reload } + end + end + + private + + def reload + endpoints = VNMMAD::TProxy.load_peer_config[:endpoints] + + return if endpoints.empty? + + # Stop and remove cancelled proxies. + @peers.keys.each do |k| + next unless endpoints[k].nil? + + @peers.delete(k)&.stop + end + + # Create and start missing proxies. + endpoints.each do |service_port, v| + next unless @peers[k = service_port].nil? + + (@peers[k] = OuterPeer.new(service_port, v[:daddr], v[:dport])).run + rescue StandardError => e + $logger.error(self) do + e.message + end + end + end + + end + + class Daemon + + def initialize(comm) + @comm = comm + end + + def run(argv = ARGV.dup, &block) + pid, cmd = detect + if !pid.nil? && argv[0] == 'status' + puts "#{cmd}: #{pid}" + return + end + if !pid.nil? && argv[0] == 'reload' + Process.kill(:HUP, pid.to_i) + return + end + if !pid.nil? && ['stop', 'restart'].include?(argv[0]) + Process.kill(:TERM, pid.to_i) + pid, = detect # rerun + end + if pid.nil? 
&& ['start', 'restart'].include?(argv[0]) + fork do + $0 = @comm + + Process.setsid + + $stdin.reopen '/dev/null' + + $stdout.reopen "#{LOG_LOCATION}/#{@comm}.log", 'a' + $stdout.sync = true + + $stderr.reopen $stdout + $stderr.sync = true + + block.call + end + end + end + + private + + def detect + o, _, s = Open3.capture3 'ps', '--no-headers', '-wwo', 'pid,cmd', '-C', 'ruby' + o.lines + .map {|line| line.strip.split(' ', 2) } + .find {|_, cmd| cmd == @comm } if s.success? + end + + end + + def self.load_peer_config(family: 'ip', brdev: nil) + if brdev.nil? + cmd = "nsenter -n -t 1 /usr/sbin/nft -j list table #{family} one_tproxy" + else + cmd = "nsenter -n -t 1 /usr/sbin/nft -j list map #{family} one_tproxy ep_#{brdev}" + end + + o, _, s = Open3.capture3(*cmd.split(' ')) + + if s.success? + endpoints = JSON.parse(o)['nftables'].each_with_object({}) do |v, h| + next if v['map'].nil? + next if v['map']['name'] !~ %r{^ep_([^/:\s]+)$} + next if v['map']['elem'].to_a.empty? + + v['map']['elem'].each do |bport, daddr_dport| + h[bport] ||= { :brdev => [], :daddr => nil, :dport => nil } + h[bport][:brdev] << Regexp.last_match(1) + h[bport][:daddr] = daddr_dport['concat'][0] + h[bport][:dport] = daddr_dport['concat'][1] + end + end + + bridges = endpoints.values + .map {|v| v[:brdev] } + .flatten + .uniq + + { :endpoints => endpoints, :bridges => bridges } + else + { :endpoints => {}, :bridges => [] } + end + rescue StandardError + { :endpoints => {}, :bridges => [] } + end + + def self.cancel_spurious_proxies(config = nil) + o, _, s = Open3.capture3 'ps', '--no-headers', '-wwo', 'pid,cmd', '-C', 'ruby' + + return unless s.success? # nothing to stop (most likely) + + config ||= load_peer_config + + spurious = o.lines.each_with_object([]) do |line, a| + pid, cmd = line.strip.split(' ', 2) + + case cmd.strip + when %r{^one_tproxy_([^/:\s]+)$} + a << pid unless config[:bridges].include?(Regexp.last_match(1)) + when %r{^one_tproxy$} + a << pid if config[:endpoints].empty? 
&& pid.to_i != Process.pid + end + end + + return if spurious.empty? + + spurious.each {|pid| Process.kill(:TERM, pid.to_i) } + end + + def self.use_netns(name) + File.open("/run/netns/#{name}", 'rb') do |f| + if VNMMAD::TProxy.setns(f.fileno, 0) != 0 + raise StandardError, 'Unable to set network namespace' + end + end + end + + def self.to_socket_path(service_port) + "#{RUN_LOCATION}/one_tproxy_#{service_port}.socket" + end + + end + +end + +# Architecture of this transparent proxy solution can be roughly visualized as: +# +# VM <--tcp--> InnerProxy (multiple processes, 1 per each dedicated netns) +# ^ +# | +# unix +# | +# v +# OuterProxy (only a single process, default netns) <--tcp--> SVC (like OneGate) +# +# To avoid any network related security risks we split TCP streams into "inner" and "outer" parts +# to glue them back again using unix sockets (that are completely unrelated to TCP/IP stacks). +# Keeping "inner" parts in dedicated VNET namespaces allows for perfect isolation from any other +# TCP/IP traffic going through HV machines. Of course, there is an assumption that it is possible to +# connect to service endpoints from HV machines via the backbone / service network. + +if caller.empty? + # The "CONFIGURATION_FILE" is updated during the host sync procedure. + $config.merge! YAML.load_file(CONFIGURATION_FILE) + + peer_config = VNMMAD::TProxy.load_peer_config + + VNMMAD::TProxy.cancel_spurious_proxies(peer_config) + + # Silently refuse to start if no configuration is discovered. + exit if peer_config[:endpoints].empty? 
+ + VNMMAD::TProxy::Daemon.new('one_tproxy').run do + CustomLogger = Console::Filter[:debug => 0, :info => 1, :warn => 2, :error => 3] + $logger = CustomLogger.new Console::Serialized::Logger.new($stdout), + :level => LOG_LEVEL_MAP[$config[:tproxy_debug_level]] + VNMMAD::TProxy::OuterProxy.new.run + end + + peer_config[:bridges].each do |brdev| + VNMMAD::TProxy::Daemon.new("one_tproxy_#{brdev}").run do + CustomLogger = Console::Filter[:debug => 0, :info => 1, :warn => 2, :error => 3] + $logger = CustomLogger.new Console::Serialized::Logger.new($stdout), + :level => LOG_LEVEL_MAP[$config[:tproxy_debug_level]] + VNMMAD::TProxy.use_netns("one_tproxy_#{brdev}") + VNMMAD::TProxy::InnerProxy.new(brdev).run + end + end +end diff --git a/src/vnm_mad/remotes/lib/tproxy.rb b/src/vnm_mad/remotes/lib/tproxy.rb new file mode 100644 index 0000000000..18b00c4fd6 --- /dev/null +++ b/src/vnm_mad/remotes/lib/tproxy.rb @@ -0,0 +1,250 @@ +# -------------------------------------------------------------------------- # +# Copyright 2002-2024, OpenNebula Project, OpenNebula Systems # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); you may # +# not use this file except in compliance with the License. You may obtain # +# a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +#--------------------------------------------------------------------------- # + +require 'base64' +require 'erb' +require 'json' +require 'open3' +require 'resolv' + +module VNMMAD + + # Module to handle transparent proxies. + module TProxy + + # The entry point for the tproxy feature. 
+ def self.setup_tproxy(nic, direction) + # Short-circuit if no tproxy config is provided. + return if CONF[:tproxy].to_a.empty? + + endpoints = CONF[:tproxy].to_a.each_with_object([]) do |conf, a| + # When networks are not specified add this proxy to all bridges. + next if !(nets = conf[:networks].to_a.map(&:to_s)).empty? \ + && (nets & nic.slice(:network, :network_id).values.map(&:to_s)).empty? + + next if conf[:service_port].nil? + next if conf[:remote_addr].nil? || conf[:remote_addr] !~ Resolv::IPv4::Regex + next if conf[:remote_port].nil? + + opts = { + :service_port => Integer(conf[:service_port] || conf[:remote_port]), + :remote_addr => conf[:remote_addr], + :remote_port => Integer(conf[:remote_port]) + } + + # Remove duplicated services, only the top one (lowest index) defined in the + # :tproxy array will be used, others must be ignored. + # This is not considered a limitation since users can define multiple arbitrary + # services by picking different service ports. At the same time it significantly + # simplifies tproxy implementation on HV machines. + if !(a.find {|item| item[:service_port] == opts[:service_port] }).nil? + OpenNebula.log_warning "Ignoring tproxy duplicate: #{opts}" + next + end + + a << opts + rescue ArgumentError + next + end + + # Short-circuit if no valid config is recognized. + return if endpoints.empty? + + if direction == :up + enable_tproxy(nic, endpoints) + else + disable_tproxy(nic, endpoints) + end + + # With empty config tproxy should voluntarily terminate, + # that effectively makes the "stop" operation unnecessary. + run_tproxy('start') + run_tproxy('reload') + end + + def self.enable_tproxy(nic, endpoints) + brdev = nic[:bridge] + + # IFNAMSIZ = 16 causes we cannot use custom prefixes for veth devices.. 
+ ip_link_add_veth(brdev) + ip_link_set("dev #{brdev}b master #{brdev} up") + + ip_netns_add(brdev) + ip_link_set_netns(brdev) + + ip_netns_exec(brdev, "ip address replace 169.254.16.9/32 dev #{brdev}a") + ip_netns_exec(brdev, "ip link set dev #{brdev}a up") + + ip_netns_exec(brdev, "ip route replace default dev #{brdev}a") + + veth_mac = ip_netns_exec(brdev, + "ip -j link show dev #{brdev}a", + :expect_json => true).dig(0, 0, 'address') + + # This is required to avoid 169.254.16.9 address conflicts in case of VNETs + # used on multiple different HV hosts are attached to multiple guest VMs. + # Basically, we short-circuit any 169.254.16.9 communication and + # forcefully redirect every packet destined to 169.254.16.9 to be handled + # locally (regardless of the actual ARP resolution in guest VMs). + nft(ERB.new(<<~NFT, :trim_mode => '-').result(binding)) + table bridge one_tproxy { + chain ch_<%= brdev %> { + type filter hook prerouting priority dstnat; policy accept; + } + } + + flush chain bridge one_tproxy ch_<%= brdev %>; + + table bridge one_tproxy { + chain ch_<%= brdev %> { + meta ibrname "<%= brdev %>" \\ + ip daddr 169.254.16.9 \\ + meta pkttype set host ether daddr set <%= veth_mac %> \\ + accept + } + } + NFT + + # The tproxy processes read their config from "ip one_tproxy ep_*" maps + # defined in nftables, that way users can manually restart tproxy on demand + # without the need for providing any command line arguments. + # All maps are managed by the driver, proxies only read their contents. + nft(ERB.new(<<~NFT, :trim_mode => '-').result(binding)) + table ip one_tproxy { + map ep_<%= brdev %> { + type inet_service : ipv4_addr \\ + . inet_service; + } + } + + flush map ip one_tproxy ep_<%= brdev %>; + + <%- endpoints.each do |ep| -%> + add element ip one_tproxy ep_<%= brdev %> { + <%= ep[:service_port] %> : <%= ep[:remote_addr] %> \\ + . 
<%= ep[:remote_port] %> + } + <%- end -%> + NFT + end + + def self.disable_tproxy(nic, endpoints) + brdev = nic[:bridge] + + nft(ERB.new(<<~NFT, :trim_mode => '-').result(binding)) + table ip one_tproxy { + map ep_<%= brdev %> { + type inet_service : ipv4_addr \\ + . inet_service; + } + } + + delete map ip one_tproxy ep_<%= brdev %>; + NFT + + nft(ERB.new(<<~NFT, :trim_mode => '-').result(binding)) + table bridge one_tproxy { + chain ch_<%= brdev %> { + type filter hook prerouting priority dstnat; policy accept; + } + } + + delete chain bridge one_tproxy ch_<%= brdev %>; + NFT + + ip_link_delete_veth(brdev) + + ip_netns_delete(brdev) + end + + def self.ip_link_add_veth(brdev) + o, e, s = run(:ip, 'link', 'show', "#{brdev}b", :term => false) + if s.success? + [o, e, s] + else + run(:ip, 'link', 'add', "#{brdev}b", 'type', 'veth', 'peer', 'name', "#{brdev}a") + end + end + + def self.ip_link_delete_veth(brdev) + o, e, s = run(:ip, 'link', 'show', "#{brdev}b", :term => false) + if s.success? + run(:ip, 'link', 'delete', "#{brdev}b") + else + [o, e, s] + end + end + + def self.ip_link_set(cmd) + run(:ip, 'link', 'set', *cmd.strip.split(' ')) + end + + def self.ip_link_set_netns(brdev) + o, e, s = run(:ip, 'link', 'show', "#{brdev}a", :term => false) + if s.success? + run(:ip, 'link', 'set', "#{brdev}a", 'netns', "one_tproxy_#{brdev}") + else + [o, e, s] + end + end + + def self.ip_netns_add(brdev) + o, e, s = run(:ip, 'netns', 'pids', "one_tproxy_#{brdev}", :term => false) + if s.success? + [o, e, s] + else + run(:ip, 'netns', 'add', "one_tproxy_#{brdev}") + end + end + + def self.ip_netns_delete(brdev) + o, e, s = run(:ip, 'netns', 'pids', "one_tproxy_#{brdev}", :term => false) + if s.success? 
+ run(:ip, 'netns', 'delete', "one_tproxy_#{brdev}") + else + [o, e, s] + end + end + + def self.ip_netns_exec(brdev, cmd, expect_json: false) + env = { 'NETNS' => "one_tproxy_#{brdev}" } + o, e, s = run(:ip_netns_exec, env, *cmd.strip.split(' ')) + if expect_json + if s.success? + [JSON.parse(o), e, s] + else + [{}, e, s] + end + else + [o, e, s] + end + end + + def self.nft(script, **opts) + run(:nft, '-f-', **opts, :stdin_data => script) + end + + def self.run_tproxy(cmd) + run(:tproxy, *cmd.strip.split(' ')) + end + + private_class_method def self.run(sym, *args, **opts) + VNMNetwork::Command.no_shell(sym, *args, **opts) + end + + end + +end diff --git a/src/vnm_mad/remotes/lib/vlan.rb b/src/vnm_mad/remotes/lib/vlan.rb index 9c7d12f415..08f398d711 100644 --- a/src/vnm_mad/remotes/lib/vlan.rb +++ b/src/vnm_mad/remotes/lib/vlan.rb @@ -53,6 +53,9 @@ module VNMMAD # Create the bridge. create_bridge(@nic) + # Setup transparent proxies. + TProxy.setup_tproxy(@nic, :up) + # Check that no other vlans are connected to this bridge validate_vlan_id if @nic[:conf][:validate_vlan_id] @@ -118,13 +121,18 @@ module VNMMAD # vlan) next unless @bridges.include? @nic[:bridge] - # Skip if we want to keep the empty bridge - next if @nic[:conf][:keep_empty_bridge] + # Inserting raw phydev into the bridge is incorrect, but + # it is possible some user makes that mistake. This might + # cause that cleanup is not triggered properly, so we do + # not treat phydev as "guest" on purpose here. + guests = @bridges[@nic[:bridge]] \ + - [@nic[:phydev], @nic[:vlan_dev], "#{@nic[:bridge]}b"] - # Skip if the vlan device is not the only left device in - # the bridge. - next if (@bridges[@nic[:bridge]].length > 1) || \ - !@bridges[@nic[:bridge]].include?(@nic[:vlan_dev]) + # Setup transparent proxies. + TProxy.setup_tproxy(@nic, :down) if guests.count < 1 + + # Skip the bridge removal (on demand or when still in use). 
+ next if @nic[:conf][:keep_empty_bridge] || guests.count > 0 # Delete the vlan device. delete_vlan_dev diff --git a/src/vnm_mad/remotes/lib/vnmmad.rb b/src/vnm_mad/remotes/lib/vnmmad.rb index e0abbc3b0f..0d83ba6cc1 100644 --- a/src/vnm_mad/remotes/lib/vnmmad.rb +++ b/src/vnm_mad/remotes/lib/vnmmad.rb @@ -33,6 +33,7 @@ require 'sg_driver' require 'vlan' require 'no_vlan' require 'scripts_common' +require 'tproxy' Dir[File.expand_path('vnmmad-load.d', File.dirname(__FILE__)) + "/*.rb"].each{ |f| require f } @@ -52,7 +53,10 @@ rescue :vlan_mtu => '1500', :ipset_maxelem => '65536', :keep_empty_bridge => false, - :datastore_location => '/var/lib/one/datastores' + :datastore_location => '/var/lib/one/datastores', + :tproxy_debug_level => 2, # 0 = ERROR, 1 = WARNING, 2 = INFO, 3 = DEBUG + :tproxy_process_owner => 'oneadmin', + :tproxy => [] } end