#!/usr/sbin/nft -f define in-tcp-ports = { {{ ansible_port|default(22) }} {% if 'MX' in group_names %} , 25 # SMTP {% endif %} {% if 'LDAP_provider' in group_names %} , 636 # ldaps {% endif %} {% if 'IMAP' in group_names %} , 993 # imaps , 4190 # ManageSieve {% endif %} {% if 'MSA' in group_names %} , 587 # submission [RFC4409] , 465 # submission over TLS [RFC8314] {% endif %} {% if 'webmail' in group_names or 'lists' in group_names or 'wiki' in group_names or 'nextcloud' in group_names %} , 80 # HTTP , 443 # HTTP over SSL/TLS {% endif %} } define out-tcp-ports = { 22 , 80 # HTTP , 443 # HTTP over SSL/TLS {% if 'out' in group_names or 'MSA' in group_names %} , 25 # SMTP {% endif %} {% if 'LDAP_provider' in group_names %} , 11371 # OpenPGP HTTP Keyserver , 43 # whois {% elif 'MX' in group_names or 'lists' in group_names or 'nextcloud' in group_names %} , 636 # ldaps {% endif %} {% if 'IMAP' in group_names %} , 2703 # Razor2 {% endif %} } ############################################################################### flush ruleset table netdev filter { {% for if in ansible_interfaces %} {% if if != "lo" and ansible_facts[if].active %} {% set addr = (ansible_facts[if].ipv4 | default({'address': '0.0.0.0'})).address %} chain INGRESS-{{ if }} { type filter hook ingress device {{ if }} priority -499 policy accept # IPsec traffic (refined later in the filter rule) ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} meta secpath exists accept # rate-limiting is done directly by the kernel (net.ipv4.icmp_{ratelimit,ratemask} runtime options) icmp type { echo-reply, echo-request, destination-unreachable, time-exceeded } counter accept icmpv6 type { echo-reply, echo-request, destination-unreachable, packet-too-big, time-exceeded, parameter-problem } counter accept # accept neighbour discovery for autoconfiguration, RFC 4890 sec. 4.4.1 ip6 hoplimit 255 icmpv6 type { 133,134,135,136,141,142 } counter accept # accept link-local multicast receiver notification messages ip6 saddr fe80::/10 ip6 daddr ff02::/16 ip6 hoplimit 1 icmpv6 type { 130,131,132,143 } counter accept # drop all remaining ICMP/ICMPv6 traffic meta l4proto { icmp, icmpv6 } counter drop # bogon filter (cf. RFC 6890 for non-global ip addresses) define bogon = { 0.0.0.0/8 # this host, on this network (RFC 1122 sec. 3.2.1.3) {% if not addr | ansible.utils.ipaddr('10.0.0.0/8') %} , 10.0.0.0/8 # private-use (RFC 1918) {% endif %} , 100.64.0.0/10 # shared address space (RFC 6598) , 127.0.0.0/8 # loopback (RFC 1122, sec. 3.2.1.3) , 169.254.0.0/16 # link local (RFC 3927) , 172.16.0.0/12 # private-use (RFC 1918) , 192.0.0.0/24 # IETF protocol assignments (RFC 6890 sec. 2.1) , 192.0.2.0/24 # documentation (RFC 5737) {% if not addr | ansible.utils.ipaddr('192.168.0.0/16') %} , 192.168.0.0/16 # private-use (RFC 1918) {% endif %} , 198.18.0.0/15 # benchmarking (RFC 2544) , 198.51.100.0/24 # documentation (RFC 5737) , 203.0.113.0/24 # documentation (RFC 5737) , 224.0.0.0/3 # multicast - class D 224.0.0.0/4 + class E 240.0.0.0/4 (RFC 1112 sec. 4) , 255.255.255.255/32 # limited broadcast (RFC 0919 sec. 7) } ip saddr $bogon counter drop ip daddr $bogon counter drop # See also https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt define bogon6 = { ::1/128 # loopback address (RFC 4291) , ::/128 # unspecified (RFC 4291) , ::ffff:0:0/96 # IPv4-mapped address (RFC 4291) , 100::/64 # discard-only address block (RFC 6666) , 2001::/23 # IETF protocol assignments (RFC 2928) , 2001::/32 # TEREDO (RFC 4380) , 2001:2::/48 # benchmarking (RFC 5180) , 2001:db8::/32 # documentation (RFC 3849) , 2001:10::/28 # ORCHID (RFC 4843) , 2002::/16 # 6to4 (RFC 3056) , fc00::/7 # unique-local (RFC 4193) , fe80::/10 # linked-scoped unicast (RFC 4291) } ip6 saddr $bogon6 counter drop ip6 saddr $bogon6 counter drop } {% endif %} {% endfor %} } table inet raw { chain PREROUTING-stateless { # XXX can't add that to the ingress hook as that happens before IP defragmentation # so we don't have the TCP header in later fragments (we don't want to drop IP # fragments, see https://blog.cloudflare.com/ip-fragmentation-is-broken/ ) type filter hook prerouting priority -399 # > NF_IP_PRI_CONNTRACK_DEFRAG (-400) policy accept # stateless filter for bogus TCP packets tcp flags & (fin|syn|rst|psh|ack|urg) == 0x0 counter drop # null packet tcp flags & (fin|psh|urg) == fin|psh|urg counter drop # XMAS packet tcp flags & (syn|rst) == syn|rst counter drop tcp flags & (fin|rst) == fin|rst counter drop tcp flags & (fin|syn) == fin|syn counter drop tcp flags & (fin|psh|ack) == fin|psh counter drop } chain PREROUTING { type filter hook prerouting priority -199 # > NF_IP_PRI_CONNTRACK (-200) policy accept # stateful filter ct state invalid counter drop } } table inet filter { # blackholes set fail2ban { type ipv4_addr; timeout 10m; } set fail2ban6 { type ipv6_addr; timeout 10m; } chain input { type filter hook input priority 0 policy drop iif lo accept # XXX Bullseye: this is a rather crude match as nftables 0.9.0 lacks support for ipsec expressions # to match match inbound resp. outbound policies and source resp. destination tunnel addresses. # https://serverfault.com/questions/971735/how-to-match-reqid-in-nftables # https://blog.fraggod.net/2016/09/25/nftables-re-injected-ipsec-matching-without-xt_policy.html # (We can't use marks to match post-ESP decapsulation here because that doesn't work well with UDP # encapsulation.) We'll also pin the reqid to the lowest address byte in ipsec.conf(5); that way # peers can't impersonate each other. meta l4proto esp accept # ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} ipsec in reqid $i accept ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} meta secpath exists accept # incoming ICMP/ICMPv6 traffic was filtered in the ingress chain already meta l4proto { icmp, icmpv6 } counter accept # NTP (ntpd uses sport 123 but systemd-timesyncd does not) udp sport 123 ct state related,established accept {% if groups.all | length > 1 %} udp sport 500 udp dport 500 ct state new,related,established accept {% if groups.NATed | length > 0 %} udp sport 4500 udp dport 4500 ct state new,related,established accept {% endif %} {% endif %} udp sport 53 ct state related,established accept tcp sport 53 ct state related,established accept {% if 'dhclient' in group_names %} ip version 4 udp sport 67 udp dport 68 ct state related,established accept ip6 version 6 udp sport 547 udp dport 546 ct state related,established accept {% endif %} meta l4proto tcp ip saddr @fail2ban counter drop meta l4proto tcp ip6 saddr @fail2ban6 counter drop tcp dport $in-tcp-ports ct state related,established accept tcp dport $in-tcp-ports ct state new counter accept tcp sport $out-tcp-ports ct state related,established accept } chain output { type filter hook output priority 0 policy drop oif lo accept # XXX Bullseye: unlike for input we can't use marks or test for # secpath existence here, because by the time we see a packet to # 172.16.0.0/24 we don't know if it'll be encapsulated meta l4proto esp accept # ip saddr {{ ipsec[inventory_hostname_short] }} ip daddr {{ ipsec_subnet }} ipsec out reqid $i accept ip saddr {{ ipsec[inventory_hostname_short] }} ip daddr {{ ipsec_subnet }} accept meta l4proto { icmp, icmpv6 } counter accept # NTP (ntpd uses sport 123 but systemd-timesyncd does not) udp dport 123 ct state new,related,established accept {% if groups.all | length > 1 %} udp sport 500 udp dport 500 ct state new,related,established accept {% if groups.NATed | length > 0 %} udp sport 4500 udp dport 4500 ct state new,related,established accept {% endif %} {% endif %} udp dport 53 ct state new,related,established accept tcp dport 53 ct state new,related,established accept {% if 'dhclient' in group_names %} ip version 4 udp sport 68 udp dport 67 ct state new,related,established accept ip6 version 6 udp sport 546 udp dport 547 ct state new,related,established accept {% endif %} tcp sport $in-tcp-ports ct state related,established accept tcp dport $out-tcp-ports ct state related,established accept tcp dport $out-tcp-ports ct state new counter accept meta l4proto tcp counter reject with tcp reset meta l4proto udp counter reject counter reject } }