summaryrefslogtreecommitdiffstats
path: root/roles/common/templates/etc/nftables.conf.j2
blob: 66b1f9dbe2f18a60b6cc70762fdf3b953d36a70c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#!/usr/sbin/nft -f

define in-tcp-ports = {
    {{ ansible_port|default(22) }}
{% if 'MX' in group_names %}
  , 25      # SMTP
{% endif %}
{% if 'LDAP_provider' in group_names %}
  , 636     # ldaps
{% endif %}
{% if 'IMAP' in group_names %}
  , 993     # imaps
  , 4190    # ManageSieve
{% endif %}
{% if 'MSA' in group_names %}
  , 587     # submission [RFC4409]
  , 465     # submission over TLS [RFC8314]
{% endif %}
{% if 'webmail' in group_names or 'lists' in group_names or 'wiki' in group_names or 'nextcloud' in group_names %}
  , 80      # HTTP
  , 443     # HTTP over SSL/TLS
{% endif %}
}

define out-tcp-ports = {
    22
  , 80      # HTTP
  , 443     # HTTP over SSL/TLS
{% if 'out' in group_names or 'MSA' in group_names %}
  , 25      # SMTP
{% endif %}
{% if 'LDAP_provider' in group_names %}
  , 11371   # OpenPGP HTTP Keyserver
  , 43      # whois
{% elif 'MX' in group_names or 'lists' in group_names or 'nextcloud' in group_names %}
  , 636     # ldaps
{% endif %}
{% if 'IMAP' in group_names %}
  , 2703    # Razor2
{% endif %}
}


###############################################################################

flush ruleset

table netdev filter {
{% for if in ansible_interfaces %}
{% if if != "lo" and ansible_facts[if].active %}
{% set addr = (ansible_facts[if].ipv4 | default({'address': '0.0.0.0'})).address %}
    chain INGRESS-{{ if }} {
        type filter hook ingress device {{ if }} priority -499
        policy accept

        # IPsec traffic (refined later in the filter rule)
        ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} meta secpath exists accept

        # rate-limiting is done directly by the kernel (net.ipv4.icmp_{ratelimit,ratemask} runtime options)
        icmp   type { echo-reply, echo-request, destination-unreachable, time-exceeded } counter accept
        icmpv6 type { echo-reply, echo-request, destination-unreachable,
                      packet-too-big, time-exceeded, parameter-problem } counter accept

        # accept neighbour discovery for autoconfiguration, RFC 4890 sec. 4.4.1
        ip6 hoplimit 255 icmpv6 type { 133,134,135,136,141,142 } counter accept

        # accept link-local multicast receiver notification messages
        ip6 saddr fe80::/10 ip6 daddr ff02::/16 ip6 hoplimit 1 icmpv6 type { 130,131,132,143 } counter accept

        # drop all remaining ICMP/ICMPv6 traffic
        meta l4proto { icmp, icmpv6 } counter drop

        # bogon filter (cf. RFC 6890 for non-global ip addresses)
        define bogon = {
            0.0.0.0/8          # this host, on this network (RFC 1122 sec. 3.2.1.3)
{% if not addr | ansible.utils.ipaddr('10.0.0.0/8') %}
          , 10.0.0.0/8         # private-use (RFC 1918)
{% endif %}
          , 100.64.0.0/10      # shared address space (RFC 6598)
          , 127.0.0.0/8        # loopback (RFC 1122, sec. 3.2.1.3)
          , 169.254.0.0/16     # link local (RFC 3927)
          , 172.16.0.0/12      # private-use (RFC 1918)
          , 192.0.0.0/24       # IETF protocol assignments (RFC 6890 sec. 2.1)
          , 192.0.2.0/24       # documentation (RFC 5737)
{% if not addr | ansible.utils.ipaddr('192.168.0.0/16') %}
          , 192.168.0.0/16     # private-use (RFC 1918)
{% endif %}
          , 198.18.0.0/15      # benchmarking (RFC 2544)
          , 198.51.100.0/24    # documentation (RFC 5737)
          , 203.0.113.0/24     # documentation (RFC 5737)
          , 224.0.0.0/3        # multicast - class D 224.0.0.0/4 + class E 240.0.0.0/4 (RFC 1112 sec. 4)
          , 255.255.255.255/32 # limited broadcast (RFC 0919 sec. 7)
        }

        ip saddr $bogon counter drop
        ip daddr $bogon counter drop

        # See also https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt
        define bogon6 = {
            ::1/128         # loopback address (RFC 4291)
          , ::/128          # unspecified (RFC 4291)
          , ::ffff:0:0/96   # IPv4-mapped address (RFC 4291)
          , 100::/64        # discard-only address block (RFC 6666)
          , 2001::/23       # IETF protocol assignments (RFC 2928)
          , 2001::/32       # TEREDO (RFC 4380)
          , 2001:2::/48     # benchmarking (RFC 5180)
          , 2001:db8::/32   # documentation (RFC 3849)
          , 2001:10::/28    # ORCHID (RFC 4843)
          , 2002::/16       # 6to4 (RFC 3056)
          , fc00::/7        # unique-local (RFC 4193)
          , fe80::/10       # linked-scoped unicast (RFC 4291)
        }

        ip6 saddr $bogon6 counter drop
        ip6 saddr $bogon6 counter drop
    }
{% endif %}
{% endfor %}
}

table inet raw {
    chain PREROUTING-stateless {
        # XXX can't add that to the ingress hook as that happens before IP defragmentation
        # so we don't have the TCP header in later fragments (we don't want to drop IP
        # fragments, see https://blog.cloudflare.com/ip-fragmentation-is-broken/ )
        type filter hook prerouting priority -399 # > NF_IP_PRI_CONNTRACK_DEFRAG (-400)
        policy accept

        # stateless filter for bogus TCP packets
        tcp flags & (fin|syn|rst|psh|ack|urg) == 0x0 counter drop # null packet
        tcp flags & (fin|psh|urg) == fin|psh|urg     counter drop # XMAS packet
        tcp flags & (syn|rst) == syn|rst             counter drop
        tcp flags & (fin|rst) == fin|rst             counter drop
        tcp flags & (fin|syn) == fin|syn             counter drop
        tcp flags & (fin|psh|ack) == fin|psh         counter drop
    }

    chain PREROUTING {
        type filter hook prerouting priority -199 # > NF_IP_PRI_CONNTRACK (-200)
        policy accept

        # stateful filter
        ct state invalid counter drop
    }
}

table inet filter {
    # blackholes
    set fail2ban  { type ipv4_addr; timeout 10m; }
    set fail2ban6 { type ipv6_addr; timeout 10m; }

    chain input {
        type filter hook input priority 0
        policy drop

        iif lo accept

        # XXX Bullseye: this is a rather crude match as nftables 0.9.0 lacks support for ipsec expressions
        # to match match inbound resp. outbound policies and source resp. destination tunnel addresses.
        # https://serverfault.com/questions/971735/how-to-match-reqid-in-nftables
        # https://blog.fraggod.net/2016/09/25/nftables-re-injected-ipsec-matching-without-xt_policy.html
        # (We can't use marks to match post-ESP decapsulation here because that doesn't work well with UDP
        # encapsulation.)  We'll also pin the reqid to the lowest address byte in ipsec.conf(5); that way
        # peers can't impersonate each other.
        meta l4proto esp accept
        # ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} ipsec in reqid $i accept
        ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} meta secpath exists accept

        # incoming ICMP/ICMPv6 traffic was filtered in the ingress chain already
        meta l4proto { icmp, icmpv6 } counter accept

        # NTP (ntpd uses sport 123 but systemd-timesyncd does not)
        udp sport 123 ct state related,established accept

{% if groups.all | length > 1 %}
        udp sport  500 udp dport  500 ct state new,related,established accept
{% if groups.NATed | length > 0 %}
        udp sport 4500 udp dport 4500 ct state new,related,established accept
{% endif %}
{% endif %}

        udp sport 53 ct state related,established accept
        tcp sport 53 ct state related,established accept
{% if 'dhclient' in group_names %}
        ip  version 4 udp sport  67 udp dport  68 ct state related,established accept
        ip6 version 6 udp sport 547 udp dport 546 ct state related,established accept
{% endif %}

        meta l4proto tcp ip  saddr @fail2ban  counter drop
        meta l4proto tcp ip6 saddr @fail2ban6 counter drop

        tcp dport $in-tcp-ports  ct state related,established accept
        tcp dport $in-tcp-ports  ct state new counter accept
        tcp sport $out-tcp-ports ct state related,established accept
    }

    chain output {
        type filter hook output priority 0
        policy drop

        oif lo accept

        # XXX Bullseye: unlike for input we can't use marks or test for
        # secpath existence here, because by the time we see a packet to
        # 172.16.0.0/24 we don't know if it'll be encapsulated
        meta l4proto esp accept
        # ip saddr {{ ipsec[inventory_hostname_short] }} ip daddr {{ ipsec_subnet }} ipsec out reqid $i accept
        ip saddr {{ ipsec[inventory_hostname_short] }} ip daddr {{ ipsec_subnet }} accept

        meta l4proto { icmp, icmpv6 } counter accept

        # NTP (ntpd uses sport 123 but systemd-timesyncd does not)
        udp dport 123 ct state new,related,established accept

{% if groups.all | length > 1 %}
        udp sport  500 udp dport  500 ct state new,related,established accept
{% if groups.NATed | length > 0 %}
        udp sport 4500 udp dport 4500 ct state new,related,established accept
{% endif %}
{% endif %}

        udp dport 53 ct state new,related,established accept
        tcp dport 53 ct state new,related,established accept
{% if 'dhclient' in group_names %}
        ip  version 4 udp sport  68 udp dport  67 ct state new,related,established accept
        ip6 version 6 udp sport 546 udp dport 547 ct state new,related,established accept
{% endif %}

        tcp sport $in-tcp-ports  ct state related,established accept
        tcp dport $out-tcp-ports ct state related,established accept
        tcp dport $out-tcp-ports ct state new counter accept

        meta l4proto tcp counter reject with tcp reset
        meta l4proto udp counter reject
        counter reject
    }
}