1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
|
#!/usr/sbin/nft -f
# TCP ports on which this host accepts inbound connections.  Apart from the
# port Ansible itself connects to, every entry depends on the inventory groups
# the host belongs to.
define in-tcp-ports = {
    {{ ansible_port | default(22) }}
{% if 'MX' in group_names %}
  , 25      # SMTP
{% endif %}
{% if 'LDAP_provider' in group_names %}
  , 636     # ldaps
{% endif %}
{% if 'IMAP' in group_names %}
  , 993     # imaps
  , 4190    # ManageSieve
{% endif %}
{% if 'MSA' in group_names %}
  , 587     # submission [RFC4409]
  , 465     # submission over TLS [RFC8314]
{% endif %}
{% if group_names | intersect(['webmail', 'lists', 'wiki', 'nextcloud']) | length > 0 %}
  , 80      # HTTP
  , 443     # HTTP over SSL/TLS
{% endif %}
}
# TCP ports this host may open outbound connections to.  The first three are
# unconditional; the rest depend on the host's inventory groups.
define out-tcp-ports = {
    22
  , 80      # HTTP
  , 443     # HTTP over SSL/TLS
{% if group_names | intersect(['out', 'MSA']) | length > 0 %}
  , 25      # SMTP
{% endif %}
{% if 'LDAP_provider' in group_names %}
  , 11371   # OpenPGP HTTP Keyserver
  , 43      # whois
{% elif group_names | intersect(['MX', 'lists', 'nextcloud']) | length > 0 %}
  , 636     # ldaps
{% endif %}
{% if 'IMAP' in group_names %}
  , 2703    # Razor2
{% endif %}
}
###############################################################################
# Start from a clean slate: drop every table, chain and rule currently loaded
# so that the tables defined below fully replace the previous configuration.
flush ruleset
# Ingress filtering, one chain per active non-loopback interface: whitelist the
# ICMP/ICMPv6 types we need, drop the remaining ICMP traffic, then drop packets
# whose source or destination address is a bogon.
table netdev filter {
{% for if in ansible_interfaces %}
{% if if != "lo" and ansible_facts[if].active %}
{% set addr = (ansible_facts[if].ipv4 | default({'address': '0.0.0.0'})).address %}
    chain INGRESS-{{ if }} {
        type filter hook ingress device {{ if }} priority -499
        policy accept

        # IPsec traffic (refined later in the filter rule)
        ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} meta secpath exists accept

        # rate-limiting is done directly by the kernel (net.ipv4.icmp_{ratelimit,ratemask} runtime options)
        icmp type { echo-reply, echo-request, destination-unreachable, time-exceeded } counter accept
        icmpv6 type { echo-reply, echo-request, destination-unreachable,
                      packet-too-big, time-exceeded, parameter-problem } counter accept

        # accept neighbour discovery for autoconfiguration, RFC 4890 sec. 4.4.1
        ip6 hoplimit 255 icmpv6 type { 133,134,135,136,141,142 } counter accept

        # accept link-local multicast receiver notification messages
        ip6 saddr fe80::/10 ip6 daddr ff02::/16 ip6 hoplimit 1 icmpv6 type { 130,131,132,143 } counter accept

        # drop all remaining ICMP/ICMPv6 traffic
        meta l4proto { icmp, icmpv6 } counter drop

        # bogon filter (cf. RFC 6890 for non-global ip addresses); the
        # 10.0.0.0/8 and 192.168.0.0/16 entries are left out when this
        # interface's own IPv4 address lies within the respective range
        define bogon = {
            0.0.0.0/8           # this host, on this network (RFC 1122 sec. 3.2.1.3)
{% if not addr | ansible.utils.ipaddr('10.0.0.0/8') %}
          , 10.0.0.0/8          # private-use (RFC 1918)
{% endif %}
          , 100.64.0.0/10       # shared address space (RFC 6598)
          , 127.0.0.0/8         # loopback (RFC 1122, sec. 3.2.1.3)
          , 169.254.0.0/16      # link local (RFC 3927)
          , 172.16.0.0/12       # private-use (RFC 1918)
          , 192.0.0.0/24        # IETF protocol assignments (RFC 6890 sec. 2.1)
          , 192.0.2.0/24        # documentation (RFC 5737)
{% if not addr | ansible.utils.ipaddr('192.168.0.0/16') %}
          , 192.168.0.0/16      # private-use (RFC 1918)
{% endif %}
          , 198.18.0.0/15       # benchmarking (RFC 2544)
          , 198.51.100.0/24     # documentation (RFC 5737)
          , 203.0.113.0/24      # documentation (RFC 5737)
          , 224.0.0.0/3         # multicast - class D 224.0.0.0/4 + class E 240.0.0.0/4 (RFC 1112 sec. 4)
          , 255.255.255.255/32  # limited broadcast (RFC 0919 sec. 7)
        }

        ip saddr $bogon counter drop
        ip daddr $bogon counter drop

        # See also https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt
        define bogon6 = {
            ::1/128             # loopback address (RFC 4291)
          , ::/128              # unspecified (RFC 4291)
          , ::ffff:0:0/96       # IPv4-mapped address (RFC 4291)
          , 100::/64            # discard-only address block (RFC 6666)
          , 2001::/23           # IETF protocol assignments (RFC 2928)
          , 2001::/32           # TEREDO (RFC 4380)
          , 2001:2::/48         # benchmarking (RFC 5180)
          , 2001:db8::/32       # documentation (RFC 3849)
          , 2001:10::/28        # ORCHID (RFC 4843)
          , 2002::/16           # 6to4 (RFC 3056)
          , fc00::/7            # unique-local (RFC 4193)
          , fe80::/10           # link-local unicast (RFC 4291)
        }

        # filter on both source and destination, mirroring the IPv4 rules above
        ip6 saddr $bogon6 counter drop
        ip6 daddr $bogon6 counter drop
    }
{% endif %}
{% endfor %}
}
# Early prerouting sanity checks: nonsensical TCP flag combinations and packets
# that connection tracking classifies as invalid are dropped here.
table inet raw {
    chain PREROUTING-stateless {
        # XXX These checks can't live in the ingress hook: ingress runs before
        # IP defragmentation, so later fragments carry no TCP header (and we
        # don't want to drop IP fragments, see
        # https://blog.cloudflare.com/ip-fragmentation-is-broken/ ).
        # Priority -399 is > NF_IP_PRI_CONNTRACK_DEFRAG (-400).
        type filter hook prerouting priority -399; policy accept;

        # stateless filter for bogus TCP packets
        tcp flags & (fin|syn|rst|psh|ack|urg) == 0x0 counter drop    # null packet (no flag set)
        tcp flags & (fin|psh|urg) == fin|psh|urg counter drop        # XMAS packet
        tcp flags & (syn|rst) == syn|rst counter drop                # SYN together with RST
        tcp flags & (fin|rst) == fin|rst counter drop                # FIN together with RST
        tcp flags & (fin|syn) == fin|syn counter drop                # FIN together with SYN
        tcp flags & (fin|psh|ack) == fin|psh counter drop            # FIN+PSH without ACK
    }

    chain PREROUTING {
        # Priority -199 is > NF_IP_PRI_CONNTRACK (-200).
        type filter hook prerouting priority -199; policy accept;

        # stateful filter
        ct state invalid counter drop
    }
}
# Host firewall proper.  Both the input and the output hook default to drop;
# the rules below whitelist what is allowed through.
table inet filter {
    # blackholes -- the timeout must match /etc/fail2ban/jail.local
    set fail2ban {
        type ipv4_addr
        timeout 10m
    }
    set fail2ban6 {
        type ipv6_addr
        timeout 10m
    }

    chain input {
        type filter hook input priority 0; policy drop;

        iif lo accept

        # XXX Bullseye: this is a rather crude match, since nftables 0.9.0 has
        #   no ipsec expressions for matching inbound resp. outbound policies
        #   and source resp. destination tunnel addresses.
        #   https://serverfault.com/questions/971735/how-to-match-reqid-in-nftables
        #   https://blog.fraggod.net/2016/09/25/nftables-re-injected-ipsec-matching-without-xt_policy.html
        #   (Marks can't be used to match post-ESP decapsulation here because
        #   that doesn't work well with UDP encapsulation.)  We'll also pin the
        #   reqid to the lowest address byte in ipsec.conf(5); that way peers
        #   can't impersonate each other.
        meta l4proto esp accept
        # ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} ipsec in reqid $i accept
        ip saddr {{ ipsec_subnet }} ip daddr {{ ipsec[inventory_hostname_short] }} meta secpath exists accept

        # the ingress chain already filtered incoming ICMP/ICMPv6 traffic
        meta l4proto { icmp, icmpv6 } counter accept

        # NTP replies (ntpd uses sport 123 but systemd-timesyncd does not)
        udp sport 123 ct state related,established accept

{% if groups.all | length > 1 %}
        # UDP 500 <-> 500, only when the inventory has more than one host
        udp sport 500 udp dport 500 ct state new,related,established accept
{% if groups.NATed | length > 0 %}
        # UDP 4500 <-> 4500, only when the NATed group is not empty
        udp sport 4500 udp dport 4500 ct state new,related,established accept
{% endif %}
{% endif %}

        # DNS replies
        udp sport 53 ct state related,established accept
        tcp sport 53 ct state related,established accept

{% if 'dhclient' in group_names %}
        # DHCP (67 -> 68) and DHCPv6 (547 -> 546) replies
        ip  version 4 udp sport 67  udp dport 68  ct state related,established accept
        ip6 version 6 udp sport 547 udp dport 546 ct state related,established accept
{% endif %}

        # banned addresses are dropped before any service port is considered
        ip  saddr @fail2ban  counter drop
        ip6 saddr @fail2ban6 counter drop

        tcp dport $in-tcp-ports  ct state related,established accept
        tcp dport $in-tcp-ports  ct state new counter accept
        tcp sport $out-tcp-ports ct state related,established accept
    }

    chain output {
        type filter hook output priority 0; policy drop;

        oif lo accept

        # XXX Bullseye: unlike for input, neither marks nor a secpath
        #   existence test can be used here, because by the time we see a
        #   packet to 172.16.0.0/24 we don't know if it'll be encapsulated
        meta l4proto esp accept
        # ip saddr {{ ipsec[inventory_hostname_short] }} ip daddr {{ ipsec_subnet }} ipsec out reqid $i accept
        ip saddr {{ ipsec[inventory_hostname_short] }} ip daddr {{ ipsec_subnet }} accept

        meta l4proto { icmp, icmpv6 } counter accept

        # NTP queries (ntpd uses sport 123 but systemd-timesyncd does not)
        udp dport 123 ct state new,related,established accept

{% if groups.all | length > 1 %}
        # UDP 500 <-> 500, only when the inventory has more than one host
        udp sport 500 udp dport 500 ct state new,related,established accept
{% if groups.NATed | length > 0 %}
        # UDP 4500 <-> 4500, only when the NATed group is not empty
        udp sport 4500 udp dport 4500 ct state new,related,established accept
{% endif %}
{% endif %}

        # DNS queries
        udp dport 53 ct state new,related,established accept
        tcp dport 53 ct state new,related,established accept

{% if 'dhclient' in group_names %}
        # DHCP (68 -> 67) and DHCPv6 (546 -> 547) requests
        ip  version 4 udp sport 68  udp dport 67  ct state new,related,established accept
        ip6 version 6 udp sport 546 udp dport 547 ct state new,related,established accept
{% endif %}

        tcp sport $in-tcp-ports  ct state related,established accept
        tcp dport $out-tcp-ports ct state related,established accept
        tcp dport $out-tcp-ports ct state new counter accept

        # everything else is rejected (and counted) instead of silently hitting
        # the drop policy
        meta l4proto tcp counter reject with tcp reset
        meta l4proto udp counter reject
        counter reject
    }
}
|