1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * lwtunnel Infrastructure for light weight tunnels like mpls
4 *
5 * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
6 */
7
8#include <linux/capability.h>
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/uaccess.h>
14#include <linux/skbuff.h>
15#include <linux/netdevice.h>
16#include <linux/lwtunnel.h>
17#include <linux/in.h>
18#include <linux/init.h>
19#include <linux/err.h>
20
21#include <net/lwtunnel.h>
22#include <net/rtnetlink.h>
23#include <net/ip6_fib.h>
24#include <net/rtnh.h>
25
26#include "dev.h"
27
/* Static branch gating netfilter hook processing for lwtunnel packets;
 * defaults to off.  NOTE(review): toggle site is not in this file —
 * presumably a sysctl handler flips it; confirm against nf sysctl code.
 */
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
30
31#ifdef CONFIG_MODULES
32
33static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
34{
35 /* Only lwt encaps implemented without using an interface for
36 * the encap need to return a string here.
37 */
38 switch (encap_type) {
39 case LWTUNNEL_ENCAP_MPLS:
40 return "MPLS";
41 case LWTUNNEL_ENCAP_ILA:
42 return "ILA";
43 case LWTUNNEL_ENCAP_SEG6:
44 return "SEG6";
45 case LWTUNNEL_ENCAP_BPF:
46 return "BPF";
47 case LWTUNNEL_ENCAP_SEG6_LOCAL:
48 return "SEG6LOCAL";
49 case LWTUNNEL_ENCAP_RPL:
50 return "RPL";
51 case LWTUNNEL_ENCAP_IOAM6:
52 return "IOAM6";
53 case LWTUNNEL_ENCAP_XFRM:
54 /* module autoload not supported for encap type */
55 return NULL;
56 case LWTUNNEL_ENCAP_IP6:
57 case LWTUNNEL_ENCAP_IP:
58 case LWTUNNEL_ENCAP_NONE:
59 case __LWTUNNEL_ENCAP_MAX:
60 /* should not have got here */
61 WARN_ON(1);
62 break;
63 }
64 return NULL;
65}
66
67#endif /* CONFIG_MODULES */
68
69struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
70{
71 struct lwtunnel_state *lws;
72
73 lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
74
75 return lws;
76}
77EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);
78
/* Registry of encap ops, indexed by LWTUNNEL_ENCAP_* type.  Entries are
 * published/cleared with cmpxchg() and read under RCU.
 */
static const struct lwtunnel_encap_ops __rcu *
		lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
81
82int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
83 unsigned int num)
84{
85 if (num > LWTUNNEL_ENCAP_MAX)
86 return -ERANGE;
87
88 return !cmpxchg((const struct lwtunnel_encap_ops **)
89 &lwtun_encaps[num],
90 NULL, ops) ? 0 : -1;
91}
92EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);
93
94int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
95 unsigned int encap_type)
96{
97 int ret;
98
99 if (encap_type == LWTUNNEL_ENCAP_NONE ||
100 encap_type > LWTUNNEL_ENCAP_MAX)
101 return -ERANGE;
102
103 ret = (cmpxchg((const struct lwtunnel_encap_ops **)
104 &lwtun_encaps[encap_type],
105 ops, NULL) == ops) ? 0 : -1;
106
107 synchronize_net();
108
109 return ret;
110}
111EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);
112
113int lwtunnel_build_state(struct net *net, u16 encap_type,
114 struct nlattr *encap, unsigned int family,
115 const void *cfg, struct lwtunnel_state **lws,
116 struct netlink_ext_ack *extack)
117{
118 const struct lwtunnel_encap_ops *ops;
119 bool found = false;
120 int ret = -EINVAL;
121
122 if (encap_type == LWTUNNEL_ENCAP_NONE ||
123 encap_type > LWTUNNEL_ENCAP_MAX) {
124 NL_SET_ERR_MSG_ATTR(extack, encap,
125 "Unknown LWT encapsulation type");
126 return ret;
127 }
128
129 ret = -EOPNOTSUPP;
130 rcu_read_lock();
131 ops = rcu_dereference(lwtun_encaps[encap_type]);
132 if (likely(ops && ops->build_state && try_module_get(ops->owner)))
133 found = true;
134 rcu_read_unlock();
135
136 if (found) {
137 ret = ops->build_state(net, encap, family, cfg, lws, extack);
138 if (ret)
139 module_put(module: ops->owner);
140 } else {
141 /* don't rely on -EOPNOTSUPP to detect match as build_state
142 * handlers could return it
143 */
144 NL_SET_ERR_MSG_ATTR(extack, encap,
145 "LWT encapsulation type not supported");
146 }
147
148 return ret;
149}
150EXPORT_SYMBOL_GPL(lwtunnel_build_state);
151
152int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
153{
154 const struct lwtunnel_encap_ops *ops;
155 int ret = -EINVAL;
156
157 if (encap_type == LWTUNNEL_ENCAP_NONE ||
158 encap_type > LWTUNNEL_ENCAP_MAX) {
159 NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
160 return ret;
161 }
162
163 ops = rcu_access_pointer(lwtun_encaps[encap_type]);
164#ifdef CONFIG_MODULES
165 if (!ops) {
166 const char *encap_type_str = lwtunnel_encap_str(encap_type);
167
168 if (encap_type_str) {
169 request_module("rtnl-lwt-%s", encap_type_str);
170 ops = rcu_access_pointer(lwtun_encaps[encap_type]);
171 }
172 }
173#endif
174 ret = ops ? 0 : -EOPNOTSUPP;
175 if (ret < 0)
176 NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");
177
178 return ret;
179}
180EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);
181
182int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
183 struct netlink_ext_ack *extack)
184{
185 struct rtnexthop *rtnh = (struct rtnexthop *)attr;
186 struct nlattr *nla_entype;
187 struct nlattr *attrs;
188 u16 encap_type;
189 int attrlen;
190
191 while (rtnh_ok(rtnh, remaining)) {
192 attrlen = rtnh_attrlen(rtnh);
193 if (attrlen > 0) {
194 attrs = rtnh_attrs(rtnh);
195 nla_entype = nla_find(head: attrs, len: attrlen, attrtype: RTA_ENCAP_TYPE);
196
197 if (nla_entype) {
198 if (nla_len(nla: nla_entype) < sizeof(u16)) {
199 NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
200 return -EINVAL;
201 }
202 encap_type = nla_get_u16(nla: nla_entype);
203
204 if (lwtunnel_valid_encap_type(encap_type, extack))
205 return -EOPNOTSUPP;
206 }
207 }
208 rtnh = rtnh_next(rtnh, remaining: &remaining);
209 }
210
211 return 0;
212}
213EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);
214
215void lwtstate_free(struct lwtunnel_state *lws)
216{
217 const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
218
219 if (ops->destroy_state) {
220 ops->destroy_state(lws);
221 kfree_rcu(lws, rcu);
222 } else {
223 kfree(objp: lws);
224 }
225 module_put(module: ops->owner);
226}
227EXPORT_SYMBOL_GPL(lwtstate_free);
228
229int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
230 int encap_attr, int encap_type_attr)
231{
232 const struct lwtunnel_encap_ops *ops;
233 struct nlattr *nest;
234 int ret;
235
236 if (!lwtstate)
237 return 0;
238
239 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
240 lwtstate->type > LWTUNNEL_ENCAP_MAX)
241 return 0;
242
243 nest = nla_nest_start_noflag(skb, attrtype: encap_attr);
244 if (!nest)
245 return -EMSGSIZE;
246
247 ret = -EOPNOTSUPP;
248 rcu_read_lock();
249 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
250 if (likely(ops && ops->fill_encap))
251 ret = ops->fill_encap(skb, lwtstate);
252 rcu_read_unlock();
253
254 if (ret)
255 goto nla_put_failure;
256 nla_nest_end(skb, start: nest);
257 ret = nla_put_u16(skb, attrtype: encap_type_attr, value: lwtstate->type);
258 if (ret)
259 goto nla_put_failure;
260
261 return 0;
262
263nla_put_failure:
264 nla_nest_cancel(skb, start: nest);
265
266 return (ret == -EOPNOTSUPP ? 0 : ret);
267}
268EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);
269
270int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
271{
272 const struct lwtunnel_encap_ops *ops;
273 int ret = 0;
274
275 if (!lwtstate)
276 return 0;
277
278 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
279 lwtstate->type > LWTUNNEL_ENCAP_MAX)
280 return 0;
281
282 rcu_read_lock();
283 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
284 if (likely(ops && ops->get_encap_size))
285 ret = nla_total_size(payload: ops->get_encap_size(lwtstate));
286 rcu_read_unlock();
287
288 return ret;
289}
290EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
291
292int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
293{
294 const struct lwtunnel_encap_ops *ops;
295 int ret = 0;
296
297 if (!a && !b)
298 return 0;
299
300 if (!a || !b)
301 return 1;
302
303 if (a->type != b->type)
304 return 1;
305
306 if (a->type == LWTUNNEL_ENCAP_NONE ||
307 a->type > LWTUNNEL_ENCAP_MAX)
308 return 0;
309
310 rcu_read_lock();
311 ops = rcu_dereference(lwtun_encaps[a->type]);
312 if (likely(ops && ops->cmp_encap))
313 ret = ops->cmp_encap(a, b);
314 rcu_read_unlock();
315
316 return ret;
317}
318EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
319
320int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
321{
322 const struct lwtunnel_encap_ops *ops;
323 struct lwtunnel_state *lwtstate;
324 struct dst_entry *dst;
325 int ret;
326
327 local_bh_disable();
328
329 if (dev_xmit_recursion()) {
330 net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
331 __func__);
332 ret = -ENETDOWN;
333 goto drop;
334 }
335
336 dst = skb_dst(skb);
337 if (!dst) {
338 ret = -EINVAL;
339 goto drop;
340 }
341 lwtstate = dst->lwtstate;
342
343 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
344 lwtstate->type > LWTUNNEL_ENCAP_MAX) {
345 ret = 0;
346 goto out;
347 }
348
349 ret = -EOPNOTSUPP;
350 rcu_read_lock();
351 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
352 if (likely(ops && ops->output)) {
353 dev_xmit_recursion_inc();
354 ret = ops->output(net, sk, skb);
355 dev_xmit_recursion_dec();
356 }
357 rcu_read_unlock();
358
359 if (ret == -EOPNOTSUPP)
360 goto drop;
361
362 goto out;
363
364drop:
365 kfree_skb(skb);
366
367out:
368 local_bh_enable();
369 return ret;
370}
371EXPORT_SYMBOL_GPL(lwtunnel_output);
372
373int lwtunnel_xmit(struct sk_buff *skb)
374{
375 const struct lwtunnel_encap_ops *ops;
376 struct lwtunnel_state *lwtstate;
377 struct dst_entry *dst;
378 int ret;
379
380 local_bh_disable();
381
382 if (dev_xmit_recursion()) {
383 net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
384 __func__);
385 ret = -ENETDOWN;
386 goto drop;
387 }
388
389 dst = skb_dst(skb);
390 if (!dst) {
391 ret = -EINVAL;
392 goto drop;
393 }
394
395 lwtstate = dst->lwtstate;
396
397 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
398 lwtstate->type > LWTUNNEL_ENCAP_MAX) {
399 ret = 0;
400 goto out;
401 }
402
403 ret = -EOPNOTSUPP;
404 rcu_read_lock();
405 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
406 if (likely(ops && ops->xmit)) {
407 dev_xmit_recursion_inc();
408 ret = ops->xmit(skb);
409 dev_xmit_recursion_dec();
410 }
411 rcu_read_unlock();
412
413 if (ret == -EOPNOTSUPP)
414 goto drop;
415
416 goto out;
417
418drop:
419 kfree_skb(skb);
420
421out:
422 local_bh_enable();
423 return ret;
424}
425EXPORT_SYMBOL_GPL(lwtunnel_xmit);
426
427int lwtunnel_input(struct sk_buff *skb)
428{
429 const struct lwtunnel_encap_ops *ops;
430 struct lwtunnel_state *lwtstate;
431 struct dst_entry *dst;
432 int ret;
433
434 DEBUG_NET_WARN_ON_ONCE(!in_softirq());
435
436 if (dev_xmit_recursion()) {
437 net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
438 __func__);
439 ret = -ENETDOWN;
440 goto drop;
441 }
442
443 dst = skb_dst(skb);
444 if (!dst) {
445 ret = -EINVAL;
446 goto drop;
447 }
448 lwtstate = dst->lwtstate;
449
450 if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
451 lwtstate->type > LWTUNNEL_ENCAP_MAX)
452 return 0;
453
454 ret = -EOPNOTSUPP;
455 rcu_read_lock();
456 ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
457 if (likely(ops && ops->input)) {
458 dev_xmit_recursion_inc();
459 ret = ops->input(skb);
460 dev_xmit_recursion_dec();
461 }
462 rcu_read_unlock();
463
464 if (ret == -EOPNOTSUPP)
465 goto drop;
466
467 return ret;
468
469drop:
470 kfree_skb(skb);
471
472 return ret;
473}
474EXPORT_SYMBOL_GPL(lwtunnel_input);
475

source code of linux/net/core/lwtunnel.c