| 1 | // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB |
| 2 | /* Copyright (c) 2020 Mellanox Technologies. */ |
| 3 | |
| 4 | #include <linux/refcount.h> |
| 5 | #include <linux/list.h> |
| 6 | #include <linux/rculist.h> |
| 7 | #include <linux/rtnetlink.h> |
| 8 | #include <linux/workqueue.h> |
| 9 | #include <linux/spinlock.h> |
| 10 | #include <linux/notifier.h> |
| 11 | #include <net/netevent.h> |
| 12 | #include <net/arp.h> |
| 13 | #include "neigh.h" |
| 14 | #include "tc.h" |
| 15 | #include "en_rep.h" |
| 16 | #include "fs_core.h" |
| 17 | #include "diag/en_rep_tracepoint.h" |
| 18 | |
/* Return the IPv6 neighbour-discovery DELAY_PROBE_TIME interval, or ~0UL
 * (effectively "no constraint") when IPv6 is disabled or the ND table is
 * not available via ipv6_stub.
 */
static unsigned long mlx5e_rep_ipv6_interval(void)
{
	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

	return ~0UL;
}
| 26 | |
| 27 | static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) |
| 28 | { |
| 29 | unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); |
| 30 | unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); |
| 31 | struct net_device *netdev = rpriv->netdev; |
| 32 | struct mlx5e_priv *priv = netdev_priv(dev: netdev); |
| 33 | |
| 34 | rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); |
| 35 | mlx5_fc_update_sampling_interval(dev: priv->mdev, interval: rpriv->neigh_update.min_interval); |
| 36 | } |
| 37 | |
| 38 | void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) |
| 39 | { |
| 40 | struct mlx5e_rep_priv *rpriv = priv->ppriv; |
| 41 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
| 42 | |
| 43 | mlx5_fc_queue_stats_work(dev: priv->mdev, |
| 44 | dwork: &neigh_update->neigh_stats_work, |
| 45 | delay: neigh_update->min_interval); |
| 46 | } |
| 47 | |
| 48 | static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) |
| 49 | { |
| 50 | return refcount_inc_not_zero(r: &nhe->refcnt); |
| 51 | } |
| 52 | |
| 53 | static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); |
| 54 | |
| 55 | void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) |
| 56 | { |
| 57 | if (refcount_dec_and_test(r: &nhe->refcnt)) { |
| 58 | mlx5e_rep_neigh_entry_remove(nhe); |
| 59 | kfree_rcu(nhe, rcu); |
| 60 | } |
| 61 | } |
| 62 | |
| 63 | static struct mlx5e_neigh_hash_entry * |
| 64 | mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, |
| 65 | struct mlx5e_neigh_hash_entry *nhe) |
| 66 | { |
| 67 | struct mlx5e_neigh_hash_entry *next = NULL; |
| 68 | |
| 69 | rcu_read_lock(); |
| 70 | |
| 71 | for (next = nhe ? |
| 72 | list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, |
| 73 | &nhe->neigh_list, |
| 74 | struct mlx5e_neigh_hash_entry, |
| 75 | neigh_list) : |
| 76 | list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, |
| 77 | struct mlx5e_neigh_hash_entry, |
| 78 | neigh_list); |
| 79 | next; |
| 80 | next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, |
| 81 | &next->neigh_list, |
| 82 | struct mlx5e_neigh_hash_entry, |
| 83 | neigh_list)) |
| 84 | if (mlx5e_rep_neigh_entry_hold(nhe: next)) |
| 85 | break; |
| 86 | |
| 87 | rcu_read_unlock(); |
| 88 | |
| 89 | if (nhe) |
| 90 | mlx5e_rep_neigh_entry_release(nhe); |
| 91 | |
| 92 | return next; |
| 93 | } |
| 94 | |
| 95 | static void mlx5e_rep_neigh_stats_work(struct work_struct *work) |
| 96 | { |
| 97 | struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, |
| 98 | neigh_update.neigh_stats_work.work); |
| 99 | struct net_device *netdev = rpriv->netdev; |
| 100 | struct mlx5e_priv *priv = netdev_priv(dev: netdev); |
| 101 | struct mlx5e_neigh_hash_entry *nhe = NULL; |
| 102 | |
| 103 | rtnl_lock(); |
| 104 | if (!list_empty(head: &rpriv->neigh_update.neigh_list)) |
| 105 | mlx5e_rep_queue_neigh_stats_work(priv); |
| 106 | |
| 107 | while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) |
| 108 | mlx5e_tc_update_neigh_used_value(nhe); |
| 109 | |
| 110 | rtnl_unlock(); |
| 111 | } |
| 112 | |
/* Deferred context for handling a NETEVENT_NEIGH_UPDATE outside atomic
 * context; holds references on both the neighbour and the hash entry for
 * the lifetime of the work item.
 */
struct neigh_update_work {
	struct work_struct work;
	struct neighbour *n;			/* referenced via neigh_hold() */
	struct mlx5e_neigh_hash_entry *nhe;	/* referenced via entry lookup */
};
| 118 | |
| 119 | static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) |
| 120 | { |
| 121 | neigh_release(neigh: update_work->n); |
| 122 | mlx5e_rep_neigh_entry_release(nhe: update_work->nhe); |
| 123 | kfree(objp: update_work); |
| 124 | } |
| 125 | |
| 126 | static void mlx5e_rep_neigh_update(struct work_struct *work) |
| 127 | { |
| 128 | struct neigh_update_work *update_work = container_of(work, struct neigh_update_work, |
| 129 | work); |
| 130 | struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; |
| 131 | struct neighbour *n = update_work->n; |
| 132 | struct mlx5e_encap_entry *e = NULL; |
| 133 | bool neigh_connected, same_dev; |
| 134 | unsigned char ha[ETH_ALEN]; |
| 135 | u8 nud_state, dead; |
| 136 | |
| 137 | rtnl_lock(); |
| 138 | |
| 139 | /* If these parameters are changed after we release the lock, |
| 140 | * we'll receive another event letting us know about it. |
| 141 | * We use this lock to avoid inconsistency between the neigh validity |
| 142 | * and it's hw address. |
| 143 | */ |
| 144 | read_lock_bh(&n->lock); |
| 145 | memcpy(ha, n->ha, ETH_ALEN); |
| 146 | nud_state = n->nud_state; |
| 147 | dead = n->dead; |
| 148 | same_dev = READ_ONCE(nhe->neigh_dev) == n->dev; |
| 149 | read_unlock_bh(&n->lock); |
| 150 | |
| 151 | neigh_connected = (nud_state & NUD_VALID) && !dead; |
| 152 | |
| 153 | trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); |
| 154 | |
| 155 | if (!same_dev) |
| 156 | goto out; |
| 157 | |
| 158 | /* mlx5e_get_next_init_encap() releases previous encap before returning |
| 159 | * the next one. |
| 160 | */ |
| 161 | while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) |
| 162 | mlx5e_rep_update_flows(priv: netdev_priv(dev: e->out_dev), e, neigh_connected, ha); |
| 163 | |
| 164 | out: |
| 165 | rtnl_unlock(); |
| 166 | mlx5e_release_neigh_update_work(update_work); |
| 167 | } |
| 168 | |
| 169 | static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv, |
| 170 | struct neighbour *n) |
| 171 | { |
| 172 | struct neigh_update_work *update_work; |
| 173 | struct mlx5e_neigh_hash_entry *nhe; |
| 174 | struct mlx5e_neigh m_neigh = {}; |
| 175 | |
| 176 | update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); |
| 177 | if (WARN_ON(!update_work)) |
| 178 | return NULL; |
| 179 | |
| 180 | m_neigh.family = n->ops->family; |
| 181 | memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); |
| 182 | |
| 183 | /* Obtain reference to nhe as last step in order not to release it in |
| 184 | * atomic context. |
| 185 | */ |
| 186 | rcu_read_lock(); |
| 187 | nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh: &m_neigh); |
| 188 | rcu_read_unlock(); |
| 189 | if (!nhe) { |
| 190 | kfree(objp: update_work); |
| 191 | return NULL; |
| 192 | } |
| 193 | |
| 194 | INIT_WORK(&update_work->work, mlx5e_rep_neigh_update); |
| 195 | neigh_hold(n); |
| 196 | update_work->n = n; |
| 197 | update_work->nhe = nhe; |
| 198 | |
| 199 | return update_work; |
| 200 | } |
| 201 | |
| 202 | static int mlx5e_rep_netevent_event(struct notifier_block *nb, |
| 203 | unsigned long event, void *ptr) |
| 204 | { |
| 205 | struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, |
| 206 | neigh_update.netevent_nb); |
| 207 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
| 208 | struct net_device *netdev = rpriv->netdev; |
| 209 | struct mlx5e_priv *priv = netdev_priv(dev: netdev); |
| 210 | struct mlx5e_neigh_hash_entry *nhe = NULL; |
| 211 | struct neigh_update_work *update_work; |
| 212 | struct neigh_parms *p; |
| 213 | struct neighbour *n; |
| 214 | bool found = false; |
| 215 | |
| 216 | switch (event) { |
| 217 | case NETEVENT_NEIGH_UPDATE: |
| 218 | n = ptr; |
| 219 | #if IS_ENABLED(CONFIG_IPV6) |
| 220 | if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) |
| 221 | #else |
| 222 | if (n->tbl != &arp_tbl) |
| 223 | #endif |
| 224 | return NOTIFY_DONE; |
| 225 | |
| 226 | update_work = mlx5e_alloc_neigh_update_work(priv, n); |
| 227 | if (!update_work) |
| 228 | return NOTIFY_DONE; |
| 229 | |
| 230 | queue_work(wq: priv->wq, work: &update_work->work); |
| 231 | break; |
| 232 | |
| 233 | case NETEVENT_DELAY_PROBE_TIME_UPDATE: |
| 234 | p = ptr; |
| 235 | |
| 236 | /* We check the device is present since we don't care about |
| 237 | * changes in the default table, we only care about changes |
| 238 | * done per device delay prob time parameter. |
| 239 | */ |
| 240 | #if IS_ENABLED(CONFIG_IPV6) |
| 241 | if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) |
| 242 | #else |
| 243 | if (!p->dev || p->tbl != &arp_tbl) |
| 244 | #endif |
| 245 | return NOTIFY_DONE; |
| 246 | |
| 247 | rcu_read_lock(); |
| 248 | list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, |
| 249 | neigh_list) { |
| 250 | if (p->dev == READ_ONCE(nhe->neigh_dev)) { |
| 251 | found = true; |
| 252 | break; |
| 253 | } |
| 254 | } |
| 255 | rcu_read_unlock(); |
| 256 | if (!found) |
| 257 | return NOTIFY_DONE; |
| 258 | |
| 259 | neigh_update->min_interval = min_t(unsigned long, |
| 260 | NEIGH_VAR(p, DELAY_PROBE_TIME), |
| 261 | neigh_update->min_interval); |
| 262 | mlx5_fc_update_sampling_interval(dev: priv->mdev, |
| 263 | interval: neigh_update->min_interval); |
| 264 | break; |
| 265 | } |
| 266 | return NOTIFY_DONE; |
| 267 | } |
| 268 | |
/* Hash table keyed by the full struct mlx5e_neigh (family + dst ip),
 * mapping to mlx5e_neigh_hash_entry via its embedded rhash_node.
 */
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};
| 275 | |
| 276 | int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) |
| 277 | { |
| 278 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
| 279 | int err; |
| 280 | |
| 281 | err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); |
| 282 | if (err) |
| 283 | goto out_err; |
| 284 | |
| 285 | INIT_LIST_HEAD(list: &neigh_update->neigh_list); |
| 286 | mutex_init(&neigh_update->encap_lock); |
| 287 | INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, |
| 288 | mlx5e_rep_neigh_stats_work); |
| 289 | mlx5e_rep_neigh_update_init_interval(rpriv); |
| 290 | |
| 291 | neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event; |
| 292 | err = register_netevent_notifier(nb: &neigh_update->netevent_nb); |
| 293 | if (err) |
| 294 | goto out_notifier; |
| 295 | return 0; |
| 296 | |
| 297 | out_notifier: |
| 298 | neigh_update->netevent_nb.notifier_call = NULL; |
| 299 | rhashtable_destroy(ht: &neigh_update->neigh_ht); |
| 300 | out_err: |
| 301 | netdev_warn(dev: rpriv->netdev, |
| 302 | format: "Failed to initialize neighbours handling for vport %d\n" , |
| 303 | rpriv->rep->vport); |
| 304 | return err; |
| 305 | } |
| 306 | |
| 307 | void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) |
| 308 | { |
| 309 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
| 310 | struct mlx5e_priv *priv = netdev_priv(dev: rpriv->netdev); |
| 311 | |
| 312 | if (!rpriv->neigh_update.netevent_nb.notifier_call) |
| 313 | return; |
| 314 | |
| 315 | unregister_netevent_notifier(nb: &neigh_update->netevent_nb); |
| 316 | |
| 317 | flush_workqueue(priv->wq); /* flush neigh update works */ |
| 318 | |
| 319 | cancel_delayed_work_sync(dwork: &rpriv->neigh_update.neigh_stats_work); |
| 320 | |
| 321 | mutex_destroy(lock: &neigh_update->encap_lock); |
| 322 | rhashtable_destroy(ht: &neigh_update->neigh_ht); |
| 323 | } |
| 324 | |
| 325 | static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, |
| 326 | struct mlx5e_neigh_hash_entry *nhe) |
| 327 | { |
| 328 | struct mlx5e_rep_priv *rpriv = priv->ppriv; |
| 329 | int err; |
| 330 | |
| 331 | err = rhashtable_insert_fast(ht: &rpriv->neigh_update.neigh_ht, |
| 332 | obj: &nhe->rhash_node, |
| 333 | params: mlx5e_neigh_ht_params); |
| 334 | if (err) |
| 335 | return err; |
| 336 | |
| 337 | list_add_rcu(new: &nhe->neigh_list, head: &rpriv->neigh_update.neigh_list); |
| 338 | |
| 339 | return err; |
| 340 | } |
| 341 | |
| 342 | static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) |
| 343 | { |
| 344 | struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; |
| 345 | |
| 346 | mutex_lock(&rpriv->neigh_update.encap_lock); |
| 347 | |
| 348 | list_del_rcu(entry: &nhe->neigh_list); |
| 349 | |
| 350 | rhashtable_remove_fast(ht: &rpriv->neigh_update.neigh_ht, |
| 351 | obj: &nhe->rhash_node, |
| 352 | params: mlx5e_neigh_ht_params); |
| 353 | mutex_unlock(lock: &rpriv->neigh_update.encap_lock); |
| 354 | } |
| 355 | |
| 356 | /* This function must only be called under the representor's encap_lock or |
| 357 | * inside rcu read lock section. |
| 358 | */ |
| 359 | struct mlx5e_neigh_hash_entry * |
| 360 | mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, |
| 361 | struct mlx5e_neigh *m_neigh) |
| 362 | { |
| 363 | struct mlx5e_rep_priv *rpriv = priv->ppriv; |
| 364 | struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; |
| 365 | struct mlx5e_neigh_hash_entry *nhe; |
| 366 | |
| 367 | nhe = rhashtable_lookup_fast(ht: &neigh_update->neigh_ht, key: m_neigh, |
| 368 | params: mlx5e_neigh_ht_params); |
| 369 | return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; |
| 370 | } |
| 371 | |
| 372 | int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, |
| 373 | struct mlx5e_neigh *m_neigh, |
| 374 | struct net_device *neigh_dev, |
| 375 | struct mlx5e_neigh_hash_entry **nhe) |
| 376 | { |
| 377 | int err; |
| 378 | |
| 379 | *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); |
| 380 | if (!*nhe) |
| 381 | return -ENOMEM; |
| 382 | |
| 383 | (*nhe)->priv = priv; |
| 384 | memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh)); |
| 385 | spin_lock_init(&(*nhe)->encap_list_lock); |
| 386 | INIT_LIST_HEAD(list: &(*nhe)->encap_list); |
| 387 | refcount_set(r: &(*nhe)->refcnt, n: 1); |
| 388 | WRITE_ONCE((*nhe)->neigh_dev, neigh_dev); |
| 389 | |
| 390 | err = mlx5e_rep_neigh_entry_insert(priv, nhe: *nhe); |
| 391 | if (err) |
| 392 | goto out_free; |
| 393 | return 0; |
| 394 | |
| 395 | out_free: |
| 396 | kfree(objp: *nhe); |
| 397 | return err; |
| 398 | } |
| 399 | |