/*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*/
#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
/* Slab cache from which struct sw_flow objects are allocated and freed. */
static struct kmem_cache *flow_cache;
/* Forward declaration; the definition is not in this part of the file. */
static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
struct sw_flow_key_range *range, u8 val);
/*
 * Widen the byte range tracked for a match (or for its mask) so that it
 * covers the field located at [offset, offset + size).
 *
 * @match:   match whose key range or mask range is updated.
 * @offset:  byte offset of the field within struct sw_flow_key.
 * @size:    size of the field in bytes.
 * @is_mask: false selects match->range; true selects match->mask->range.
 *           With is_mask set and no mask attached this is a no-op.
 *
 * The field bounds are rounded outwards to multiples of sizeof(long).
 * A range whose start equals its end is treated as empty and is simply
 * replaced; otherwise the range only ever grows.
 */
static void update_range__(struct sw_flow_match *match,
			   size_t offset, size_t size, bool is_mask)
{
	struct sw_flow_key_range *range = NULL;
	size_t start = rounddown(offset, sizeof(long));
	size_t end = roundup(offset + size, sizeof(long));

	if (!is_mask)
		range = &match->range;
	else if (match->mask)
		range = &match->mask->range;

	if (!range)
		return;

	/* Empty range: adopt the field's bounds as they are. */
	if (range->start == range->end) {
		range->start = start;
		range->end = end;
		return;
	}

	if (range->start > start)
		range->start = start;

	if (range->end < end)
		range->end = end;
}
/*
 * Assign 'value' to 'field' of the match key (is_mask false) or of the
 * mask key (is_mask true). The corresponding range is first extended
 * through update_range__() so that it covers the field. When is_mask is
 * set but the match has no mask attached, nothing is stored.
 *
 * Note: 'match' and 'is_mask' are expanded more than once, so arguments
 * with side effects must be avoided.
 */
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
do { \
update_range__(match, offsetof(struct sw_flow_key, field), \
sizeof((match)->key->field), is_mask); \
if (is_mask) { \
if ((match)->mask) \
(match)->mask->key.field = value; \
} else { \
(match)->key->field = value; \
} \
} while (0)
/*
 * Copy 'len' bytes from 'value_p' into 'field' of the match key
 * (is_mask false) or of the mask key (is_mask true). The corresponding
 * range is first extended through update_range__() by 'len' bytes starting
 * at the field's offset. When is_mask is set but the match has no mask
 * attached, nothing is copied.
 *
 * Note: 'match', 'len' and 'is_mask' are expanded more than once, so
 * arguments with side effects must be avoided.
 */
#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
do { \
update_range__(match, offsetof(struct sw_flow_key, field), \
len, is_mask); \
if (is_mask) { \
if ((match)->mask) \
memcpy(&(match)->mask->key.field, value_p, len);\
} else { \
memcpy(&(match)->key->field, value_p, len); \
} \
} while (0)
/* Length of 'range' in bytes (end minus start), narrowed to u16. */
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
	u16 n_bytes = range->end - range->start;

	return n_bytes;
}
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/*
 * Prepare 'match' for use: zero it, zero 'key' and attach it, and attach
 * 'mask' (which may be NULL). When a mask is supplied, its key is zeroed
 * and its range is reset to the empty range [0, 0).
 */
void ovs_match_init(struct sw_flow_match *match,
		    struct sw_flow_key *key,
		    struct sw_flow_mask *mask)
{
	memset(match, 0, sizeof(*match));
	memset(key, 0, sizeof(*key));

	match->key = key;
	match->mask = mask;

	if (!mask)
		return;

	memset(&mask->key, 0, sizeof(mask->key));
	mask->range.start = 0;
	mask->range.end = 0;
}
/*
 * Check that the attribute sets supplied for a match are consistent with
 * the values already stored in its key and mask.
 *
 * @match:      match whose key (and optional mask) have been filled in.
 * @key_attrs:  bitmap of OVS_KEY_ATTR_* attributes present in the key.
 * @mask_attrs: bitmap of OVS_KEY_ATTR_* attributes present in the mask.
 *
 * Two checks are made:
 *  - every attribute implied by the key's EtherType, IP protocol and
 *    ICMPv6 type must be present in 'key_attrs';
 *  - 'mask_attrs' may only contain attributes that are present in the key
 *    and whose selecting field (EtherType, IP protocol, ICMPv6 type) is
 *    matched exactly by the mask, plus a few always-allowed attributes.
 *
 * Returns true when both checks pass, false (after logging) otherwise.
 *
 * All bitmaps are built with 1ULL so that the shifts are carried out in
 * 64 bits, matching the u64 bitmaps they are combined with.
 */
static bool ovs_match_validate(const struct sw_flow_match *match,
			       u64 key_attrs, u64 mask_attrs)
{
	const struct sw_flow_key *key = match->key;
	const struct sw_flow_key *mask_key =
		match->mask ? &match->mask->key : NULL;
	/* True when the mask matches the EtherType / IP protocol exactly. */
	bool exact_type = mask_key && mask_key->eth.type == htons(0xffff);
	bool exact_proto = mask_key && mask_key->ip.proto == 0xff;
	u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET;
	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */

	/* The following mask attributes allowed only if they
	 * pass the validation tests.  SCTP belongs here as well: it is
	 * re-granted below only when the IP protocol is matched exactly. */
	mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4)
			| (1ULL << OVS_KEY_ATTR_IPV6)
			| (1ULL << OVS_KEY_ATTR_TCP)
			| (1ULL << OVS_KEY_ATTR_UDP)
			| (1ULL << OVS_KEY_ATTR_SCTP)
			| (1ULL << OVS_KEY_ATTR_ICMP)
			| (1ULL << OVS_KEY_ATTR_ICMPV6)
			| (1ULL << OVS_KEY_ATTR_ARP)
			| (1ULL << OVS_KEY_ATTR_ND));

	/* Always allowed mask fields. */
	mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
		       | (1ULL << OVS_KEY_ATTR_IN_PORT)
		       | (1ULL << OVS_KEY_ATTR_ETHERTYPE));

	/* Check key attributes. */
	if (key->eth.type == htons(ETH_P_ARP)
	    || key->eth.type == htons(ETH_P_RARP)) {
		key_expected |= 1ULL << OVS_KEY_ATTR_ARP;
		if (exact_type)
			mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
	}

	if (key->eth.type == htons(ETH_P_IP)) {
		key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
		if (exact_type)
			mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4;

		/* Later fragments carry no transport header. */
		if (key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
			}

			if (key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
			}

			if (key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
			}

			if (key->ip.proto == IPPROTO_ICMP) {
				key_expected |= 1ULL << OVS_KEY_ATTR_ICMP;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP;
			}
		}
	}

	if (key->eth.type == htons(ETH_P_IPV6)) {
		key_expected |= 1ULL << OVS_KEY_ATTR_IPV6;
		if (exact_type)
			mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6;

		if (key->ip.frag != OVS_FRAG_TYPE_LATER) {
			if (key->ip.proto == IPPROTO_UDP) {
				key_expected |= 1ULL << OVS_KEY_ATTR_UDP;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP;
			}

			if (key->ip.proto == IPPROTO_SCTP) {
				key_expected |= 1ULL << OVS_KEY_ATTR_SCTP;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP;
			}

			if (key->ip.proto == IPPROTO_TCP) {
				key_expected |= 1ULL << OVS_KEY_ATTR_TCP;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP;
			}

			if (key->ip.proto == IPPROTO_ICMPV6) {
				key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6;
				if (exact_proto)
					mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6;

				/* The ICMPv6 type is kept in tp.src. */
				if (key->ipv6.tp.src ==
						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
				    key->ipv6.tp.src ==
						htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
					key_expected |= 1ULL << OVS_KEY_ATTR_ND;
					if (mask_key &&
					    mask_key->ipv6.tp.src == htons(0xffff))
						mask_allowed |= 1ULL << OVS_KEY_ATTR_ND;
				}
			}
		}
	}

	if ((key_attrs & key_expected) != key_expected) {
		/* Key attributes check failed. */
		OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
				key_attrs, key_expected);
		return false;
	}

	if ((mask_attrs & mask_allowed) != mask_attrs) {
		/* Mask attributes check failed. */
		OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
				mask_attrs, mask_allowed);
		return false;
	}

	return true;
}
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
/*
 * Make sure the first 'len' bytes of 'skb' can be accessed.
 *
 * Returns -EINVAL when the packet is shorter than 'len', -ENOMEM when
 * pskb_may_pull() fails, and 0 otherwise.
 */
static int check_header(struct sk_buff *skb, int len)
{
	if (unlikely(skb->len < len))
		return -EINVAL;

	if (likely(pskb_may_pull(skb, len)))
		return 0;

	return -ENOMEM;
}
/* True when a whole struct arp_eth_header can be pulled at the network offset. */
static bool arphdr_ok(struct sk_buff *skb)
{
	unsigned int need = skb_network_offset(skb) +
			    sizeof(struct arp_eth_header);

	return pskb_may_pull(skb, need);
}
/*
 * Validate the IPv4 header at the network offset and, on success, set the
 * transport header offset to just past it.
 *
 * Returns 0 on success, the error from check_header() when the fixed part
 * of the header is unavailable, or -EINVAL when the header length reported
 * by ip_hdrlen() is smaller than struct iphdr or exceeds the packet.
 */
static int check_iphdr(struct sk_buff *skb)
{
	unsigned int nh_ofs = skb_network_offset(skb);
	unsigned int ip_len;
	int err = check_header(skb, nh_ofs + sizeof(struct iphdr));

	if (unlikely(err))
		return err;

	ip_len = ip_hdrlen(skb);
	if (unlikely(ip_len < sizeof(struct iphdr)))
		return -EINVAL;
	if (unlikely(skb->len < nh_ofs + ip_len))
		return -EINVAL;

	skb_set_transport_header(skb, nh_ofs + ip_len);
	return 0;
}
/*
 * True when the TCP header at the transport offset is usable: the fixed
 * part can be pulled, and the length reported by tcp_hdrlen() is at least
 * sizeof(struct tcphdr) and fits inside the packet.
 */
static bool tcphdr_ok(struct sk_buff *skb)
{
	int th_ofs = skb_transport_offset(skb);
	int tcp_len;

	if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
		return false;

	tcp_len = tcp_hdrlen(skb);
	if (unlikely(tcp_len < sizeof(struct tcphdr)))
		return false;
	if (unlikely(skb->len < th_ofs + tcp_len))
		return false;

	return true;
}
/* True when a whole struct udphdr can be pulled at the transport offset. */
static bool udphdr_ok(struct sk_buff *skb)
{
	unsigned int need = skb_transport_offset(skb) + sizeof(struct udphdr);

	return pskb_may_pull(skb, need);
}
/* True when a whole struct sctphdr can be pulled at the transport offset. */
static bool sctphdr_ok(struct sk_buff *skb)
{
	unsigned int need = skb_transport_offset(skb) + sizeof(struct sctphdr);

	return pskb_may_pull(skb, need);
}
/* True when a whole struct icmphdr can be pulled at the transport offset. */
static bool icmphdr_ok(struct sk_buff *skb)
{
	unsigned int need = skb_transport_offset(skb) + sizeof(struct icmphdr);

	return pskb_may_pull(skb, need);
}
u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
struct timespec cur_ts;
u64 cur_ms, idle_ms;
ktime_get_ts(&cur_ts);
idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
cur_ts.tv_nsec / NSEC_PER_MSEC;
return cur_ms - idle_ms;
}
static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
{
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int nh_len;
int payload_ofs;
struct ipv6hdr *nh;
uint8_t nexthdr;
__be16 frag_off;
int err;
err = check_header(skb, nh_ofs + sizeof(*nh));
if (unlikely(err))
return err;
nh = ipv6_hdr(skb);
nexthdr = nh->nexthdr;
payload_ofs = (u8 *)(nh + 1) - skb->data;
key->ip.proto = NEXTHDR_NONE;
key->ip.tos = ipv6_get_dsfield(nh);
key->ip.ttl = nh->hop_limit;
key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
key->ipv6.addr.src = nh->saddr;
key->ipv6.addr.dst = nh->daddr;
payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
if (unlikely(payload_ofs < 0))
return -EINVAL;
if (frag_off) {
if (frag_off & htons(~0x7))
key->ip.frag = OVS_FRAG_TYPE_LATER;
else
key->ip.frag = OVS_FRAG_TYPE_FIRST;
}
nh_len = payload_ofs - nh_ofs;
skb_set_transport_header(skb, nh_ofs + nh_len);
key->ip.proto = nexthdr;
return nh_len;
}
/* True when a whole struct icmp6hdr can be pulled at the transport offset. */
static bool icmp6hdr_ok(struct sk_buff *skb)
{
	unsigned int need = skb_transport_offset(skb) + sizeof(struct icmp6hdr);

	return pskb_may_pull(skb, need);
}
void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask)
{
const long *m = (long *)((u8 *)&mask->key + mask->range.start);
const long *s = (long *)((u8 *)src + mask->range.start);
long *d = (long *)((u8 *)dst + mask->range.start);
/* The memory outside of the 'mask->range' are not set since
* further operations on 'dst' only uses contents within
* 'mask->range'.
*/
for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
*d++ = *s++ & *m++;
#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f
void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
{
u8 tcp_flags = 0;
if ((flow->key.eth.type == htons(ETH_P_IP) ||
flow->key.eth.type == htons(ETH_P_IPV6)) &&
flow->key.ip.proto == IPPROTO_TCP &&
likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
u8 *tcp = (u8 *)tcp_hdr(skb);
tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
}
spin_lock(&flow->lock);
flow->used = jiffies;
flow->packet_count++;
flow->byte_count += skb->len;
flow->tcp_flags |= tcp_flags;
spin_unlock(&flow->lock);
}
/*
 * Allocate a struct sw_flow_actions followed by 'size' extra bytes.
 *
 * Returns the new object, or ERR_PTR(-ENOMEM) when kmalloc() fails.
 */
struct sw_flow_actions *ovs_flow_actions_alloc(int size)
{
	struct sw_flow_actions *acts;

	acts = kmalloc(sizeof(*acts) + size, GFP_KERNEL);
	if (acts)
		return acts;

	return ERR_PTR(-ENOMEM);
}
/*
 * Allocate a flow from flow_cache with its lock initialised and no
 * actions attached.
 *
 * Returns the new flow, or ERR_PTR(-ENOMEM) when the allocation fails.
 */
struct sw_flow *ovs_flow_alloc(void)
{
	struct sw_flow *new_flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);

	if (!new_flow)
		return ERR_PTR(-ENOMEM);

	new_flow->sf_acts = NULL;
	spin_lock_init(&new_flow->lock);

	return new_flow;
}
/*
 * Return the bucket of 'table' that 'hash' maps to.  The hash is mixed
 * with the table's seed and then reduced with (n_buckets - 1) as a bit
 * mask, which presumably relies on n_buckets being a power of two.
 */
static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
{
	u32 seeded = jhash_1word(hash, table->hash_seed);
	u32 index = seeded & (table->n_buckets - 1);

	return flex_array_get(table->buckets, index);
}
/*
 * Allocate a flex_array of 'n_buckets' hlist heads, fully preallocated and
 * with every head initialised to empty.
 *
 * Returns the array, or NULL when either allocation step fails.
 */
static struct flex_array *alloc_buckets(unsigned int n_buckets)
{
	struct flex_array *buckets;
	int i;

	buckets = flex_array_alloc(sizeof(struct hlist_head),
				   n_buckets, GFP_KERNEL);
	if (!buckets)
		return NULL;

	if (flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL)) {
		flex_array_free(buckets);
		return NULL;
	}

	for (i = 0; i < n_buckets; i++) {
		struct hlist_head *head = flex_array_get(buckets, i);

		INIT_HLIST_HEAD(head);
	}

	return buckets;
}
/* Release a bucket array obtained from alloc_buckets(). */
static void free_buckets(struct flex_array *buckets)
{
	flex_array_free(buckets);
}
/*
 * Allocate an empty flow table with 'new_size' buckets.
 *
 * The table starts with no flows, node version 0, keep_flows cleared, a
 * random hash seed and no mask list attached; callers are expected to
 * attach a mask list before the table is used.
 *
 * Returns the table, or NULL when an allocation fails.
 */
static struct flow_table *__flow_tbl_alloc(int new_size)
{
	struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->buckets = alloc_buckets(new_size);
	if (!table->buckets) {
		kfree(table);
		return NULL;
	}

	table->n_buckets = new_size;
	table->count = 0;
	table->node_ver = 0;
	table->keep_flows = false;
	get_random_bytes(&table->hash_seed, sizeof(u32));
	/* kmalloc() does not zero memory; never leave this pointer wild. */
	table->mask_list = NULL;

	return table;
}
static void __flow_tbl_destroy(struct flow_table *table)
{
int i;
if (table->keep_flows)
goto skip_flows;
for (i = 0; i < table->n_buckets; i++) {
struct sw_flow *flow;
struct hlist_head *head = flex_array_get(table->buckets, i);
hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
hlist_del(&flow->hash_node[ver]);
BUG_ON(!list_empty(table->mask_list));
kfree(table->mask_list);
skip_flows:
free_buckets(table->buckets);
kfree(table);
}
/*
 * Allocate a flow table with 'new_size' buckets together with its own,
 * initially empty, mask list.
 *
 * Returns the table, or NULL when any allocation fails.
 */
struct flow_table *ovs_flow_tbl_alloc(int new_size)
{
	struct flow_table *table = __flow_tbl_alloc(new_size);
	struct list_head *masks;

	if (!table)
		return NULL;

	masks = kmalloc(sizeof(*masks), GFP_KERNEL);
	table->mask_list = masks;
	if (!masks) {
		/* Skip the flow/mask teardown: there is nothing to tear down. */
		table->keep_flows = true;
		__flow_tbl_destroy(table);
		return NULL;
	}

	INIT_LIST_HEAD(masks);
	return table;
}
/*
 * RCU callback used for deferred table destruction: recover the table
 * from its embedded rcu_head and destroy it.
 */
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
	struct flow_table *table = container_of(rcu, struct flow_table, rcu);

	__flow_tbl_destroy(table);
}
/*
 * Destroy 'table'.  A NULL table is ignored.
 *
 * @deferred: when true the destruction is queued with call_rcu() and runs
 *            after a grace period; when false it happens immediately.
 */
void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
	if (!table)
		return;

	if (deferred)
		call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
	else
		__flow_tbl_destroy(table);
}
/*
 * Return the next flow of 'table' for an iteration whose position is kept
 * in '*bucket' (bucket index) and '*last' (number of entries already
 * returned from that bucket).  Both are advanced so that the following
 * call continues after the returned flow.
 *
 * Returns NULL once every bucket has been exhausted.
 */
struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int ver = table->node_ver;
	int pos;

	for (; *bucket < table->n_buckets; (*bucket)++, *last = 0) {
		pos = 0;
		head = flex_array_get(table->buckets, *bucket);
		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
			/* First entry not yet handed out in this bucket. */
			if (pos >= *last) {
				*last = pos + 1;
				return flow;
			}
			pos++;
		}
	}

	return NULL;
}
/*
 * Link 'flow' at the head of the bucket selected by flow->hash, using the
 * hash node that belongs to the table's current node version, and bump
 * the table's flow count.
 */
static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct hlist_head *bucket = find_bucket(table, flow->hash);

	hlist_add_head_rcu(&flow->hash_node[table->node_ver], bucket);
	table->count++;
}
static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
{
int old_ver;
int i;
old_ver = old->node_ver;
new->node_ver = !old_ver;
/* Insert in new table. */
for (i = 0; i < old->n_buckets; i++) {
struct sw_flow *flow;
struct hlist_head *head;
head = flex_array_get(old->buckets, i);
hlist_for_each_entry(flow, head, hash_node[old_ver])
old->keep_flows = true;
}
static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
{
struct flow_table *new_table;
new_table = __flow_tbl_alloc(n_buckets);
if (!new_table)
return ERR_PTR(-ENOMEM);
flow_table_copy_flows(table, new_table);
return new_table;
}
/*
 * Rebuild 'table' into a fresh table with the same number of buckets.
 * Returns the new table or an ERR_PTR() value from __flow_tbl_rehash().
 */
struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
{
	int same_size = table->n_buckets;

	return __flow_tbl_rehash(table, same_size);
}
/*
 * Rebuild 'table' into a fresh table with twice as many buckets.
 * Returns the new table or an ERR_PTR() value from __flow_tbl_rehash().
 */
struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
{
	int doubled = table->n_buckets * 2;

	return __flow_tbl_rehash(table, doubled);
}
/*
 * Release 'flow' right away: free its actions and return the flow itself
 * to flow_cache.
 */
static void __flow_free(struct sw_flow *flow)
{
	/* __force: the actions pointer carries a sparse annotation. */
	kfree((struct sf_flow_acts __force *)flow->sf_acts);

	kmem_cache_free(flow_cache, flow);
}
/*
 * RCU callback used for deferred flow release: recover the flow from its
 * embedded rcu_head and free it.
 */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);

	__flow_free(flow);
}
/*
 * Drop the flow's reference on its mask and free the flow.  A NULL flow
 * is ignored.
 *
 * @deferred: when true the flow is freed through call_rcu() after a grace
 *            period; when false it is freed immediately.  The same flag is
 *            forwarded to ovs_sw_flow_mask_del_ref().
 */
void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
	if (!flow)
		return;

	ovs_sw_flow_mask_del_ref(flow->mask, deferred);

	if (deferred)
		call_rcu(&flow->rcu, rcu_free_flow_callback);
	else
		__flow_free(flow);
}
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
 * The caller must hold rcu_read_lock for this to be sensible. */
void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
	/* 'rcu' names the rcu_head member embedded in the actions object. */
	kfree_rcu(sf_acts, rcu);
}
static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
struct qtag_prefix {
__be16 eth_type; /* ETH_P_8021Q */
__be16 tci;
};
struct qtag_prefix *qp;
if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
return 0;
if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
sizeof(__be16))))
return -ENOMEM;
qp = (struct qtag_prefix *) skb->data;
key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
__skb_pull(skb, sizeof(struct qtag_prefix));
return 0;
}
/*
 * Consume the EtherType (or 802.3 length plus LLC/SNAP header) found at
 * skb->data and return the effective EtherType in network byte order.
 *
 * - A 16-bit value of 0x600 or more is an Ethernet II type and is
 *   returned as is.
 * - Anything smaller is an 802.3 length field.  If an LLC/SNAP header with
 *   a zero OUI follows and carries a type of 0x600 or more, that type is
 *   returned; every other 802.3 frame yields ETH_P_802_2.
 * - htons(0) is returned when the LLC/SNAP header cannot be pulled.
 */
static __be16 parse_ethertype(struct sk_buff *skb)
{
	struct llc_snap_hdr {
		u8  dsap;  /* Always 0xAA */
		u8  ssap;  /* Always 0xAA */
		u8  ctrl;
		u8  oui[3];
		__be16 ethertype;
	};
	struct llc_snap_hdr *llc;
	__be16 proto;

	proto = *(__be16 *) skb->data;
	__skb_pull(skb, sizeof(__be16));

	/* 0x600 (1536) is the smallest valid Ethernet II type. */
	if (ntohs(proto) >= 0x600)
		return proto;

	if (skb->len < sizeof(struct llc_snap_hdr))
		return htons(ETH_P_802_2);

	if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
		return htons(0);

	llc = (struct llc_snap_hdr *) skb->data;
	if (llc->dsap != LLC_SAP_SNAP ||
	    llc->ssap != LLC_SAP_SNAP ||
	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
		return htons(ETH_P_802_2);

	__skb_pull(skb, sizeof(struct llc_snap_hdr));

	if (ntohs(llc->ethertype) >= 0x600)
		return llc->ethertype;

	return htons(ETH_P_802_2);
}
static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
{
struct icmp6hdr *icmp = icmp6_hdr(skb);
/* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
key->ipv6.tp.src = htons(icmp->icmp6_type);
key->ipv6.tp.dst = htons(icmp->icmp6_code);
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
int icmp_len = skb->len - skb_transport_offset(skb);
struct nd_msg *nd;
int offset;
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
if (unlikely(icmp_len < sizeof(*nd)))
return 0;
if (unlikely(skb_linearize(skb)))
return -ENOMEM;
nd = (struct nd_msg *)skb_transport_header(skb);
key->ipv6.nd.target = nd->target;
icmp_len -= sizeof(*nd);
offset = 0;
while (icmp_len >= 8) {
struct nd_opt_hdr *nd_opt =
(struct nd_opt_hdr *)(nd->opt + offset);
int opt_len = nd_opt->nd_opt_len * 8;
if (unlikely(!opt_len || opt_len > icmp_len))
/* Store the link layer address if the appropriate
* option is provided. It is considered an error if
* the same link layer option is specified twice.
*/
if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
&& opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
goto invalid;
memcpy(key->ipv6.nd.sll,
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
&& opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
goto invalid;
memcpy(key->ipv6.nd.tll,
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
}
icmp_len -= opt_len;
offset += opt_len;
}
}
invalid:
memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
}
/**
* ovs_flow_extract - extracts a flow key from an Ethernet frame.
* @skb: sk_buff that contains the frame, with skb->data pointing to the
* Ethernet header
* @in_port: port number on which @skb was received.
* @key: output flow key
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
* Returns 0 if successful, otherwise a negative errno value.
*
* Initializes @skb header pointers as follows:
*
* - skb->mac_header: the Ethernet header.
*
* - skb->network_header: just past the Ethernet header, or just past the
* VLAN header, to the first byte of the Ethernet payload.
*
* - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
* on output, then just past the IP header, if one is present and
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
*/
int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
struct ethhdr *eth;
memset(key, 0, sizeof(*key));
key->phy.priority = skb->priority;
if (OVS_CB(skb)->tun_key)
memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
key->phy.skb_mark = skb->mark;
skb_reset_mac_header(skb);
/* Link layer. We are guaranteed to have at least the 14 byte Ethernet
* header in the linear data area.
*/
eth = eth_hdr(skb);
memcpy(key->eth.src, eth->h_source, ETH_ALEN);
memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
__skb_pull(skb, 2 * ETH_ALEN);
/* We are going to push all headers that we pull, so no need to
* update skb->csum here.
*/
if (vlan_tx_tag_present(skb))
key->eth.tci = htons(skb->vlan_tci);
else if (eth->h_proto == htons(ETH_P_8021Q))
if (unlikely(parse_vlan(skb, key)))
return -ENOMEM;
key->eth.type = parse_ethertype(skb);
if (unlikely(key->eth.type == htons(0)))
return -ENOMEM;
skb_reset_network_header(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
/* Network layer. */
if (key->eth.type == htons(ETH_P_IP)) {
struct iphdr *nh;
__be16 offset;
error = check_iphdr(skb);
if (unlikely(error)) {
if (error == -EINVAL) {
skb->transport_header = skb->network_header;
error = 0;
}
}
nh = ip_hdr(skb);
key->ipv4.addr.src = nh->saddr;
key->ipv4.addr.dst = nh->daddr;
key->ip.proto = nh->protocol;
key->ip.tos = nh->tos;
key->ip.ttl = nh->ttl;
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
}
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == IPPROTO_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
}
} else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
}
} else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->ipv4.tp.src = sctp->source;
key->ipv4.tp.dst = sctp->dest;
}
} else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store
* them in 16-bit network byte order. */
key->ipv4.tp.src = htons(icmp->type);
key->ipv4.tp.dst = htons(icmp->code);
}
}

Mehak Mahajan
committed
} else if ((key->eth.type == htons(ETH_P_ARP) ||
key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
struct arp_eth_header *arp;
arp = (struct arp_eth_header *)skb_network_header(skb);
if (arp->ar_hrd == htons(ARPHRD_ETHER)
&& arp->ar_pro == htons(ETH_P_IP)
&& arp->ar_hln == ETH_ALEN
&& arp->ar_pln == 4) {
/* We only match on the lower 8 bits of the opcode. */
if (ntohs(arp->ar_op) <= 0xff)
key->ip.proto = ntohs(arp->ar_op);
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
skb->transport_header = skb->network_header;
}
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp.src = tcp->source;
key->ipv6.tp.dst = tcp->dest;
}
} else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv6.tp.src = udp->source;
key->ipv6.tp.dst = udp->dest;
}
} else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->ipv6.tp.src = sctp->source;
key->ipv6.tp.dst = sctp->dest;
}
} else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
error = parse_icmpv6(skb, key, nh_len);
if (error)
return error;