static int xenvif_tx_check_gop(struct xenvif *vif,
struct sk_buff *skb,
struct gnttab_copy **gopp)
{
struct gnttab_copy *gop = *gopp;
u16 pending_idx = *((u16 *)skb->data);
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct pending_tx_info *tx_info;
int nr_frags = shinfo->nr_frags;
int i, err, start;
u16 peek; /* peek into next tx request */
/* Check status of header. */
err = gop->status;
if (unlikely(err))
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
for (i = start; i < nr_frags; i++) {
int j, newerr;
pending_ring_idx_t head;
pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
tx_info = &vif->pending_tx_info[pending_idx];
head = tx_info->head;
/* Check error status: if okay then remember grant handle. */
do {
newerr = (++gop)->status;
if (newerr)
break;
peek = vif->pending_ring[pending_index(++head)];
} while (!pending_tx_is_head(vif, peek));
if (likely(!newerr)) {
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
xenvif_idx_release(vif, pending_idx,
XEN_NETIF_RSP_OKAY);
continue;
}
/* Error on this fragment: respond to client with an error. */
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
/* Not the first error? Preceding frags already invalidated. */
if (err)
continue;
/* First error: invalidate header and preceding fragments. */
pending_idx = *((u16 *)skb->data);
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
for (j = start; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
xenvif_idx_release(vif, pending_idx,
XEN_NETIF_RSP_OKAY);
}
/* Remember the error: invalidate all subsequent fragments. */
err = newerr;
}
*gopp = gop + 1;
return err;
}
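/* Attach the copied fragment data to the skb: each frag slot still
 * carries a pending index, which is resolved here to its tx request and
 * backing page. An extra page reference is taken so the page survives
 * the xenvif_idx_release() that completes the slot.
 */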
static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
int i;
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = shinfo->frags + i;
struct xen_netif_tx_request *txp;
struct page *page;
u16 pending_idx;
pending_idx = frag_get_pending_idx(frag);
txp = &vif->pending_tx_info[pending_idx].req;
page = virt_to_page(idx_to_kaddr(vif, pending_idx));
__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
skb->len += txp->size;
skb->data_len += txp->size;
skb->truesize += txp->size;
/* Take an extra reference to offset xenvif_idx_release */
get_page(vif->mmap_pages[pending_idx]);
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
}
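/* Consume XEN_NETTXF_extra_info slots from the shared ring into the
 * caller's extras[] array, advancing tx.req_cons as each one is read.
 * Returns the remaining work_to_do budget, or a negative errno if the
 * frontend supplied a malformed or truncated chain.
 */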
static int xenvif_get_extras(struct xenvif *vif,
struct xen_netif_extra_info *extras,
int work_to_do)
{
struct xen_netif_extra_info extra;
RING_IDX cons = vif->tx.req_cons;
do {
if (unlikely(work_to_do-- <= 0)) {
netdev_err(vif->dev, "Missing extra info\n");
return -EBADR;
}
memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons),
sizeof(extra));
if (unlikely(!extra.type ||
extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
vif->tx.req_cons = ++cons;
netdev_err(vif->dev,
"Invalid extra type: %d\n", extra.type);
return -EINVAL;
}
memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
vif->tx.req_cons = ++cons;
} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
return work_to_do;
}
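/* Translate a GSO extra-info segment from the frontend into the
 * corresponding skb_shared_info GSO fields. SKB_GSO_DODGY is set so the
 * stack re-validates the headers and computes gso_segs itself.
 */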
static int xenvif_set_skb_gso(struct xenvif *vif,
struct sk_buff *skb,
struct xen_netif_extra_info *gso)
netdev_err(vif->dev, "GSO size must not be zero.\n");
switch (gso->u.gso.type) {
case XEN_NETIF_GSO_TYPE_TCPV4:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
break;
case XEN_NETIF_GSO_TYPE_TCPV6:
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
break;
default:
netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
return -EINVAL;
}
skb_shinfo(skb)->gso_size = gso->u.gso.size;
/* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
skb_shinfo(skb)->gso_segs = 0;
return 0;
}
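/* Ensure at least @len bytes of the packet are linear in the skb head,
 * pulling from the fragments if necessary, but never pulling more than
 * @max bytes. Returns 0 on success, -ENOMEM if the pull fails, or
 * -EPROTO if the packet is too short to contain the requested headers.
 */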
static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len,
unsigned int max)
{
if (skb_headlen(skb) >= len)
return 0;
/* If we need to pullup then pullup to the max, so we
* won't need to do it again.
*/
if (max > skb->len)
max = skb->len;
if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
return -ENOMEM;
if (skb_headlen(skb) < len)
return -EPROTO;
return 0;
}
/* This value should be large enough to cover a tagged ethernet header plus
* maximally sized IP and TCP or UDP headers.
*/
#define MAX_IP_HDR_LEN 128
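/* Locate the TCP or UDP header of an IPv4 packet and set up a partial
 * checksum over it; optionally recompute the pseudo-header checksum when
 * the frontend sent a GSO frame without marking the checksum as blank.
 */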
static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
int recalculate_partial_csum)
{
unsigned int off;
bool fragment;
int err;
fragment = false;
err = maybe_pull_tail(skb,
sizeof(struct iphdr),
MAX_IP_HDR_LEN);
if (err < 0)
goto out;
if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
fragment = true;
off = ip_hdrlen(skb);
err = -EPROTO;
if (fragment)
goto out;
switch (ip_hdr(skb)->protocol) {
case IPPROTO_TCP:
err = maybe_pull_tail(skb,
off + sizeof(struct tcphdr),
MAX_IP_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct tcphdr, check)))
goto out;
if (recalculate_partial_csum)
tcp_hdr(skb)->check =
~csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
skb->len - off,
IPPROTO_TCP, 0);
break;
case IPPROTO_UDP:
err = maybe_pull_tail(skb,
off + sizeof(struct udphdr),
MAX_IP_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct udphdr, check)))
goto out;
if (recalculate_partial_csum)
udp_hdr(skb)->check =
~csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
skb->len - off,
IPPROTO_UDP, 0);
break;
default:
goto out;
}
err = 0;
out:
return err;
}
/* This value should be large enough to cover a tagged ethernet header plus
* an IPv6 header, all options, and a maximal TCP or UDP header.
*/
#define MAX_IPV6_HDR_LEN 256
#define OPT_HDR(type, skb, off) \
(type *)(skb_network_header(skb) + (off))
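/* Walk the IPv6 extension header chain to find the transport header,
 * then set up a partial TCP or UDP checksum as in the IPv4 case.
 * Fragmented packets are left untouched.
 */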
static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
int recalculate_partial_csum)
{
int err;
u8 nexthdr;
unsigned int off;
unsigned int len;
bool fragment;
bool done;
fragment = false;
done = false;
off = sizeof(struct ipv6hdr);
err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
nexthdr = ipv6_hdr(skb)->nexthdr;
len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
while (off <= len && !done) {
switch (nexthdr) {
case IPPROTO_DSTOPTS:
case IPPROTO_HOPOPTS:
case IPPROTO_ROUTING: {
struct ipv6_opt_hdr *hp;
err = maybe_pull_tail(skb,
off +
sizeof(struct ipv6_opt_hdr),
MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
nexthdr = hp->nexthdr;
off += ipv6_optlen(hp);
break;
}
case IPPROTO_AH: {
struct ip_auth_hdr *hp;
err = maybe_pull_tail(skb,
off +
sizeof(struct ip_auth_hdr),
MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
hp = OPT_HDR(struct ip_auth_hdr, skb, off);
nexthdr = hp->nexthdr;
off += ipv6_authlen(hp);
break;
}
case IPPROTO_FRAGMENT: {
struct frag_hdr *hp;
err = maybe_pull_tail(skb,
off +
sizeof(struct frag_hdr),
MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
hp = OPT_HDR(struct frag_hdr, skb, off);
if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
fragment = true;
nexthdr = hp->nexthdr;
off += sizeof(struct frag_hdr);
break;
}
default:
done = true;
break;
}
}
if (!done || fragment)
goto out;
switch (nexthdr) {
case IPPROTO_TCP:
err = maybe_pull_tail(skb,
off + sizeof(struct tcphdr),
MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct tcphdr, check)))
goto out;
if (recalculate_partial_csum)
tcp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len - off,
IPPROTO_TCP, 0);
break;
case IPPROTO_UDP:
err = maybe_pull_tail(skb,
off + sizeof(struct udphdr),
MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct udphdr, check)))
goto out;
if (recalculate_partial_csum)
udp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len - off,
IPPROTO_UDP, 0);
break;
default:
goto out;
}
err = 0;
out:
return err;
}
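/* Dispatch checksum setup by L3 protocol; anything other than IPv4 or
 * IPv6 is left with the initial -EPROTO and dropped by the caller.
 */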
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
int err = -EPROTO;
int recalculate_partial_csum = 0;
/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
* peers can fail to set NETRXF_csum_blank when sending a GSO
* frame. In this case force the SKB to CHECKSUM_PARTIAL and
* recalculate the partial checksum.
*/
if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
vif->rx_gso_checksum_fixup++;
skb->ip_summed = CHECKSUM_PARTIAL;
recalculate_partial_csum = 1;
}
/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
if (skb->ip_summed != CHECKSUM_PARTIAL)
return 0;
if (skb->protocol == htons(ETH_P_IP))
err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
else if (skb->protocol == htons(ETH_P_IPV6))
err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);
return err;
}
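/* Rate limiting: credit is replenished once per credit timeout window.
 * If the current request exceeds the remaining credit, arm a timer for
 * the start of the next window and tell the caller to stop building
 * gops for now.
 */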
static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
{
u64 now = get_jiffies_64();
u64 next_credit = vif->credit_window_start +
msecs_to_jiffies(vif->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
if (timer_pending(&vif->credit_timeout))
return true;
/* Passed the point where we can replenish credit? */
if (time_after_eq64(now, next_credit)) {
vif->credit_window_start = now;
tx_add_credit(vif);
}
/* Still too big to send right now? Set a callback. */
if (size > vif->remaining_credit) {
vif->credit_timeout.data =
(unsigned long)vif;
vif->credit_timeout.function =
tx_credit_callback;
mod_timer(&vif->credit_timeout,
next_credit);
vif->credit_window_start = next_credit;
return true;
}
return false;
}
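/* Drain tx requests from the shared ring, validate them, and turn each
 * packet into a queued skb plus a batch of grant-copy operations in
 * vif->tx_copy_ops. Returns the number of copy ops built so the caller
 * can hand them to gnttab_batch_copy().
 */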
static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
{
struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
struct sk_buff *skb;
int ret;
while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
< MAX_PENDING_REQS) &&
(skb_queue_len(&vif->tx_queue) < budget)) {
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
struct page *page;
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
u16 pending_idx;
RING_IDX idx;
int work_to_do;
unsigned int data_len;
pending_ring_idx_t index;
if (vif->tx.sring->req_prod - vif->tx.req_cons >
XEN_NETIF_TX_RING_SIZE) {
netdev_err(vif->dev,
"Impossible number of requests. "
"req_prod %d, req_cons %d, size %ld\n",
vif->tx.sring->req_prod, vif->tx.req_cons,
XEN_NETIF_TX_RING_SIZE);
continue;
}
work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx);
if (!work_to_do)
break;
idx = vif->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq));
/* Credit-based scheduling. */
if (txreq.size > vif->remaining_credit &&
tx_credit_exceeded(vif, txreq.size))
break;
vif->remaining_credit -= txreq.size;
work_to_do--;
vif->tx.req_cons = ++idx;
memset(extras, 0, sizeof(extras));
if (txreq.flags & XEN_NETTXF_extra_info) {
work_to_do = xenvif_get_extras(vif, extras,
work_to_do);
idx = vif->tx.req_cons;
if (unlikely(work_to_do < 0))
break;
}
ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do);
if (unlikely(ret < 0))
break;
idx += ret;
if (unlikely(txreq.size < ETH_HLEN)) {
netdev_dbg(vif->dev,
"Bad packet size: %d\n", txreq.size);
xenvif_tx_err(vif, &txreq, idx);
break;
}
/* No crossing a page as the payload mustn't fragment. */
if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
netdev_err(vif->dev,
"txreq.offset: %x, size: %u, end: %lu\n",
txreq.offset, txreq.size,
(txreq.offset&~PAGE_MASK) + txreq.size);
xenvif_fatal_tx_err(vif);
break;
}
index = pending_index(vif->pending_cons);
pending_idx = vif->pending_ring[index];
data_len = (txreq.size > PKT_PROT_LEN &&
ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
PKT_PROT_LEN : txreq.size;
skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(skb == NULL)) {
netdev_dbg(vif->dev,
"Can't allocate a skb in start_xmit.\n");
break;
}
/* Packets passed to netif_rx() must have some headroom. */
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
struct xen_netif_extra_info *gso;
gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
if (xenvif_set_skb_gso(vif, skb, gso)) {
/* Failure in xenvif_set_skb_gso is fatal. */
kfree_skb(skb);
break;
}
}
/* XXX could copy straight to head */
page = xenvif_alloc_page(vif, pending_idx);
if (!page) {
kfree_skb(skb);
xenvif_tx_err(vif, &txreq, idx);
break;
}
gop->source.u.ref = txreq.gref;
gop->source.domid = vif->domid;
gop->source.offset = txreq.offset;
gop->dest.u.gmfn = virt_to_mfn(page_address(page));
gop->dest.domid = DOMID_SELF;
gop->dest.offset = txreq.offset;
gop->len = txreq.size;
gop->flags = GNTCOPY_source_gref;
gop++;
memcpy(&vif->pending_tx_info[pending_idx].req,
&txreq, sizeof(txreq));
vif->pending_tx_info[pending_idx].head = index;
*((u16 *)skb->data) = pending_idx;
__skb_put(skb, data_len);
skb_shinfo(skb)->nr_frags = ret;
if (data_len < txreq.size) {
skb_shinfo(skb)->nr_frags++;
frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
pending_idx);
} else {
frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
INVALID_PENDING_IDX);
}
vif->pending_cons++;
request_gop = xenvif_get_requests(vif, skb, txfrags, gop);
if (request_gop == NULL) {
kfree_skb(skb);
xenvif_tx_err(vif, &txreq, idx);
break;
}
gop = request_gop;
__skb_queue_tail(&vif->tx_queue, skb);
vif->tx.req_cons = idx;
if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
break;
}
return gop - vif->tx_copy_ops;
}
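/* Second half of the tx path: runs after the grant copies have been
 * performed. Each queued skb has its header copied into the linear
 * area, its frags filled in, its checksum state derived from the
 * request flags, and is then handed to the network stack.
 */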
static int xenvif_tx_submit(struct xenvif *vif)
{
struct gnttab_copy *gop = vif->tx_copy_ops;
struct sk_buff *skb;
int work_done = 0;
while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) {
struct xen_netif_tx_request *txp;
u16 pending_idx;
unsigned data_len;
pending_idx = *((u16 *)skb->data);
txp = &vif->pending_tx_info[pending_idx].req;
if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) {
netdev_dbg(vif->dev, "netback grant failed.\n");
skb_shinfo(skb)->nr_frags = 0;
kfree_skb(skb);
continue;
}
data_len = skb->len;
memcpy(skb->data,
(void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
data_len);
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
txp->offset += data_len;
txp->size -= data_len;
} else {
/* Schedule a response immediately. */
xenvif_idx_release(vif, pending_idx,
XEN_NETIF_RSP_OKAY);
}
if (txp->flags & XEN_NETTXF_csum_blank)
skb->ip_summed = CHECKSUM_PARTIAL;
else if (txp->flags & XEN_NETTXF_data_validated)
skb->ip_summed = CHECKSUM_UNNECESSARY;
xenvif_fill_frags(vif, skb);
if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
int target = min_t(int, skb->len, PKT_PROT_LEN);
__pskb_pull_tail(skb, target - skb_headlen(skb));
}
skb->dev = vif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
skb_reset_network_header(skb);
if (checksum_setup(vif, skb)) {
netdev_dbg(vif->dev,
"Can't setup checksum in net_tx_action\n");
kfree_skb(skb);
continue;
}
skb_probe_transport_header(skb, 0);
vif->dev->stats.rx_bytes += skb->len;
vif->dev->stats.rx_packets++;
work_done++;
netif_receive_skb(skb);
}
return work_done;
}
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
unsigned nr_gops;
int work_done;
if (unlikely(!tx_work_todo(vif)))
return 0;
nr_gops = xenvif_tx_build_gops(vif, budget);
if (nr_gops == 0)
return 0;
gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
work_done = xenvif_tx_submit(vif);
return work_done;
}
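/* Complete a pending tx request: send a response for every slot that
 * made up the packet, return the slots to the pending ring, and drop
 * the reference on the backing page.
 */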
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
u8 status)
{
struct pending_tx_info *pending_tx_info;
pending_ring_idx_t head;
u16 peek; /* peek into next tx request */
BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
/* Already complete? */
if (vif->mmap_pages[pending_idx] == NULL)
return;
pending_tx_info = &vif->pending_tx_info[pending_idx];
head = pending_tx_info->head;
BUG_ON(!pending_tx_is_head(vif, head));
BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
do {
pending_ring_idx_t index;
pending_ring_idx_t idx = pending_index(head);
u16 info_idx = vif->pending_ring[idx];
pending_tx_info = &vif->pending_tx_info[info_idx];
make_tx_response(vif, &pending_tx_info->req, status);
/* Setting any number other than
* INVALID_PENDING_RING_IDX indicates this slot is
* starting a new packet / ending a previous packet.
*/
pending_tx_info->head = 0;
index = pending_index(vif->pending_prod++);
vif->pending_ring[index] = vif->pending_ring[info_idx];
peek = vif->pending_ring[pending_index(++head)];
} while (!pending_tx_is_head(vif, peek));
put_page(vif->mmap_pages[pending_idx]);
vif->mmap_pages[pending_idx] = NULL;
}
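/* Write a single tx response onto the shared ring and notify the
 * frontend if it is waiting. Requests that carried extra info consume
 * an additional response slot, which is filled with XEN_NETIF_RSP_NULL.
 */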
static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
s8 st)
{
RING_IDX i = vif->tx.rsp_prod_pvt;
struct xen_netif_tx_response *resp;
int notify;
resp = RING_GET_RESPONSE(&vif->tx, i);
resp->id = txp->id;
resp->status = st;
if (txp->flags & XEN_NETTXF_extra_info)
RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL;
vif->tx.rsp_prod_pvt = ++i;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify);
if (notify)
notify_remote_via_irq(vif->tx_irq);
}
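/* Build an rx response in place on the shared ring; the status field
 * carries either the byte count or, for errors, the negative status
 * code. The caller is responsible for pushing responses and notifying.
 */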
static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
u16 id,
s8 st,
u16 offset,
u16 size,
u16 flags)
{
RING_IDX i = vif->rx.rsp_prod_pvt;
struct xen_netif_rx_response *resp;
resp = RING_GET_RESPONSE(&vif->rx, i);
resp->offset = offset;
resp->flags = flags;
resp->id = id;
resp->status = (s16)size;
if (st < 0)
resp->status = (s16)st;
vif->rx.rsp_prod_pvt = ++i;
return resp;
}
static inline int rx_work_todo(struct xenvif *vif)
{
return !skb_queue_empty(&vif->rx_queue);
}
static inline int tx_work_todo(struct xenvif *vif)
{
if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
(nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
< MAX_PENDING_REQS))
return 1;
return 0;
}
void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
if (vif->tx.sring)
xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
vif->tx.sring);
if (vif->rx.sring)
xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
vif->rx.sring);
}
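/* Map the frontend's tx and rx shared rings into the backend's address
 * space and initialise the back-ring state. On failure any ring that
 * was already mapped is torn down again.
 */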
int xenvif_map_frontend_rings(struct xenvif *vif,
grant_ref_t tx_ring_ref,
grant_ref_t rx_ring_ref)
{
void *addr;
struct xen_netif_tx_sring *txs;
struct xen_netif_rx_sring *rxs;
int err = -ENOMEM;
err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
tx_ring_ref, &addr);
if (err)
goto err;
txs = (struct xen_netif_tx_sring *)addr;
BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE);
err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
rx_ring_ref, &addr);
if (err)
goto err;
rxs = (struct xen_netif_rx_sring *)addr;
BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE);
vif->rx_req_cons_peek = 0;
return 0;
err:
xenvif_unmap_frontend_rings(vif);
return err;
}
int xenvif_kthread(void *data)
{
struct xenvif *vif = data;
while (!kthread_should_stop()) {
wait_event_interruptible(vif->wq,
rx_work_todo(vif) ||
kthread_should_stop());
if (kthread_should_stop())
break;
if (rx_work_todo(vif))
xenvif_rx_action(vif);
cond_resched();
}
return 0;
}
static int __init netback_init(void)
{
int rc = 0;
if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) {
pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n",
fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX);
fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX;
}
rc = xenvif_xenbus_init();
if (rc)
goto failed_init;
return 0;
failed_init:
return rc;
}
module_init(netback_init);
static void __exit netback_fini(void)
{
xenvif_xenbus_fini();
}
module_exit(netback_fini);
MODULE_ALIAS("xen-backend:vif");