/* * CoDel - The Controlled-Delay Active Queue Management algorithm * * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include "opt_altq.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ #include <sys/param.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/socket.h> #include <sys/systm.h> #include <net/if.h> #include <net/if_var.h> #include <net/if_private.h> #include <netinet/in.h> #include <netpfil/pf/pf.h> #include <netpfil/pf/pf_altq.h> #include <net/altq/if_altq.h> #include <net/altq/altq.h> #include <net/altq/altq_codel.h> static int codel_should_drop(struct codel *, class_queue_t *, struct mbuf *, u_int64_t); static void codel_Newton_step(struct codel_vars *); static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); #define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) #define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) #define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) #define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) static int codel_request(struct ifaltq *, int, void *); static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); static struct mbuf *codel_dequeue(struct ifaltq *, int); int codel_pfattach(struct pf_altq *a) { struct ifnet *ifp; if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) return (EINVAL); return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, codel_enqueue, codel_dequeue, codel_request)); } int codel_add_altq(struct ifnet *ifp, struct pf_altq *a) { struct codel_if *cif; struct codel_opts *opts; if (ifp == NULL) return (EINVAL); if (!ALTQ_IS_READY(&ifp->if_snd)) return (ENODEV); opts = &a->pq_u.codel_opts; cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); if (cif == NULL) return (ENOMEM); cif->cif_bandwidth = a->ifbandwidth; cif->cif_ifq = &ifp->if_snd; cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (cif->cl_q == NULL) { free(cif, M_DEVBUF); return (ENOMEM); } if (a->qlimit == 0) a->qlimit = 50; /* use default. */ qlimit(cif->cl_q) = a->qlimit; qtype(cif->cl_q) = Q_CODEL; qlen(cif->cl_q) = 0; qsize(cif->cl_q) = 0; if (opts->target == 0) opts->target = 5; if (opts->interval == 0) opts->interval = 100; cif->codel.params.target = machclk_freq * opts->target / 1000; cif->codel.params.interval = machclk_freq * opts->interval / 1000; cif->codel.params.ecn = opts->ecn; cif->codel.stats.maxpacket = 256; cif->cl_stats.qlength = qlen(cif->cl_q); cif->cl_stats.qlimit = qlimit(cif->cl_q); /* keep the state in pf_altq */ a->altq_disc = cif; return (0); } int codel_remove_altq(struct pf_altq *a) { struct codel_if *cif; if ((cif = a->altq_disc) == NULL) return (EINVAL); a->altq_disc = NULL; if (cif->cl_q) free(cif->cl_q, M_DEVBUF); free(cif, M_DEVBUF); return (0); } int codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct codel_if *cif; struct codel_ifstats stats; int error = 0; if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) return (EBADF); if (*nbytes < sizeof(stats)) return (EINVAL); stats = cif->cl_stats; stats.stats = cif->codel.stats; if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) return (error); *nbytes = sizeof(stats); return (0); } static int codel_request(struct ifaltq *ifq, int req, void *arg) { struct codel_if *cif = (struct codel_if *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); switch (req) { case ALTRQ_PURGE: if (!ALTQ_IS_ENABLED(cif->cif_ifq)) break; if (qempty(cif->cl_q)) break; while ((m = _getq(cif->cl_q)) != NULL) { PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); m_freem(m); IFQ_DEC_LEN(cif->cif_ifq); } cif->cif_ifq->ifq_len = 0; break; } return (0); } static int codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) { struct codel_if *cif = (struct codel_if *) ifq->altq_disc; IFQ_LOCK_ASSERT(ifq); /* grab class set by classifier */ if ((m->m_flags & M_PKTHDR) == 0) { /* should not happen */ printf("altq: packet for %s does not have pkthdr\n", ifq->altq_ifp->if_xname); m_freem(m); PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); return (ENOBUFS); } if (codel_addq(&cif->codel, cif->cl_q, m)) { PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); return (ENOBUFS); } IFQ_INC_LEN(ifq); return (0); } static struct mbuf * codel_dequeue(struct ifaltq *ifq, int op) { struct codel_if *cif = (struct codel_if *)ifq->altq_disc; struct mbuf *m; IFQ_LOCK_ASSERT(ifq); if (IFQ_IS_EMPTY(ifq)) return (NULL); if (op == ALTDQ_POLL) return (qhead(cif->cl_q)); m = codel_getq(&cif->codel, cif->cl_q); if (m != NULL) { IFQ_DEC_LEN(ifq); PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); return (m); } return (NULL); } struct codel * codel_alloc(int target, int interval, int ecn) { struct codel *c; c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); if (c != NULL) { c->params.target = machclk_freq * target / 1000; c->params.interval = machclk_freq * interval / 1000; c->params.ecn = ecn; c->stats.maxpacket = 256; } return (c); } void codel_destroy(struct codel *c) { free(c, M_DEVBUF); } #define MTAG_CODEL 1438031249 int codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) { struct m_tag *mtag; uint64_t *enqueue_time; if (qlen(q) < qlimit(q)) { mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); if (mtag == NULL) { mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), M_NOWAIT); if (mtag != NULL) m_tag_prepend(m, mtag); } if (mtag == NULL) { m_freem(m); return (-1); } enqueue_time = (uint64_t *)(mtag + 1); *enqueue_time = read_machclk(); _addq(q, m); return (0); } c->drop_overlimit++; m_freem(m); return (-1); } static int codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m, u_int64_t now) { struct m_tag *mtag; uint64_t *enqueue_time; if (m == NULL) { c->vars.first_above_time = 0; return (0); } mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); if (mtag == NULL) { /* Only one warning per second. */ if (ppsratecheck(&c->last_log, &c->last_pps, 1)) printf("%s: could not found the packet mtag!\n", __func__); c->vars.first_above_time = 0; return (0); } enqueue_time = (uint64_t *)(mtag + 1); c->vars.ldelay = now - *enqueue_time; c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m)); if (codel_time_before(c->vars.ldelay, c->params.target) || qsize(q) <= c->stats.maxpacket) { /* went below - stay below for at least interval */ c->vars.first_above_time = 0; return (0); } if (c->vars.first_above_time == 0) { /* just went above from below. If we stay above * for at least interval we'll say it's ok to drop */ c->vars.first_above_time = now + c->params.interval; return (0); } if (codel_time_after(now, c->vars.first_above_time)) return (1); return (0); } /* * Run a Newton method step: * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ static void codel_Newton_step(struct codel_vars *vars) { uint32_t invsqrt, invsqrt2; uint64_t val; /* sizeof_in_bits(rec_inv_sqrt) */ #define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t)) /* needed shift to get a Q0.32 number from rec_inv_sqrt */ #define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32; val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; } static u_int64_t codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt) { return (t + (u_int32_t)(((u_int64_t)interval * (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32)); } struct mbuf * codel_getq(struct codel *c, class_queue_t *q) { struct mbuf *m; u_int64_t now; int drop; if ((m = _getq(q)) == NULL) { c->vars.dropping = 0; return (m); } now = read_machclk(); drop = codel_should_drop(c, q, m, now); if (c->vars.dropping) { if (!drop) { /* sojourn time below target - leave dropping state */ c->vars.dropping = 0; } else if (codel_time_after_eq(now, c->vars.drop_next)) { /* It's time for the next drop. Drop the current * packet and dequeue the next. The dequeue might * take us out of dropping state. * If not, schedule the next drop. * A large backlog might result in drop rates so high * that the next drop should happen now, * hence the while loop. */ while (c->vars.dropping && codel_time_after_eq(now, c->vars.drop_next)) { c->vars.count++; /* don't care of possible wrap * since there is no more * divide */ codel_Newton_step(&c->vars); /* TODO ECN */ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); m_freem(m); m = _getq(q); if (!codel_should_drop(c, q, m, now)) /* leave dropping state */ c->vars.dropping = 0; else /* and schedule the next drop */ c->vars.drop_next = codel_control_law(c->vars.drop_next, c->params.interval, c->vars.rec_inv_sqrt); } } } else if (drop) { /* TODO ECN */ PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); m_freem(m); m = _getq(q); drop = codel_should_drop(c, q, m, now); c->vars.dropping = 1; /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ if (codel_time_before(now - c->vars.drop_next, 16 * c->params.interval)) { c->vars.count = (c->vars.count - c->vars.lastcount) | 1; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. */ codel_Newton_step(&c->vars); } else { c->vars.count = 1; c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } c->vars.lastcount = c->vars.count; c->vars.drop_next = codel_control_law(now, c->params.interval, c->vars.rec_inv_sqrt); } return (m); } void codel_getstats(struct codel *c, struct codel_stats *s) { *s = c->stats; } #endif /* ALTQ_CODEL */