/*-
* Copyright (c) 1991-1997 Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the Network Research
* Group at Lawrence Berkeley Laboratory.
* 4. Neither the name of the University nor of the Laboratory may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* LBL code modified by speer@eng.sun.com, May 1977.
* For questions and/or comments, please send mail to cbq@ee.lbl.gov
*
* @(#)rm_class.c 1.48 97/12/05 SMI
* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
*/
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/altq/if_altq.h>
#include <net/altq/altq.h>
#include <net/altq/altq_codel.h>
#include <net/altq/altq_rmclass.h>
#include <net/altq/altq_rmclass_debug.h>
#include <net/altq/altq_red.h>
#include <net/altq/altq_rio.h>
/*
* Local Macros
*/
#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; }
/*
* Local routines.
*/
static int rmc_satisfied(struct rm_class *, struct timeval *);
static void rmc_wrr_set_weights(struct rm_ifdat *);
static void rmc_depth_compute(struct rm_class *);
static void rmc_depth_recompute(rm_class_t *);
static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
static int _rmc_addq(rm_class_t *, mbuf_t *);
static void _rmc_dropq(rm_class_t *);
static mbuf_t *_rmc_getq(rm_class_t *);
static mbuf_t *_rmc_pollq(rm_class_t *);
static int rmc_under_limit(struct rm_class *, struct timeval *);
static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void rmc_drop_action(struct rm_class *);
static void rmc_restart(void *);
static void rmc_root_overlimit(struct rm_class *, struct rm_class *);
#define BORROW_OFFTIME
/*
* BORROW_OFFTIME (experimental):
* borrow the offtime of the class borrowing from.
* the reason is that when its own offtime is set, the class is unable
* to borrow much, especially when cutoff is taking effect.
* but when the borrowed class is overloaded (advidle is close to minidle),
* use the borrowing class's offtime to avoid overload.
*/
#define ADJUST_CUTOFF
/*
* ADJUST_CUTOFF (experimental):
* if no underlimit class is found due to cutoff, increase cutoff and
* retry the scheduling loop.
* also, don't invoke delay_actions while cutoff is taking effect,
* since a sleeping class won't have a chance to be scheduled in the
* next loop.
*
* now heuristics for setting the top-level variable (cutoff_) becomes:
* 1. if a packet arrives for a not-overlimit class, set cutoff
* to the depth of the class.
* 2. if cutoff is i, and a packet arrives for an overlimit class
* with an underlimit ancestor at a lower level than i (say j),
* then set cutoff to j.
* 3. at scheduling a packet, if there is no underlimit class
* due to the current cutoff level, increase cutoff by 1 and
* then try to schedule again.
*/
/*
* rm_class_t *
* rmc_newclass(...) - Create a new resource management class at priority
* 'pri' on the interface given by 'ifd'.
*
* nsecPerByte is the data rate of the interface in nanoseconds/byte.
* E.g., 800 for a 10Mb/s ethernet. If the class gets less
* than 100% of the bandwidth, this number should be the
* 'effective' rate for the class. Let f be the
* bandwidth fraction allocated to this class, and let
* nsPerByte be the data rate of the output link in
* nanoseconds/byte. Then nsecPerByte is set to
* nsPerByte / f. E.g., 1600 (= 800 / .5)
* for a class that gets 50% of an ethernet's bandwidth.
*
* action the routine to call when the class is over limit.
*
* maxq max allowable queue size for class (in packets).
*
* parent parent class pointer.
*
* borrow class to borrow from (should be either 'parent' or null).
*
* maxidle max value allowed for class 'idle' time estimate (this
* parameter determines how large an initial burst of packets
* can be before overlimit action is invoked.
*
* offtime how long 'delay' action will delay when class goes over
* limit (this parameter determines the steady-state burst
* size when a class is running over its limit).
*
* Maxidle and offtime have to be computed from the following: If the
* average packet size is s, the bandwidth fraction allocated to this
* class is f, we want to allow b packet bursts, and the gain of the
* averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
*
* ptime = s * nsPerByte * (1 - f) / f
* maxidle = ptime * (1 - g^b) / g^b
* minidle = -ptime * (1 / (f - 1))
* offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1)
*
* Operationally, it's convenient to specify maxidle & offtime in units
* independent of the link bandwidth so the maxidle & offtime passed to
* this routine are the above values multiplied by 8*f/(1000*nsPerByte).
* (The constant factor is a scale factor needed to make the parameters
* integers. This scaling also means that the 'unscaled' values of
* maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
* not nanoseconds.) Also note that the 'idle' filter computation keeps
* an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
* maxidle also must be scaled upward by this value. Thus, the passed
* values for maxidle and offtime can be computed as follows:
*
* maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
* offtime = offtime * 8 / (1000 * nsecPerByte)
*
* When USE_HRTIME is employed, then maxidle and offtime become:
* maxidle = maxilde * (8.0 / nsecPerByte);
* offtime = offtime * (8.0 / nsecPerByte);
*/
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
void (*action)(rm_class_t *, rm_class_t *), int maxq,
struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
int minidle, u_int offtime, int pktsize, int flags)
{
struct rm_class *cl;
struct rm_class *peer;
int s;
if (pri >= RM_MAXPRIO)
return (NULL);
#ifndef ALTQ_RED
if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
return (NULL);
}
#endif
#ifndef ALTQ_RIO
if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
return (NULL);
}
#endif
#ifndef ALTQ_CODEL
if (flags & RMCF_CODEL) {
#ifdef ALTQ_DEBUG
printf("rmc_newclass: CODEL not configured for CBQ!\n");
#endif
return (NULL);
}
#endif
cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
if (cl == NULL)
return (NULL);
CALLOUT_INIT(&cl->callout_);
cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
if (cl->q_ == NULL) {
free(cl, M_DEVBUF);
return (NULL);
}
/*
* Class initialization.
*/
cl->children_ = NULL;
cl->parent_ = parent;
cl->borrow_ = borrow;
cl->leaf_ = 1;
cl->ifdat_ = ifd;
cl->pri_ = pri;
cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
cl->depth_ = 0;
cl->qthresh_ = 0;
cl->ns_per_byte_ = nsecPerByte;
qlimit(cl->q_) = maxq;
qtype(cl->q_) = Q_DROPHEAD;
qlen(cl->q_) = 0;
cl->flags_ = flags;
#if 1 /* minidle is also scaled in ALTQ */
cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
if (cl->minidle_ > 0)
cl->minidle_ = 0;
#else
cl->minidle_ = minidle;
#endif
cl->maxidle_ = (maxidle * nsecPerByte) / 8;
if (cl->maxidle_ == 0)
cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
cl->avgidle_ = cl->maxidle_;
cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
if (cl->offtime_ == 0)
cl->offtime_ = 1;
#else
cl->avgidle_ = 0;
cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
cl->overlimit = action;
#ifdef ALTQ_RED
if (flags & (RMCF_RED|RMCF_RIO)) {
int red_flags, red_pkttime;
red_flags = 0;
if (flags & RMCF_ECN)
red_flags |= REDF_ECN;
if (flags & RMCF_FLOWVALVE)
red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
if (flags & RMCF_CLEARDSCP)
red_flags |= RIOF_CLEARDSCP;
#endif
red_pkttime = nsecPerByte * pktsize / 1000;
if (flags & RMCF_RED) {
cl->red_ = red_alloc(0, 0,
qlimit(cl->q_) * 10/100,
qlimit(cl->q_) * 30/100,
red_flags, red_pkttime);
if (cl->red_ != NULL)
qtype(cl->q_) = Q_RED;
}
#ifdef ALTQ_RIO
else {
cl->red_ = (red_t *)rio_alloc(0, NULL,
red_flags, red_pkttime);
if (cl->red_ != NULL)
qtype(cl->q_) = Q_RIO;
}
#endif
}
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
if (flags & RMCF_CODEL) {
cl->codel_ = codel_alloc(5, 100, 0);
if (cl->codel_ != NULL)
qtype(cl->q_) = Q_CODEL;
}
#endif
/*
* put the class into the class tree
*/
s = splnet();
IFQ_LOCK(ifd->ifq_);
if ((peer = ifd->active_[pri]) != NULL) {
/* find the last class at this pri */
cl->peer_ = peer;
while (peer->peer_ != ifd->active_[pri])
peer = peer->peer_;
peer->peer_ = cl;
} else {
ifd->active_[pri] = cl;
cl->peer_ = cl;
}
if (cl->parent_) {
cl->next_ = parent->children_;
parent->children_ = cl;
parent->leaf_ = 0;
}
/*
* Compute the depth of this class and its ancestors in the class
* hierarchy.
*/
rmc_depth_compute(cl);
/*
* If CBQ's WRR is enabled, then initialize the class WRR state.
*/
if (ifd->wrr_) {
ifd->num_[pri]++;
ifd->alloc_[pri] += cl->allotment_;
rmc_wrr_set_weights(ifd);
}
IFQ_UNLOCK(ifd->ifq_);
splx(s);
return (cl);
}
int
rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
int minidle, u_int offtime, int pktsize)
{
struct rm_ifdat *ifd;
u_int old_allotment;
int s;
ifd = cl->ifdat_;
old_allotment = cl->allotment_;
s = splnet();
IFQ_LOCK(ifd->ifq_);
cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
cl->qthresh_ = 0;
cl->ns_per_byte_ = nsecPerByte;
qlimit(cl->q_) = maxq;
#if 1 /* minidle is also scaled in ALTQ */
cl->minidle_ = (minidle * nsecPerByte) / 8;
if (cl->minidle_ > 0)
cl->minidle_ = 0;
#else
cl->minidle_ = minidle;
#endif
cl->maxidle_ = (maxidle * nsecPerByte) / 8;
if (cl->maxidle_ == 0)
cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
cl->avgidle_ = cl->maxidle_;
cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
if (cl->offtime_ == 0)
cl->offtime_ = 1;
#else
cl->avgidle_ = 0;
cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
/*
* If CBQ's WRR is enabled, then initialize the class WRR state.
*/
if (ifd->wrr_) {
ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
rmc_wrr_set_weights(ifd);
}
IFQ_UNLOCK(ifd->ifq_);
splx(s);
return (0);
}
/*
* static void
* rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
* the appropriate run robin weights for the CBQ weighted round robin
* algorithm.
*
* Returns: NONE
*/
static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
int i;
struct rm_class *cl, *clh;
for (i = 0; i < RM_MAXPRIO; i++) {
/*
* This is inverted from that of the simulator to
* maintain precision.
*/
if (ifd->num_[i] == 0)
ifd->M_[i] = 0;
else
ifd->M_[i] = ifd->alloc_[i] /
(ifd->num_[i] * ifd->maxpkt_);
/*
* Compute the weighted allotment for each class.
* This takes the expensive div instruction out
* of the main loop for the wrr scheduling path.
* These only get recomputed when a class comes or
* goes.
*/
if (ifd->active_[i] != NULL) {
clh = cl = ifd->active_[i];
do {
/* safe-guard for slow link or alloc_ == 0 */
if (ifd->M_[i] == 0)
cl->w_allotment_ = 0;
else
cl->w_allotment_ = cl->allotment_ /
ifd->M_[i];
cl = cl->peer_;
} while ((cl != NULL) && (cl != clh));
}
}
}
int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
if ((pri >= 0) && (pri < RM_MAXPRIO))
return (ifd->M_[pri]);
else
return (0);
}
/*
* static void
* rmc_depth_compute(struct rm_class *cl) - This function computes the
* appropriate depth of class 'cl' and its ancestors.
*
* Returns: NONE
*/
static void
rmc_depth_compute(struct rm_class *cl)
{
rm_class_t *t = cl, *p;
/*
* Recompute the depth for the branch of the tree.
*/
while (t != NULL) {
p = t->parent_;
if (p && (t->depth_ >= p->depth_)) {
p->depth_ = t->depth_ + 1;
t = p;
} else
t = NULL;
}
}
/*
* static void
* rmc_depth_recompute(struct rm_class *cl) - This function re-computes
* the depth of the tree after a class has been deleted.
*
* Returns: NONE
*/
static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
rm_class_t *p, *t;
p = cl;
while (p != NULL) {
if ((t = p->children_) == NULL) {
p->depth_ = 0;
} else {
int cdepth = 0;
while (t != NULL) {
if (t->depth_ > cdepth)
cdepth = t->depth_;
t = t->next_;
}
if (p->depth_ == cdepth + 1)
/* no change to this parent */
return;
p->depth_ = cdepth + 1;
}
p = p->parent_;
}
#else
rm_class_t *t;
if (cl->depth_ >= 1) {
if (cl->children_ == NULL) {
cl->depth_ = 0;
} else if ((t = cl->children_) != NULL) {
while (t != NULL) {
if (t->children_ != NULL)
rmc_depth_recompute(t);
t = t->next_;
}
} else
rmc_depth_compute(cl);
}
#endif
}
/*
* void
* rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
* function deletes a class from the link-sharing structure and frees
* all resources associated with the class.
*
* Returns: NONE
*/
void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
struct rm_class *p, *head, *previous;
int s;
ASSERT(cl->children_ == NULL);
if (cl->sleeping_)
CALLOUT_STOP(&cl->callout_);
s = splnet();
IFQ_LOCK(ifd->ifq_);
/*
* Free packets in the packet queue.
* XXX - this may not be a desired behavior. Packets should be
* re-queued.
*/
rmc_dropall(cl);
/*
* If the class has a parent, then remove the class from the
* class from the parent's children chain.
*/
if (cl->parent_ != NULL) {
head = cl->parent_->children_;
p = previous = head;
if (head->next_ == NULL) {
ASSERT(head == cl);
cl->parent_->children_ = NULL;
cl->parent_->leaf_ = 1;
} else while (p != NULL) {
if (p == cl) {
if (cl == head)
cl->parent_->children_ = cl->next_;
else
previous->next_ = cl->next_;
cl->next_ = NULL;
p = NULL;
} else {
previous = p;
p = p->next_;
}
}
}
/*
* Delete class from class priority peer list.
*/
if ((p = ifd->active_[cl->pri_]) != NULL) {
/*
* If there is more than one member of this priority
* level, then look for class(cl) in the priority level.
*/
if (p != p->peer_) {
while (p->peer_ != cl)
p = p->peer_;
p->peer_ = cl->peer_;
if (ifd->active_[cl->pri_] == cl)
ifd->active_[cl->pri_] = cl->peer_;
} else {
ASSERT(p == cl);
ifd->active_[cl->pri_] = NULL;
}
}
/*
* Recompute the WRR weights.
*/
if (ifd->wrr_) {
ifd->alloc_[cl->pri_] -= cl->allotment_;
ifd->num_[cl->pri_]--;
rmc_wrr_set_weights(ifd);
}
/*
* Re-compute the depth of the tree.
*/
#if 1 /* ALTQ */
rmc_depth_recompute(cl->parent_);
#else
rmc_depth_recompute(ifd->root_);
#endif
IFQ_UNLOCK(ifd->ifq_);
splx(s);
/*
* Free the class structure.
*/
if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
if (q_is_rio(cl->q_))
rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
if (q_is_red(cl->q_))
red_destroy(cl->red_);
#endif
#ifdef ALTQ_CODEL
if (q_is_codel(cl->q_))
codel_destroy(cl->codel_);
#endif
}
free(cl->q_, M_DEVBUF);
free(cl, M_DEVBUF);
}
/*
* void
* rmc_init(...) - Initialize the resource management data structures
* associated with the output portion of interface 'ifp'. 'ifd' is
* where the structures will be built (for backwards compatibility, the
* structures aren't kept in the ifnet struct). 'nsecPerByte'
* gives the link speed (inverse of bandwidth) in nanoseconds/byte.
* 'restart' is the driver-specific routine that the generic 'delay
* until under limit' action will call to restart output. `maxq'
* is the queue size of the 'link' & 'default' classes. 'maxqueued'
* is the maximum number of packets that the resource management
* code will allow to be queued 'downstream' (this is typically 1).
*
* Returns: NONE
*/
void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
int minidle, u_int offtime, int flags)
{
int i, mtu;
/*
* Initialize the CBQ tracing/debug facility.
*/
CBQTRACEINIT();
bzero((char *)ifd, sizeof (*ifd));
mtu = ifq->altq_ifp->if_mtu;
ifd->ifq_ = ifq;
ifd->restart = restart;
ifd->maxqueued_ = maxqueued;
ifd->ns_per_byte_ = nsecPerByte;
ifd->maxpkt_ = mtu;
ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
if (mtu * nsecPerByte > 10 * 1000000)
ifd->maxiftime_ /= 4;
#endif
reset_cutoff(ifd);
CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
/*
* Initialize the CBQ's WRR state.
*/
for (i = 0; i < RM_MAXPRIO; i++) {
ifd->alloc_[i] = 0;
ifd->M_[i] = 0;
ifd->num_[i] = 0;
ifd->na_[i] = 0;
ifd->active_[i] = NULL;
}
/*
* Initialize current packet state.
*/
ifd->qi_ = 0;
ifd->qo_ = 0;
for (i = 0; i < RM_MAXQUEUED; i++) {
ifd->class_[i] = NULL;
ifd->curlen_[i] = 0;
ifd->borrowed_[i] = NULL;
}
/*
* Create the root class of the link-sharing structure.
*/
if ((ifd->root_ = rmc_newclass(0, ifd,
nsecPerByte,
rmc_root_overlimit, maxq, 0, 0,
maxidle, minidle, offtime,
0, 0)) == NULL) {
printf("rmc_init: root class not allocated\n");
return ;
}
ifd->root_->depth_ = 0;
}
/*
* void
* rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
* mbuf 'm' to queue for resource class 'cl'. This routine is called
* by a driver's if_output routine. This routine must be called with
* output packet completion interrupts locked out (to avoid racing with
* rmc_dequeue_next).
*
* Returns: 0 on successful queueing
* -1 when packet drop occurs
*/
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
struct timeval now;
struct rm_ifdat *ifd = cl->ifdat_;
int cpri = cl->pri_;
int is_empty = qempty(cl->q_);
RM_GETTIME(now);
if (ifd->cutoff_ > 0) {
if (TV_LT(&cl->undertime_, &now)) {
if (ifd->cutoff_ > cl->depth_)
ifd->cutoff_ = cl->depth_;
CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
}
#if 1 /* ALTQ */
else {
/*
* the class is overlimit. if the class has
* underlimit ancestors, set cutoff to the lowest
* depth among them.
*/
struct rm_class *borrow = cl->borrow_;
while (borrow != NULL &&
borrow->depth_ < ifd->cutoff_) {
if (TV_LT(&borrow->undertime_, &now)) {
ifd->cutoff_ = borrow->depth_;
CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
break;
}
borrow = borrow->borrow_;
}
}
#else /* !ALTQ */
else if ((ifd->cutoff_ > 1) && cl->borrow_) {
if (TV_LT(&cl->borrow_->undertime_, &now)) {
ifd->cutoff_ = cl->borrow_->depth_;
CBQTRACE(rmc_queue_packet, 'ffob',
cl->borrow_->depth_);
}
}
#endif /* !ALTQ */
}
if (_rmc_addq(cl, m) < 0)
/* failed */
return (-1);
if (is_empty) {
CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
ifd->na_[cpri]++;
}
if (qlen(cl->q_) > qlimit(cl->q_)) {
/* note: qlimit can be set to 0 or 1 */
rmc_drop_action(cl);
return (-1);
}
return (0);
}
/*
* void
* rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
* classes to see if there are satified.
*/
static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
int i;
rm_class_t *p, *bp;
for (i = RM_MAXPRIO - 1; i >= 0; i--) {
if ((bp = ifd->active_[i]) != NULL) {
p = bp;
do {
if (!rmc_satisfied(p, now)) {
ifd->cutoff_ = p->depth_;
return;
}
p = p->peer_;
} while (p != bp);
}
}
reset_cutoff(ifd);
}
/*
* rmc_satisfied - Return 1 of the class is satisfied. O, otherwise.
*/
static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
rm_class_t *p;
if (cl == NULL)
return (1);
if (TV_LT(now, &cl->undertime_))
return (1);
if (cl->depth_ == 0) {
if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
return (0);
else
return (1);
}
if (cl->children_ != NULL) {
p = cl->children_;
while (p != NULL) {
if (!rmc_satisfied(p, now))
return (0);
p = p->next_;
}
}
return (1);
}
/*
* Return 1 if class 'cl' is under limit or can borrow from a parent,
* 0 if overlimit. As a side-effect, this routine will invoke the
* class overlimit action if the class if overlimit.
*/
static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
rm_class_t *p = cl;
rm_class_t *top;
struct rm_ifdat *ifd = cl->ifdat_;
ifd->borrowed_[ifd->qi_] = NULL;
/*
* If cl is the root class, then always return that it is
* underlimit. Otherwise, check to see if the class is underlimit.
*/
if (cl->parent_ == NULL)
return (1);
if (cl->sleeping_) {
if (TV_LT(now, &cl->undertime_))
return (0);
CALLOUT_STOP(&cl->callout_);
cl->sleeping_ = 0;
cl->undertime_.tv_sec = 0;
return (1);
}
top = NULL;
while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
if (((cl = cl->borrow_) == NULL) ||
(cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
if (cl != NULL)
/* cutoff is taking effect, just
return false without calling
the delay action. */
return (0);
#endif
#ifdef BORROW_OFFTIME
/*
* check if the class can borrow offtime too.
* borrow offtime from the top of the borrow
* chain if the top class is not overloaded.
*/
if (cl != NULL) {
/* cutoff is taking effect, use this class as top. */
top = cl;
CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
}
if (top != NULL && top->avgidle_ == top->minidle_)
top = NULL;
p->overtime_ = *now;
(p->overlimit)(p, top);
#else
p->overtime_ = *now;
(p->overlimit)(p, NULL);
#endif
return (0);
}
top = cl;
}
if (cl != p)
ifd->borrowed_[ifd->qi_] = cl;
return (1);
}
/*
* _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
* Packet-by-packet round robin.
*
* The heart of the weighted round-robin scheduler, which decides which
* class next gets to send a packet. Highest priority first, then
* weighted round-robin within priorites.
*
* Each able-to-send class gets to send until its byte allocation is
* exhausted. Thus, the active pointer is only changed after a class has
* exhausted its allocation.
*
* If the scheduler finds no class that is underlimit or able to borrow,
* then the first class found that had a nonzero queue and is allowed to
* borrow gets to send.
*/
static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
struct rm_class *cl = NULL, *first = NULL;
u_int deficit;
int cpri;
mbuf_t *m;
struct timeval now;
RM_GETTIME(now);
/*
* if the driver polls the top of the queue and then removes
* the polled packet, we must return the same packet.
*/
if (op == ALTDQ_REMOVE && ifd->pollcache_) {
cl = ifd->pollcache_;
cpri = cl->pri_;
if (ifd->efficient_) {
/* check if this class is overlimit */
if (cl->undertime_.tv_sec != 0 &&
rmc_under_limit(cl, &now) == 0)
first = cl;
}
ifd->pollcache_ = NULL;
goto _wrr_out;
}
else {
/* mode == ALTDQ_POLL || pollcache == NULL */
ifd->pollcache_ = NULL;
ifd->borrowed_[ifd->qi_] = NULL;
}
#ifdef ADJUST_CUTOFF
_again:
#endif
for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
if (ifd->na_[cpri] == 0)
continue;
deficit = 0;
/*
* Loop through twice for a priority level, if some class
* was unable to send a packet the first round because
* of the weighted round-robin mechanism.
* During the second loop at this level, deficit==2.
* (This second loop is not needed if for every class,
* "M[cl->pri_])" times "cl->allotment" is greater than
* the byte size for the largest packet in the class.)
*/
_wrr_loop:
cl = ifd->active_[cpri];
ASSERT(cl != NULL);
do {
if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
cl->bytes_alloc_ += cl->w_allotment_;
if (!qempty(cl->q_)) {
if ((cl->undertime_.tv_sec == 0) ||
rmc_under_limit(cl, &now)) {
if (cl->bytes_alloc_ > 0 || deficit > 1)
goto _wrr_out;
/* underlimit but no alloc */
deficit = 1;
#if 1
ifd->borrowed_[ifd->qi_] = NULL;
#endif
}
else if (first == NULL && cl->borrow_ != NULL)
first = cl; /* borrowing candidate */
}
cl->bytes_alloc_ = 0;
cl = cl->peer_;
} while (cl != ifd->active_[cpri]);
if (deficit == 1) {
/* first loop found an underlimit class with deficit */
/* Loop on same priority level, with new deficit. */
deficit = 2;
goto _wrr_loop;
}
}
#ifdef ADJUST_CUTOFF
/*
* no underlimit class found. if cutoff is taking effect,
* increase cutoff and try again.
*/
if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
ifd->cutoff_++;
CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
goto _again;
}
#endif /* ADJUST_CUTOFF */
/*
* If LINK_EFFICIENCY is turned on, then the first overlimit
* class we encounter will send a packet if all the classes
* of the link-sharing structure are overlimit.
*/
reset_cutoff(ifd);
CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
if (!ifd->efficient_ || first == NULL)
return (NULL);
cl = first;
cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
if (cl->sleeping_)
CALLOUT_STOP(&cl->callout_);
cl->sleeping_ = 0;
cl->undertime_.tv_sec = 0;
#endif
ifd->borrowed_[ifd->qi_] = cl->borrow_;
ifd->cutoff_ = cl->borrow_->depth_;
/*
* Deque the packet and do the book keeping...
*/
_wrr_out:
if (op == ALTDQ_REMOVE) {
m = _rmc_getq(cl);
if (m == NULL)
panic("_rmc_wrr_dequeue_next");
if (qempty(cl->q_))
ifd->na_[cpri]--;
/*
* Update class statistics and link data.
*/
if (cl->bytes_alloc_ > 0)
cl->bytes_alloc_ -= m_pktlen(m);
if ((cl->bytes_alloc_ <= 0) || first == cl)
ifd->active_[cl->pri_] = cl->peer_;
else
ifd->active_[cl->pri_] = cl;
ifd->class_[ifd->qi_] = cl;
ifd->curlen_[ifd->qi_] = m_pktlen(m);
ifd->now_[ifd->qi_] = now;
ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
ifd->queued_++;
} else {
/* mode == ALTDQ_PPOLL */
m = _rmc_pollq(cl);
ifd->pollcache_ = cl;
}
return (m);
}
/*
* Dequeue & return next packet from the highest priority class that
* has a packet to send & has enough allocation to send it. This
* routine is called by a driver whenever it needs a new packet to
* output.
*/
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
mbuf_t *m;
int cpri;
struct rm_class *cl, *first = NULL;
struct timeval now;
RM_GETTIME(now);
/*
* if the driver polls the top of the queue and then removes
* the polled packet, we must return the same packet.
*/
if (op == ALTDQ_REMOVE && ifd->pollcache_) {
cl = ifd->pollcache_;
cpri = cl->pri_;
ifd->pollcache_ = NULL;
goto _prr_out;
} else {
/* mode == ALTDQ_POLL || pollcache == NULL */
ifd->pollcache_ = NULL;
ifd->borrowed_[ifd->qi_] = NULL;
}
#ifdef ADJUST_CUTOFF
_again:
#endif
for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
if (ifd->na_[cpri] == 0)
continue;
cl = ifd->active_[cpri];
ASSERT(cl != NULL);
do {
if (!qempty(cl->q_)) {
if ((cl->undertime_.tv_sec == 0) ||
rmc_under_limit(cl, &now))
goto _prr_out;
if (first == NULL && cl->borrow_ != NULL)
first = cl;
}
cl = cl->peer_;
} while (cl != ifd->active_[cpri]);
}
#ifdef ADJUST_CUTOFF
/*
* no underlimit class found. if cutoff is taking effect, increase
* cutoff and try again.
*/
if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
ifd->cutoff_++;
goto _again;
}
#endif /* ADJUST_CUTOFF */
/*
* If LINK_EFFICIENCY is turned on, then the first overlimit
* class we encounter will send a packet if all the classes
* of the link-sharing structure are overlimit.
*/
reset_cutoff(ifd);
if (!ifd->efficient_ || first == NULL)
return (NULL);
cl = first;
cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
if (cl->sleeping_)
CALLOUT_STOP(&cl->callout_);
cl->sleeping_ = 0;
cl->undertime_.tv_sec = 0;
#endif
ifd->borrowed_[ifd->qi_] = cl->borrow_;
ifd->cutoff_ = cl->borrow_->depth_;
/*
* Deque the packet and do the book keeping...
*/
_prr_out:
if (op == ALTDQ_REMOVE) {
m = _rmc_getq(cl);
if (m == NULL)
panic("_rmc_prr_dequeue_next");
if (qempty(cl->q_))
ifd->na_[cpri]--;
ifd->active_[cpri] = cl->peer_;
ifd->class_[ifd->qi_] = cl;
ifd->curlen_[ifd->qi_] = m_pktlen(m);
ifd->now_[ifd->qi_] = now;
ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
ifd->queued_++;
} else {
/* mode == ALTDQ_POLL */
m = _rmc_pollq(cl);
ifd->pollcache_ = cl;
}
return (m);
}
/*
* mbuf_t *
* rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
* is invoked by the packet driver to get the next packet to be
* dequeued and output on the link. If WRR is enabled, then the
* WRR dequeue next routine will determine the next packet to sent.
* Otherwise, packet-by-packet round robin is invoked.
*
* Returns: NULL, if a packet is not available or if all
* classes are overlimit.
*
* Otherwise, Pointer to the next packet.
*/
mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
if (ifd->queued_ >= ifd->maxqueued_)
return (NULL);
else if (ifd->wrr_)
return (_rmc_wrr_dequeue_next(ifd, mode));
else
return (_rmc_prr_dequeue_next(ifd, mode));
}
/*
* Update the utilization estimate for the packet that just completed.
* The packet's class & the parent(s) of that class all get their
* estimators updated. This routine is called by the driver's output-
* packet-completion interrupt service routine.
*/
/*
* a macro to approximate "divide by 1000" that gives 0.000999,
* if a value has enough effective digits.
* (on pentium, mul takes 9 cycles but div takes 46!)
*/
#define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
int idle, avgidle, pktlen;
int pkt_time, tidle;
rm_class_t *cl, *borrowed;
rm_class_t *borrows;
struct timeval *nowp;
/*
* Get the most recent completed class.
*/
if ((cl = ifd->class_[ifd->qo_]) == NULL)
return;
pktlen = ifd->curlen_[ifd->qo_];
borrowed = ifd->borrowed_[ifd->qo_];
borrows = borrowed;
PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
/*
* Run estimator on class and its ancestors.
*/
/*
* rm_update_class_util is designed to be called when the
* transfer is completed from a xmit complete interrupt,
* but most drivers don't implement an upcall for that.
* so, just use estimated completion time.
* as a result, ifd->qi_ and ifd->qo_ are always synced.
*/
nowp = &ifd->now_[ifd->qo_];
/* get pkt_time (for link) in usec */
#if 1 /* use approximation */
pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
pkt_time = NSEC_TO_USEC(pkt_time);
#else
pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
if (TV_LT(nowp, &ifd->ifnow_)) {
int iftime;
/*
* make sure the estimated completion time does not go
* too far. it can happen when the link layer supports
* data compression or the interface speed is set to
* a much lower value.
*/
TV_DELTA(&ifd->ifnow_, nowp, iftime);
if (iftime+pkt_time < ifd->maxiftime_) {
TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
} else {
TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
}
} else {
TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
}
#else
if (TV_LT(nowp, &ifd->ifnow_)) {
TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
} else {
TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
}
#endif
while (cl != NULL) {
TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
if (idle >= 2000000)
/*
* this class is idle enough, reset avgidle.
* (TV_DELTA returns 2000000 us when delta is large.)
*/
cl->avgidle_ = cl->maxidle_;
/* get pkt_time (for class) in usec */
#if 1 /* use approximation */
pkt_time = pktlen * cl->ns_per_byte_;
pkt_time = NSEC_TO_USEC(pkt_time);
#else
pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
idle -= pkt_time;
avgidle = cl->avgidle_;
avgidle += idle - (avgidle >> RM_FILTER_GAIN);
cl->avgidle_ = avgidle;
/* Are we overlimit ? */
if (avgidle <= 0) {
CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
/*
* need some lower bound for avgidle, otherwise
* a borrowing class gets unbounded penalty.
*/
if (avgidle < cl->minidle_)
avgidle = cl->avgidle_ = cl->minidle_;
#endif
/* set next idle to make avgidle 0 */
tidle = pkt_time +
(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
++cl->stats_.over;
} else {
cl->avgidle_ =
(avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
cl->undertime_.tv_sec = 0;
if (cl->sleeping_) {
CALLOUT_STOP(&cl->callout_);
cl->sleeping_ = 0;
}
}
if (borrows != NULL) {
if (borrows != cl)
++cl->stats_.borrows;
else
borrows = NULL;
}
cl->last_ = ifd->ifnow_;
cl->last_pkttime_ = pkt_time;
#if 1
if (cl->parent_ == NULL) {
/* take stats of root class */
PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
}
#endif
cl = cl->parent_;
}
/*
* Check to see if cutoff needs to set to a new level.
*/
cl = ifd->class_[ifd->qo_];
if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
rmc_tl_satisfied(ifd, nowp);
CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
} else {
ifd->cutoff_ = borrowed->depth_;
CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
}
#else /* !ALTQ */
if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
reset_cutoff(ifd);
#ifdef notdef
rmc_tl_satisfied(ifd, &now);
#endif
CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
} else {
ifd->cutoff_ = borrowed->depth_;
CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
}
#endif /* !ALTQ */
}
/*
* Release class slot
*/
ifd->borrowed_[ifd->qo_] = NULL;
ifd->class_[ifd->qo_] = NULL;
ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
ifd->queued_--;
}
/*
* void
* rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
* over-limit action routines. These get invoked by rmc_under_limit()
* if a class with packets to send if over its bandwidth limit & can't
* borrow from a parent class.
*
* Returns: NONE
*/
static void
rmc_drop_action(struct rm_class *cl)
{
struct rm_ifdat *ifd = cl->ifdat_;
ASSERT(qlen(cl->q_) > 0);
_rmc_dropq(cl);
if (qempty(cl->q_))
ifd->na_[cl->pri_]--;
}
void rmc_dropall(struct rm_class *cl)
{
struct rm_ifdat *ifd = cl->ifdat_;
if (!qempty(cl->q_)) {
_flushq(cl->q_);
ifd->na_[cl->pri_]--;
}
}
static int
hzto(struct timeval *tv)
{
struct timeval t2;
getmicrotime(&t2);
t2.tv_sec = tv->tv_sec - t2.tv_sec;
t2.tv_usec = tv->tv_usec - t2.tv_usec;
return (tvtohz(&t2));
}
/*
* void
* rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
* delay action routine. It is invoked via rmc_under_limit when the
* packet is discoverd to be overlimit.
*
* If the delay action is result of borrow class being overlimit, then
* delay for the offtime of the borrowing class that is overlimit.
*
* Returns: NONE
*/
void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
int delay, t, extradelay;
cl->stats_.overactions++;
TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
delay += cl->offtime_;
#endif
if (!cl->sleeping_) {
CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
if (borrow != NULL)
extradelay = borrow->offtime_;
else
#endif
extradelay = cl->offtime_;
#ifdef ALTQ
/*
* XXX recalculate suspend time:
* current undertime is (tidle + pkt_time) calculated
* from the last transmission.
* tidle: time required to bring avgidle back to 0
* pkt_time: target waiting time for this class
* we need to replace pkt_time by offtime
*/
extradelay -= cl->last_pkttime_;
#endif
if (extradelay > 0) {
TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
delay += extradelay;
}
cl->sleeping_ = 1;
cl->stats_.delays++;
/*
* Since packets are phased randomly with respect to the
* clock, 1 tick (the next clock tick) can be an arbitrarily
* short time so we have to wait for at least two ticks.
* NOTE: If there's no other traffic, we need the timer as
* a 'backstop' to restart this class.
*/
if (delay > tick * 2) {
/* FreeBSD rounds up the tick */
t = hzto(&cl->undertime_);
} else
t = 2;
CALLOUT_RESET(&cl->callout_, t, rmc_restart, cl);
}
}
/*
* void
* rmc_restart() - is just a helper routine for rmc_delay_action -- it is
* called by the system timer code & is responsible checking if the
* class is still sleeping (it might have been restarted as a side
* effect of the queue scan on a packet arrival) and, if so, restarting
* output for the class. Inspecting the class state & restarting output
* require locking the class structure. In general the driver is
* responsible for locking but this is the only routine that is not
* called directly or indirectly from the interface driver so it has
* know about system locking conventions. Under bsd, locking is done
* by raising IPL to splimp so that's what's implemented here. On a
* different system this would probably need to be changed.
*
* Returns: NONE
*/
static void
rmc_restart(void *arg)
{
struct rm_class *cl = arg;
struct rm_ifdat *ifd = cl->ifdat_;
struct epoch_tracker et;
int s;
s = splnet();
NET_EPOCH_ENTER(et);
IFQ_LOCK(ifd->ifq_);
CURVNET_SET(ifd->ifq_->altq_ifp->if_vnet);
if (cl->sleeping_) {
cl->sleeping_ = 0;
cl->undertime_.tv_sec = 0;
if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
(ifd->restart)(ifd->ifq_);
}
}
CURVNET_RESTORE();
IFQ_UNLOCK(ifd->ifq_);
NET_EPOCH_EXIT(et);
splx(s);
}
/*
* void
* rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit
* handling routine for the root class of the link sharing structure.
*
* Returns: NONE
*/
static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
panic("rmc_root_overlimit");
}
/*
* Packet Queue handling routines. Eventually, this is to localize the
* effects on the code whether queues are red queues or droptail
* queues.
*/
static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
if (q_is_rio(cl->q_))
return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
if (q_is_red(cl->q_))
return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
if (q_is_codel(cl->q_))
return codel_addq(cl->codel_, cl->q_, m);
#endif
if (cl->flags_ & RMCF_CLEARDSCP)
write_dsfield(m, cl->pktattr_, 0);
_addq(cl->q_, m);
return (0);
}
/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
mbuf_t *m;
if ((m = _getq(cl->q_)) != NULL)
m_freem(m);
}
static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
if (q_is_rio(cl->q_))
return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
if (q_is_red(cl->q_))
return red_getq(cl->red_, cl->q_);
#endif
#ifdef ALTQ_CODEL
if (q_is_codel(cl->q_))
return codel_getq(cl->codel_, cl->q_);
#endif
return _getq(cl->q_);
}
static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
return qhead(cl->q_);
}
#ifdef CBQ_TRACE
struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace *cbqtrace_ptr = NULL;
int cbqtrace_count;
/*
* DDB hook to trace cbq events:
* the last 1024 events are held in a circular buffer.
* use "call cbqtrace_dump(N)" to display 20 events from Nth event.
*/
void cbqtrace_dump(int);
static char *rmc_funcname(void *);
static struct rmc_funcs {
void *func;
char *name;
} rmc_funcs[] =
{
rmc_init, "rmc_init",
rmc_queue_packet, "rmc_queue_packet",
rmc_under_limit, "rmc_under_limit",
rmc_update_class_util, "rmc_update_class_util",
rmc_delay_action, "rmc_delay_action",
rmc_restart, "rmc_restart",
_rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
NULL, NULL
};
static char *rmc_funcname(void *func)
{
struct rmc_funcs *fp;
for (fp = rmc_funcs; fp->func != NULL; fp++)
if (fp->func == func)
return (fp->name);
return ("unknown");
}
void cbqtrace_dump(int counter)
{
int i, *p;
char *cp;
counter = counter % NCBQTRACE;
p = (int *)&cbqtrace_buffer[counter];
for (i=0; i<20; i++) {
printf("[0x%x] ", *p++);
printf("%s: ", rmc_funcname((void *)*p++));
cp = (char *)p++;
printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
printf("%d\n",*p++);
if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
p = (int *)cbqtrace_buffer;
}
}
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */
#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \
defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)
void
_addq(class_queue_t *q, mbuf_t *m)
{
mbuf_t *m0;
if ((m0 = qtail(q)) != NULL)
m->m_nextpkt = m0->m_nextpkt;
else
m0 = m;
m0->m_nextpkt = m;
qtail(q) = m;
qlen(q)++;
}
mbuf_t *
_getq(class_queue_t *q)
{
mbuf_t *m, *m0;
if ((m = qtail(q)) == NULL)
return (NULL);
if ((m0 = m->m_nextpkt) != m)
m->m_nextpkt = m0->m_nextpkt;
else {
ASSERT(qlen(q) == 1);
qtail(q) = NULL;
}
qlen(q)--;
m0->m_nextpkt = NULL;
return (m0);
}
/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
mbuf_t *m, *m0, *prev;
if ((m = m0 = qtail(q)) == NULL)
return NULL;
do {
prev = m0;
m0 = m0->m_nextpkt;
} while (m0 != m);
prev->m_nextpkt = m->m_nextpkt;
if (prev == m) {
ASSERT(qlen(q) == 1);
qtail(q) = NULL;
} else
qtail(q) = prev;
qlen(q)--;
m->m_nextpkt = NULL;
return (m);
}
/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
struct mbuf *m;
int i, n;
if ((m = qtail(q)) == NULL)
return NULL;
if (m->m_nextpkt == m) {
ASSERT(qlen(q) == 1);
qtail(q) = NULL;
} else {
struct mbuf *prev = NULL;
n = arc4random() % qlen(q) + 1;
for (i = 0; i < n; i++) {
prev = m;
m = m->m_nextpkt;
}
prev->m_nextpkt = m->m_nextpkt;
if (m == qtail(q))
qtail(q) = prev;
}
qlen(q)--;
m->m_nextpkt = NULL;
return (m);
}
void
_removeq(class_queue_t *q, mbuf_t *m)
{
mbuf_t *m0, *prev;
m0 = qtail(q);
do {
prev = m0;
m0 = m0->m_nextpkt;
} while (m0 != m);
prev->m_nextpkt = m->m_nextpkt;
if (prev == m)
qtail(q) = NULL;
else if (qtail(q) == m)
qtail(q) = prev;
qlen(q)--;
}
void
_flushq(class_queue_t *q)
{
mbuf_t *m;
while ((m = _getq(q)) != NULL)
m_freem(m);
ASSERT(qlen(q) == 0);
}
#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */