summaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_bpf.c333
-rw-r--r--net/sched/cls_api.c14
-rw-r--r--net/sched/cls_basic.c6
-rw-r--r--net/sched/cls_bpf.c221
-rw-r--r--net/sched/cls_cgroup.c6
-rw-r--r--net/sched/cls_flow.c6
-rw-r--r--net/sched/cls_fw.c34
-rw-r--r--net/sched/cls_route.c26
-rw-r--r--net/sched/cls_rsvp.h12
-rw-r--r--net/sched/cls_tcindex.c6
-rw-r--r--net/sched/cls_u32.c30
-rw-r--r--net/sched/em_text.c3
-rw-r--r--net/sched/ematch.c1
-rw-r--r--net/sched/sch_api.c14
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_ingress.c9
-rw-r--r--net/sched/sch_netem.c3
17 files changed, 538 insertions, 190 deletions
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 82c5d7fc1988..4d2cede17468 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -13,71 +13,140 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/filter.h>
+#include <linux/bpf.h>
+
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
-#define BPF_TAB_MASK 15
+#define BPF_TAB_MASK 15
+#define ACT_BPF_NAME_LEN 256
+
+struct tcf_bpf_cfg {
+ struct bpf_prog *filter;
+ struct sock_filter *bpf_ops;
+ char *bpf_name;
+ u32 bpf_fd;
+ u16 bpf_num_ops;
+};
-static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
+static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
- struct tcf_bpf *b = a->priv;
- int action;
- int filter_res;
-
- spin_lock(&b->tcf_lock);
- b->tcf_tm.lastuse = jiffies;
- bstats_update(&b->tcf_bstats, skb);
- action = b->tcf_action;
-
- filter_res = BPF_PROG_RUN(b->filter, skb);
- if (filter_res == 0) {
- /* Return code 0 from the BPF program
- * is being interpreted as a drop here.
- */
- action = TC_ACT_SHOT;
- b->tcf_qstats.drops++;
+ struct tcf_bpf *prog = act->priv;
+ int action, filter_res;
+
+ spin_lock(&prog->tcf_lock);
+
+ prog->tcf_tm.lastuse = jiffies;
+ bstats_update(&prog->tcf_bstats, skb);
+
+ /* Needed here for accessing maps. */
+ rcu_read_lock();
+ filter_res = BPF_PROG_RUN(prog->filter, skb);
+ rcu_read_unlock();
+
+ /* A BPF program may overwrite the default action opcode.
+ * Similarly as in cls_bpf, if filter_res == -1 we use the
+ * default action specified from tc.
+ *
+ * In case a different well-known TC_ACT opcode has been
+ * returned, it will overwrite the default one.
+ *
+ * For everything else that is unknown, TC_ACT_UNSPEC is
+ * returned.
+ */
+ switch (filter_res) {
+ case TC_ACT_PIPE:
+ case TC_ACT_RECLASSIFY:
+ case TC_ACT_OK:
+ action = filter_res;
+ break;
+ case TC_ACT_SHOT:
+ action = filter_res;
+ prog->tcf_qstats.drops++;
+ break;
+ case TC_ACT_UNSPEC:
+ action = prog->tcf_action;
+ break;
+ default:
+ action = TC_ACT_UNSPEC;
+ break;
}
- spin_unlock(&b->tcf_lock);
+ spin_unlock(&prog->tcf_lock);
return action;
}
-static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
+static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
+{
+ return !prog->bpf_ops;
+}
+
+static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
+ struct sk_buff *skb)
+{
+ struct nlattr *nla;
+
+ if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops))
+ return -EMSGSIZE;
+
+ nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops *
+ sizeof(struct sock_filter));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+ return 0;
+}
+
+static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
+ struct sk_buff *skb)
+{
+ if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
+ return -EMSGSIZE;
+
+ if (prog->bpf_name &&
+ nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
int bind, int ref)
{
unsigned char *tp = skb_tail_pointer(skb);
- struct tcf_bpf *b = a->priv;
+ struct tcf_bpf *prog = act->priv;
struct tc_act_bpf opt = {
- .index = b->tcf_index,
- .refcnt = b->tcf_refcnt - ref,
- .bindcnt = b->tcf_bindcnt - bind,
- .action = b->tcf_action,
+ .index = prog->tcf_index,
+ .refcnt = prog->tcf_refcnt - ref,
+ .bindcnt = prog->tcf_bindcnt - bind,
+ .action = prog->tcf_action,
};
- struct tcf_t t;
- struct nlattr *nla;
+ struct tcf_t tm;
+ int ret;
if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
- if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops))
- goto nla_put_failure;
-
- nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops *
- sizeof(struct sock_filter));
- if (!nla)
+ if (tcf_bpf_is_ebpf(prog))
+ ret = tcf_bpf_dump_ebpf_info(prog, skb);
+ else
+ ret = tcf_bpf_dump_bpf_info(prog, skb);
+ if (ret)
goto nla_put_failure;
- memcpy(nla_data(nla), b->bpf_ops, nla_len(nla));
+ tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
+ tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
+ tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
- t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install);
- t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse);
- t.expires = jiffies_to_clock_t(b->tcf_tm.expires);
- if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t))
+ if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
goto nla_put_failure;
+
return skb->len;
nla_put_failure:
@@ -87,36 +156,21 @@ nla_put_failure:
static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
[TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) },
+ [TCA_ACT_BPF_FD] = { .type = NLA_U32 },
+ [TCA_ACT_BPF_NAME] = { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
[TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 },
[TCA_ACT_BPF_OPS] = { .type = NLA_BINARY,
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};
-static int tcf_bpf_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action *a,
- int ovr, int bind)
+static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
{
- struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
- struct tc_act_bpf *parm;
- struct tcf_bpf *b;
- u16 bpf_size, bpf_num_ops;
struct sock_filter *bpf_ops;
- struct sock_fprog_kern tmp;
+ struct sock_fprog_kern fprog_tmp;
struct bpf_prog *fp;
+ u16 bpf_size, bpf_num_ops;
int ret;
- if (!nla)
- return -EINVAL;
-
- ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
- if (ret < 0)
- return ret;
-
- if (!tb[TCA_ACT_BPF_PARMS] ||
- !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
- return -EINVAL;
- parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
-
bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
return -EINVAL;
@@ -126,68 +180,165 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return -EINVAL;
bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
- if (!bpf_ops)
+ if (bpf_ops == NULL)
return -ENOMEM;
memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
- tmp.len = bpf_num_ops;
- tmp.filter = bpf_ops;
+ fprog_tmp.len = bpf_num_ops;
+ fprog_tmp.filter = bpf_ops;
- ret = bpf_prog_create(&fp, &tmp);
- if (ret)
- goto free_bpf_ops;
+ ret = bpf_prog_create(&fp, &fprog_tmp);
+ if (ret < 0) {
+ kfree(bpf_ops);
+ return ret;
+ }
- if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
- if (ret)
+ cfg->bpf_ops = bpf_ops;
+ cfg->bpf_num_ops = bpf_num_ops;
+ cfg->filter = fp;
+
+ return 0;
+}
+
+static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
+{
+ struct bpf_prog *fp;
+ char *name = NULL;
+ u32 bpf_fd;
+
+ bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
+
+ fp = bpf_prog_get(bpf_fd);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
+ if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
+ bpf_prog_put(fp);
+ return -EINVAL;
+ }
+
+ if (tb[TCA_ACT_BPF_NAME]) {
+ name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
+ nla_len(tb[TCA_ACT_BPF_NAME]),
+ GFP_KERNEL);
+ if (!name) {
+ bpf_prog_put(fp);
+ return -ENOMEM;
+ }
+ }
+
+ cfg->bpf_fd = bpf_fd;
+ cfg->bpf_name = name;
+ cfg->filter = fp;
+
+ return 0;
+}
+
+static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action *act,
+ int replace, int bind)
+{
+ struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+ struct tc_act_bpf *parm;
+ struct tcf_bpf *prog;
+ struct tcf_bpf_cfg cfg;
+ bool is_bpf, is_ebpf;
+ int ret;
+
+ if (!nla)
+ return -EINVAL;
+
+ ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+ if (ret < 0)
+ return ret;
+
+ is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
+ is_ebpf = tb[TCA_ACT_BPF_FD];
+
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+ !tb[TCA_ACT_BPF_PARMS])
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+
+ memset(&cfg, 0, sizeof(cfg));
+
+ ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
+ tcf_bpf_init_from_efd(tb, &cfg);
+ if (ret < 0)
+ return ret;
+
+ if (!tcf_hash_check(parm->index, act, bind)) {
+ ret = tcf_hash_create(parm->index, est, act,
+ sizeof(*prog), bind);
+ if (ret < 0)
goto destroy_fp;
ret = ACT_P_CREATED;
} else {
+ /* Don't override defaults. */
if (bind)
goto destroy_fp;
- tcf_hash_release(a, bind);
- if (!ovr) {
+
+ tcf_hash_release(act, bind);
+ if (!replace) {
ret = -EEXIST;
goto destroy_fp;
}
}
- b = to_bpf(a);
- spin_lock_bh(&b->tcf_lock);
- b->tcf_action = parm->action;
- b->bpf_num_ops = bpf_num_ops;
- b->bpf_ops = bpf_ops;
- b->filter = fp;
- spin_unlock_bh(&b->tcf_lock);
+ prog = to_bpf(act);
+ spin_lock_bh(&prog->tcf_lock);
+
+ prog->bpf_ops = cfg.bpf_ops;
+ prog->bpf_name = cfg.bpf_name;
+
+ if (cfg.bpf_num_ops)
+ prog->bpf_num_ops = cfg.bpf_num_ops;
+ if (cfg.bpf_fd)
+ prog->bpf_fd = cfg.bpf_fd;
+
+ prog->tcf_action = parm->action;
+ prog->filter = cfg.filter;
+
+ spin_unlock_bh(&prog->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(a);
+ tcf_hash_insert(act);
+
return ret;
destroy_fp:
- bpf_prog_destroy(fp);
-free_bpf_ops:
- kfree(bpf_ops);
+ if (is_ebpf)
+ bpf_prog_put(cfg.filter);
+ else
+ bpf_prog_destroy(cfg.filter);
+
+ kfree(cfg.bpf_ops);
+ kfree(cfg.bpf_name);
+
return ret;
}
-static void tcf_bpf_cleanup(struct tc_action *a, int bind)
+static void tcf_bpf_cleanup(struct tc_action *act, int bind)
{
- struct tcf_bpf *b = a->priv;
+ const struct tcf_bpf *prog = act->priv;
- bpf_prog_destroy(b->filter);
+ if (tcf_bpf_is_ebpf(prog))
+ bpf_prog_put(prog->filter);
+ else
+ bpf_prog_destroy(prog->filter);
}
-static struct tc_action_ops act_bpf_ops = {
- .kind = "bpf",
- .type = TCA_ACT_BPF,
- .owner = THIS_MODULE,
- .act = tcf_bpf,
- .dump = tcf_bpf_dump,
- .cleanup = tcf_bpf_cleanup,
- .init = tcf_bpf_init,
+static struct tc_action_ops act_bpf_ops __read_mostly = {
+ .kind = "bpf",
+ .type = TCA_ACT_BPF,
+ .owner = THIS_MODULE,
+ .act = tcf_bpf,
+ .dump = tcf_bpf_dump,
+ .cleanup = tcf_bpf_cleanup,
+ .init = tcf_bpf_init,
};
static int __init bpf_init_module(void)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index baef987fe2c0..8b0470e418dc 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -286,7 +286,7 @@ replay:
RCU_INIT_POINTER(*back, next);
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
err = 0;
goto errout;
}
@@ -301,14 +301,20 @@ replay:
err = -EEXIST;
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
goto errout;
}
break;
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
- if (err == 0)
+ if (err == 0) {
tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+ if (tcf_destroy(tp, false)) {
+ struct tcf_proto *next = rtnl_dereference(tp->next);
+
+ RCU_INIT_POINTER(*back, next);
+ }
+ }
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
@@ -329,7 +335,7 @@ replay:
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
} else {
if (tp_created)
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index fc399db86f11..0b8c3ace671f 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -96,11 +96,14 @@ static void basic_delete_filter(struct rcu_head *head)
kfree(f);
}
-static void basic_destroy(struct tcf_proto *tp)
+static bool basic_destroy(struct tcf_proto *tp, bool force)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
+ if (!force && !list_empty(&head->flist))
+ return false;
+
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
@@ -108,6 +111,7 @@ static void basic_destroy(struct tcf_proto *tp)
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int basic_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5f3ee9e4b5bf..5c4171c5d2bd 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -16,6 +16,8 @@
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
+#include <linux/bpf.h>
+
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
@@ -24,6 +26,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
+#define CLS_BPF_NAME_LEN 256
+
struct cls_bpf_head {
struct list_head plist;
u32 hgen;
@@ -32,18 +36,24 @@ struct cls_bpf_head {
struct cls_bpf_prog {
struct bpf_prog *filter;
- struct sock_filter *bpf_ops;
- struct tcf_exts exts;
- struct tcf_result res;
struct list_head link;
+ struct tcf_result res;
+ struct tcf_exts exts;
u32 handle;
- u16 bpf_num_ops;
+ union {
+ u32 bpf_fd;
+ u16 bpf_num_ops;
+ };
+ struct sock_filter *bpf_ops;
+ const char *bpf_name;
struct tcf_proto *tp;
struct rcu_head rcu;
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_FD] = { .type = NLA_U32 },
+ [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
[TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
[TCA_BPF_OPS] = { .type = NLA_BINARY,
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
@@ -54,8 +64,10 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
struct cls_bpf_prog *prog;
- int ret;
+ int ret = -1;
+ /* Needed here for accessing maps. */
+ rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
int filter_res = BPF_PROG_RUN(prog->filter, skb);
@@ -70,10 +82,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (ret < 0)
continue;
- return ret;
+ break;
}
+ rcu_read_unlock();
- return -1;
+ return ret;
+}
+
+static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
+{
+ return !prog->bpf_ops;
}
static int cls_bpf_init(struct tcf_proto *tp)
@@ -94,8 +112,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
- bpf_prog_destroy(prog->filter);
+ if (cls_bpf_is_ebpf(prog))
+ bpf_prog_put(prog->filter);
+ else
+ bpf_prog_destroy(prog->filter);
+ kfree(prog->bpf_name);
kfree(prog->bpf_ops);
kfree(prog);
}
@@ -114,14 +136,18 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
+ if (!force && !list_empty(&head->plist))
+ return false;
+
list_for_each_entry_safe(prog, tmp, &head->plist, link) {
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
@@ -130,6 +156,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
@@ -151,69 +178,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
return ret;
}
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+static int cls_bpf_prog_from_ops(struct nlattr **tb,
+ struct cls_bpf_prog *prog, u32 classid)
{
struct sock_filter *bpf_ops;
- struct tcf_exts exts;
- struct sock_fprog_kern tmp;
+ struct sock_fprog_kern fprog_tmp;
struct bpf_prog *fp;
u16 bpf_size, bpf_num_ops;
- u32 classid;
int ret;
- if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
- return -EINVAL;
-
- tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
- ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
- if (ret < 0)
- return ret;
-
- classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
- if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
+ return -EINVAL;
bpf_size = bpf_num_ops * sizeof(*bpf_ops);
- if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
+ return -EINVAL;
bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
- if (bpf_ops == NULL) {
- ret = -ENOMEM;
- goto errout;
- }
+ if (bpf_ops == NULL)
+ return -ENOMEM;
memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
- tmp.len = bpf_num_ops;
- tmp.filter = bpf_ops;
+ fprog_tmp.len = bpf_num_ops;
+ fprog_tmp.filter = bpf_ops;
- ret = bpf_prog_create(&fp, &tmp);
- if (ret)
- goto errout_free;
+ ret = bpf_prog_create(&fp, &fprog_tmp);
+ if (ret < 0) {
+ kfree(bpf_ops);
+ return ret;
+ }
- prog->bpf_num_ops = bpf_num_ops;
prog->bpf_ops = bpf_ops;
+ prog->bpf_num_ops = bpf_num_ops;
+ prog->bpf_name = NULL;
+
+ prog->filter = fp;
+ prog->res.classid = classid;
+
+ return 0;
+}
+
+static int cls_bpf_prog_from_efd(struct nlattr **tb,
+ struct cls_bpf_prog *prog, u32 classid)
+{
+ struct bpf_prog *fp;
+ char *name = NULL;
+ u32 bpf_fd;
+
+ bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
+
+ fp = bpf_prog_get(bpf_fd);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
+ if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
+ bpf_prog_put(fp);
+ return -EINVAL;
+ }
+
+ if (tb[TCA_BPF_NAME]) {
+ name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
+ nla_len(tb[TCA_BPF_NAME]),
+ GFP_KERNEL);
+ if (!name) {
+ bpf_prog_put(fp);
+ return -ENOMEM;
+ }
+ }
+
+ prog->bpf_ops = NULL;
+ prog->bpf_fd = bpf_fd;
+ prog->bpf_name = name;
+
prog->filter = fp;
prog->res.classid = classid;
+ return 0;
+}
+
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+ struct cls_bpf_prog *prog,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct tcf_exts exts;
+ bool is_bpf, is_ebpf;
+ u32 classid;
+ int ret;
+
+ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+ is_ebpf = tb[TCA_BPF_FD];
+
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+ !tb[TCA_BPF_CLASSID])
+ return -EINVAL;
+
+ tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+ if (ret < 0)
+ return ret;
+
+ classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
+ cls_bpf_prog_from_efd(tb, prog, classid);
+ if (ret < 0) {
+ tcf_exts_destroy(&exts);
+ return ret;
+ }
+
tcf_bind_filter(tp, &prog->res, base);
tcf_exts_change(tp, &prog->exts, &exts);
return 0;
-errout_free:
- kfree(bpf_ops);
-errout:
- tcf_exts_destroy(&exts);
- return ret;
}
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -297,11 +376,43 @@ errout:
return ret;
}
+static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
+ struct sk_buff *skb)
+{
+ struct nlattr *nla;
+
+ if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
+ return -EMSGSIZE;
+
+ nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
+ sizeof(struct sock_filter));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+ return 0;
+}
+
+static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
+ struct sk_buff *skb)
+{
+ if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
+ return -EMSGSIZE;
+
+ if (prog->bpf_name &&
+ nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *tm)
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
- struct nlattr *nest, *nla;
+ struct nlattr *nest;
+ int ret;
if (prog == NULL)
return skb->len;
@@ -314,16 +425,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
goto nla_put_failure;
- if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
- goto nla_put_failure;
- nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
- sizeof(struct sock_filter));
- if (nla == NULL)
+ if (cls_bpf_is_ebpf(prog))
+ ret = cls_bpf_dump_ebpf_info(prog, skb);
+ else
+ ret = cls_bpf_dump_bpf_info(prog, skb);
+ if (ret)
goto nla_put_failure;
- memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
-
if (tcf_exts_dump(skb, &prog->exts) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221697ab0247..ea611b216412 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -143,14 +143,18 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static bool cls_cgroup_destroy(struct tcf_proto *tp, bool force)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
+ if (!force)
+ return false;
+
if (head) {
RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
}
+ return true;
}
static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 461410394d08..a620c4e288a5 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -557,17 +557,21 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp)
+static bool flow_destroy(struct tcf_proto *tp, bool force)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
+ if (!force && !list_empty(&head->filters))
+ return false;
+
list_for_each_entry_safe(f, next, &head->filters, list) {
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index a5269f76004c..715e01e5910a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -33,6 +33,7 @@
struct fw_head {
u32 mask;
+ bool mask_set;
struct fw_filter __rcu *ht[HTSIZE];
struct rcu_head rcu;
};
@@ -113,6 +114,14 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
static int fw_init(struct tcf_proto *tp)
{
+ struct fw_head *head;
+
+ head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+
+ head->mask_set = false;
+ rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -124,14 +133,20 @@ static void fw_delete_filter(struct rcu_head *head)
kfree(f);
}
-static void fw_destroy(struct tcf_proto *tp)
+static bool fw_destroy(struct tcf_proto *tp, bool force)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
int h;
if (head == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h = 0; h < HTSIZE; h++)
+ if (rcu_access_pointer(head->ht[h]))
+ return false;
+ }
for (h = 0; h < HTSIZE; h++) {
while ((f = rtnl_dereference(head->ht[h])) != NULL) {
@@ -143,6 +158,7 @@ static void fw_destroy(struct tcf_proto *tp)
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int fw_delete(struct tcf_proto *tp, unsigned long arg)
@@ -286,17 +302,11 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (!handle)
return -EINVAL;
- if (head == NULL) {
- u32 mask = 0xFFFFFFFF;
+ if (!head->mask_set) {
+ head->mask = 0xFFFFFFFF;
if (tb[TCA_FW_MASK])
- mask = nla_get_u32(tb[TCA_FW_MASK]);
-
- head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
- if (head == NULL)
- return -ENOBUFS;
- head->mask = mask;
-
- rcu_assign_pointer(tp->root, head);
+ head->mask = nla_get_u32(tb[TCA_FW_MASK]);
+ head->mask_set = true;
}
f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 2ecd24688554..08a3b0a6f5ab 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -258,6 +258,13 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
static int route4_init(struct tcf_proto *tp)
{
+ struct route4_head *head;
+
+ head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+
+ rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -270,13 +277,20 @@ route4_delete_filter(struct rcu_head *head)
kfree(f);
}
-static void route4_destroy(struct tcf_proto *tp)
+static bool route4_destroy(struct tcf_proto *tp, bool force)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
if (head == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h1 = 0; h1 <= 256; h1++) {
+ if (rcu_access_pointer(head->table[h1]))
+ return false;
+ }
+ }
for (h1 = 0; h1 <= 256; h1++) {
struct route4_bucket *b;
@@ -301,6 +315,7 @@ static void route4_destroy(struct tcf_proto *tp)
}
RCU_INIT_POINTER(tp->root, NULL);
kfree_rcu(head, rcu);
+ return true;
}
static int route4_delete(struct tcf_proto *tp, unsigned long arg)
@@ -484,13 +499,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
err = -ENOBUFS;
- if (head == NULL) {
- head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
- if (head == NULL)
- goto errout;
- rcu_assign_pointer(tp->root, head);
- }
-
f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
if (!f)
goto errout;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index edd8ade3fbc1..02fa82792dab 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -291,13 +291,20 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
kfree_rcu(f, rcu);
}
-static void rsvp_destroy(struct tcf_proto *tp)
+static bool rsvp_destroy(struct tcf_proto *tp, bool force)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
if (data == NULL)
- return;
+ return true;
+
+ if (!force) {
+ for (h1 = 0; h1 < 256; h1++) {
+ if (rcu_access_pointer(data->ht[h1]))
+ return false;
+ }
+ }
RCU_INIT_POINTER(tp->root, NULL);
@@ -319,6 +326,7 @@ static void rsvp_destroy(struct tcf_proto *tp)
}
}
kfree_rcu(data, rcu);
+ return true;
}
static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index bd49bf547a47..a557dbaf5afe 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -468,11 +468,14 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp)
+static bool tcindex_destroy(struct tcf_proto *tp, bool force)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
+ if (!force)
+ return false;
+
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
walker.count = 0;
walker.skip = 0;
@@ -481,6 +484,7 @@ static void tcindex_destroy(struct tcf_proto *tp)
RCU_INIT_POINTER(tp->root, NULL);
call_rcu(&p->rcu, __tcindex_destroy);
+ return true;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 09487afbfd51..cab9e9b43967 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -78,8 +78,11 @@ struct tc_u_hnode {
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
- struct tc_u_knode __rcu *ht[1];
struct rcu_head rcu;
+ /* The 'ht' field MUST be the last field in structure to allow for
+ * more entries allocated at end of structure.
+ */
+ struct tc_u_knode __rcu *ht[1];
};
struct tc_u_common {
@@ -460,13 +463,35 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
return -ENOENT;
}
-static void u32_destroy(struct tcf_proto *tp)
+static bool ht_empty(struct tc_u_hnode *ht)
+{
+ unsigned int h;
+
+ for (h = 0; h <= ht->divisor; h++)
+ if (rcu_access_pointer(ht->ht[h]))
+ return false;
+
+ return true;
+}
+
+static bool u32_destroy(struct tcf_proto *tp, bool force)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
WARN_ON(root_ht == NULL);
+ if (!force) {
+ if (root_ht) {
+ if (root_ht->refcnt > 1)
+ return false;
+ if (root_ht->refcnt == 1) {
+ if (!ht_empty(root_ht))
+ return false;
+ }
+ }
+ }
+
if (root_ht && --root_ht->refcnt == 0)
u32_destroy_hnode(tp, root_ht);
@@ -491,6 +516,7 @@ static void u32_destroy(struct tcf_proto *tp)
}
tp->data = NULL;
+ return true;
}
static int u32_delete(struct tcf_proto *tp, unsigned long arg)
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index f03c3de16c27..73e2ed576ceb 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -34,7 +34,6 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
{
struct text_match *tm = EM_TEXT_PRIV(m);
int from, to;
- struct ts_state state;
from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data;
from += tm->from_offset;
@@ -42,7 +41,7 @@ static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data;
to += tm->to_offset;
- return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX;
+ return skb_find_text(skb, from, to, tm->config) != UINT_MAX;
}
static int em_text_change(struct net *net, void *data, int len,
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 6742200b1307..fbb7ebfc58c6 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -228,6 +228,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
* to replay the request.
*/
module_put(em->ops->owner);
+ em->ops = NULL;
err = -EAGAIN;
}
#endif
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 243b7d169d61..ad9eed70bc8f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1858,11 +1858,15 @@ reclassify:
}
EXPORT_SYMBOL(tc_classify);
-void tcf_destroy(struct tcf_proto *tp)
+bool tcf_destroy(struct tcf_proto *tp, bool force)
{
- tp->ops->destroy(tp);
- module_put(tp->ops->owner);
- kfree_rcu(tp, rcu);
+ if (tp->ops->destroy(tp, force)) {
+ module_put(tp->ops->owner);
+ kfree_rcu(tp, rcu);
+ return true;
+ }
+
+ return false;
}
void tcf_destroy_chain(struct tcf_proto __rcu **fl)
@@ -1871,7 +1875,7 @@ void tcf_destroy_chain(struct tcf_proto __rcu **fl)
while ((tp = rtnl_dereference(*fl)) != NULL) {
RCU_INIT_POINTER(*fl, tp->next);
- tcf_destroy(tp);
+ tcf_destroy(tp, true);
}
}
EXPORT_SYMBOL(tcf_destroy_chain);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index dfcea20e3171..f377702d4b91 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -8,7 +8,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Meant to be mostly used for localy generated traffic :
+ * Meant to be mostly used for locally generated traffic :
* Fast classification depends on skb->sk being set before reaching us.
* If not, (router workload), we use rxhash as fallback, with 32 bits wide hash.
* All packets belonging to a socket are considered as a 'flow'.
@@ -63,7 +63,7 @@ struct fq_flow {
struct sk_buff *tail; /* last skb in the list */
unsigned long age; /* jiffies when flow was emptied, for gc */
};
- struct rb_node fq_node; /* anchor in fq_root[] trees */
+ struct rb_node fq_node; /* anchor in fq_root[] trees */
struct sock *sk;
int qlen; /* number of packets in flow queue */
int credit;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index eb5b8445fef9..4cdbfb85686a 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -88,11 +88,19 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* ------------------------------------------------------------- */
+static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ net_inc_ingress_queue();
+
+ return 0;
+}
+
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
tcf_destroy_chain(&p->filter_list);
+ net_dec_ingress_queue();
}
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -124,6 +132,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.id = "ingress",
.priv_size = sizeof(struct ingress_qdisc_data),
.enqueue = ingress_enqueue,
+ .init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 179f1c8c0d8b..956ead2cab9a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -560,8 +560,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
tfifo_dequeue:
skb = __skb_dequeue(&sch->q);
if (skb) {
-deliver:
qdisc_qstats_backlog_dec(sch, skb);
+deliver:
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
@@ -578,6 +578,7 @@ deliver:
rb_erase(p, &q->t_root);
sch->q.qlen--;
+ qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
skb->tstamp = netem_skb_cb(skb)->tstamp_save;