IP层收发报⽂简要剖析3--ip输⼊报⽂分⽚重组
在ip_local_deliver中，如果检测到是分片包，则需要将报文进行重组。其所有的分片被重新组合后才能提交到上层协议，每一个被重新组合的数据报文用ipq结构实例来表示。
/* One instance represents a single IPv4 datagram that is being reassembled. */
struct ipq {
struct inet_frag_queue q;
u32 user;// origin of the fragments
__be32 saddr;// source address
__be32 daddr;// destination address
__be16 id;// IP datagram identification
u8 protocol;// upper-layer protocol number
// The four fields above (saddr, daddr, id, protocol) are taken from the IP
// header and identify which IP datagram a fragment belongs to.
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
int vif; /* L3 master device index */
unsigned int rid;// counter of fragments received so far (per the original note)
struct inet_peer *peer;// records information about the sender
// rid together with peer can be used to guard against DoS attacks.
};
struct inet_frags {
struct inet_frag_bucket hash[INETFRAGS_HASHSZ];//哈希队列 struct work_struct frags_work;//⼯作队列
unsigned int next_bucket;
unsigned long last_rebuild_jiffies;
bool rebuild;
/* The first call to hashfn is responsible to initialize
* rnd. This is best done with net_get_random_once.
*
* rnd_seqlock is used to let hash insertion detect
* when it needs to re-lookup the hash chain to use.
*/
u32 rnd;//随机数
seqlock_t rnd_seqlock;//
int qsize;//队列长度
unsigned int (*hashfn)(const struct inet_frag_queue *);
bool (*match)(const struct inet_frag_queue *q,
const void *arg);//分段队列匹配函数
void (*constructor)(struct inet_frag_queue *q,
const void *arg);
void (*destructor)(struct inet_frag_queue *);
void (*frag_expire)(unsigned long data);//队列过期处理函数
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
};
/* Per-network-namespace fragment accounting and tunables. */
struct netns_frags {
	/* The percpu_counter "mem" need to be cacheline aligned.
	 * mem.count must not share cacheline with other writers
	 */
	struct percpu_counter mem ____cacheline_aligned_in_smp;
	/* sysctls */
	int timeout;		/* reassembly timeout */
	/* Upper memory limit; inet_frag_alloc() refuses to allocate a new
	 * queue once usage exceeds it.
	 */
	int high_thresh;
	int low_thresh;		/* lower memory threshold */
	int max_dist;
};
View Code
/**
* struct inet_frag_queue - fragment queue
*
* @lock: spinlock protecting the queue
* @timer: queue expiration timer
* @list: hash bucket list
* @refcnt: reference count of the queue
* @fragments: received fragments head
* @fragments_tail: received fragments tail
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
* @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
*/
struct inet_frag_queue {//inet分段队列头
spinlock_t lock;smp环境下需要
struct timer_list timer;队列定时器,组装⾮常耗时,不能⽆休⽌的等待分⽚的到达struct hlist_node list;哈希节点,链⼊inet分段管理结构的哈希队列
atomic_t refcnt;计数器
struct sk_buff *fragments;分段数据包队列
struct sk_buff *fragments_tail;
ktime_t stamp;时间戳
int len;数据包结束位置offset+len
int meat;与原数据长度的差距,如果和原数据包长度⼀样代表接收完成
__u8 flags;
u16 max_size;
struct netns_frags *net;指向⽹络空寂分段管理结构
struct hlist_node list_evictor;
};
1.1、 IP分组的初始化
/*
 * Set up IPv4 fragment handling: register the sysctl and per-namespace
 * hooks, fill in the ip4_frags callback table and create its cache.
 * Panics if inet_frags_init() fails.
 */
void __init ipfrag_init(void)
{
	ip4_frags_ctl_register();
	/* Register the IPv4 fragment management per-namespace operations. */
	register_pernet_subsys(&ip4_frags_ops);
	/* Hash function. */
	ip4_frags.hashfn = ip4_hashfn;
	/* Constructor that initialises a new IP fragment queue. */
	ip4_frags.constructor = ip4_frag_init;
	/* Destructor. */
	ip4_frags.destructor = ip4_frag_free;
	/* Size of one queue object. */
	ip4_frags.qsize = sizeof(struct ipq);
	/* Hook that compares a queue against a lookup key. */
	ip4_frags.match = ip4_frag_match;
	/* Queue expiry handler. */
	ip4_frags.frag_expire = ip_expire;
	ip4_frags.frags_cache_name = ip_frag_cache_name;
	if (inet_frags_init(&ip4_frags))
		panic("IP: failed to allocate ip4_frags cache\n");
}
int inet_frags_init(struct inet_frags *f)
{
int i;
/
/初始化⼯作队列
INIT_WORK(&f->frags_work, inet_frag_worker);
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
struct inet_frag_bucket *hb = &f->hash[i];//初始化hash 队列头
spin_lock_init(&hb->chain_lock);
INIT_HLIST_HEAD(&hb->chain);
}
seqlock_init(&f->rnd_seqlock);
f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
return -ENOMEM;
return0;
}
EXPORT_SYMBOL(inet_frags_init);
int ip_local_deliver(struct sk_buff *skb)
{
/*
* Reassemble IP fragments.
*/
struct net *net = dev_net(skb->dev);
/
* 分⽚重组 */
if (ip_is_fragment(ip_hdr(skb))) {
if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
return0;
}
/* 经过LOCAL_IN钩⼦点 */
return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
net, NULL, skb, skb->dev, NULL,
ip_local_deliver_finish);
}
1.2、 ip分⽚报⽂重组的处理
/
* Process an incoming IP datagram fragment. */
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
int vif = l3mdev_master_ifindex_rcu(dev);
struct ipq *qp;
//递增计数
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
skb_orphan(skb);
/* Lookup (or create) queue header* 查或创建IP分⽚队列 */
qp = ip_find(net, ip_hdr(skb), user, vif);
if (qp) {/* 分⽚队列存在 */
int ret;
spin_lock(&qp->q.lock);
ret = ip_frag_queue(qp, skb);//分⽚数据包⼊队重组数据包
spin_unlock(&qp->q.lock);
ipq_put(qp);
return ret;
}
/* 创建新的ip分⽚队列失败,内存不⾜递增失败计数*/
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -ENOMEM;
}
EXPORT_SYMBOL(ip_defrag);
1.2.2 ip_find：根据ip首部以及user标志在ipq散列表中查找对应的ipq。
/* Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and create new one, if nothing is found.
 *
 * The user argument is one of the following values (shown for reference):
 *
 *	enum ip_defrag_users {
 *		IP_DEFRAG_LOCAL_DELIVER,
 *		IP_DEFRAG_CALL_RA_CHAIN,
 *		IP_DEFRAG_CONNTRACK_IN,
 *		__IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHRT_MAX,
 *		IP_DEFRAG_CONNTRACK_OUT,
 *		__IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
 *		IP_DEFRAG_CONNTRACK_BRIDGE_IN,
 *		__IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
 *		IP_DEFRAG_VS_IN,
 *		IP_DEFRAG_VS_OUT,
 *		IP_DEFRAG_VS_FWD,
 *		IP_DEFRAG_AF_PACKET,
 *		IP_DEFRAG_MACVLAN,
 *	};
 *
 * Returns the matching (or newly created) ipq, or NULL when
 * inet_frag_find() yields NULL or an error pointer.
 */
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
			   u32 user, int vif)
{
	struct inet_frag_queue *q;
	struct ip4_create_arg arg;
	unsigned int hash;

	/* Record the IP header and the input information as the lookup key. */
	arg.iph = iph;
	arg.user = user;
	arg.vif = vif;

	/* Hash over id, source address, destination address and protocol. */
	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);

	/* Look up the queue by hash, creating it if necessary. */
	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
	if (IS_ERR_OR_NULL(q)) {
		inet_frag_maybe_warn_overflow(q, pr_fmt());
		return NULL;
	}

	return container_of(q, struct ipq, q);
}
/*
 * Look up the fragment queue matching key in bucket hash of f, restricted
 * to namespace nf.
 *
 * On a hit the queue is returned with its refcnt incremented. On a miss a
 * new queue is created via inet_frag_create() as long as the chain walked
 * was no deeper than INETFRAGS_MAXDEPTH; otherwise ERR_PTR(-ENOBUFS) is
 * returned and, if permitted, a rebuild is scheduled.
 */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
				       struct inet_frags *f, void *key,
				       unsigned int hash)
{
	struct inet_frag_bucket *hb;
	struct inet_frag_queue *q;
	int depth = 0;

	/* Fragment memory is above the low threshold: schedule the worker
	 * (inet_frag_worker) to reclaim queues.
	 */
	if (frag_mem_limit(nf) > nf->low_thresh)
		inet_frag_schedule_worker(f);

	hash &= (INETFRAGS_HASHSZ - 1);
	hb = &f->hash[hash];	/* locate the hash bucket */

	spin_lock(&hb->chain_lock);
	hlist_for_each_entry(q, &hb->chain, list) {	/* walk the chain */
		if (q->net == nf && f->match(q, key)) {
			atomic_inc(&q->refcnt);	/* take a reference */
			spin_unlock(&hb->chain_lock);
			return q;
		}
		depth++;	/* track how deep the lookup went */
	}
	spin_unlock(&hb->chain_lock);

	/* Not found. If the chain depth is within the limit, create a new
	 * queue and return it.
	 */
	if (depth <= INETFRAGS_MAXDEPTH)
		return inet_frag_create(nf, f, key);

	if (inet_frag_may_rebuild(f)) {
		/* The rebuild interval has elapsed: request a rebuild. */
		if (!f->rebuild)
			f->rebuild = true;
		inet_frag_schedule_worker(f);
	}

	return ERR_PTR(-ENOBUFS);
}
EXPORT_SYMBOL(inet_frag_find);
View Code
如果查找不到，则会创建一个ipq并将其插入链表中。
/*
 * Allocate a new fragment queue and insert it into the hash table.
 * Returns NULL when the allocation fails, otherwise whatever
 * inet_frag_intern() returns.
 */
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
						struct inet_frags *f,
						void *arg)
{
	/* Allocate the queue header structure. */
	struct inet_frag_queue *fresh = inet_frag_alloc(nf, f, arg);

	return fresh ? inet_frag_intern(nf, fresh, f, arg) : NULL;
}
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f,
光端机箱
void *arg)
{
struct inet_frag_queue *q;
if (frag_mem_limit(nf) > nf->high_thresh) {//内存超过警戒线回收内存
inet_frag_schedule_worker(f);
return NULL;
}
去鱼鳞机
q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (!q)
return NULL;
q->net = nf;//记录下⽹络空间的分段管理结构指针
f->constructor(q, arg);//之前初始化时,构造函数来初始化-ip4_frag_init
add_frag_mem_limit(nf, f->qsize);//sum ⽹络空间的分段内存
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);//定时器initand run spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
return q;
}
/*
 * Constructor for an IPv4 fragment queue: copies the identifying fields
 * from the creation argument (IP header, user, vif) into the ipq and,
 * when max_dist is non-zero for the namespace, looks up the sender's
 * inet_peer entry.
 */
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
	const struct ip4_create_arg *create = a;	/* IPv4 creation key */
	struct ipq *entry = container_of(q, struct ipq, q);
	struct netns_ipv4 *ns_ipv4 = container_of(q->net, struct netns_ipv4,
						  frags);
	struct net *ns = container_of(ns_ipv4, struct net, ipv4);

	/* Fields taken from the IP header. */
	entry->protocol = create->iph->protocol;
	entry->id = create->iph->id;
	entry->ecn = ip4_frag_ecn(create->iph->tos);
	entry->saddr = create->iph->saddr;
	entry->daddr = create->iph->daddr;

	/* Fields taken from the input context. */
	entry->vif = create->vif;
	entry->user = create->user;

	/* Record information about the sender only when max_dist is set. */
	if (q->net->max_dist)
		entry->peer = inet_getpeer_v4(ns->ipv4.peers,
					      create->iph->saddr,
					      create->vif, 1);
	else
		entry->peer = NULL;
}
/*
 * Insert the freshly allocated queue qp_in into its hash bucket.
 *
 * With CONFIG_SMP the bucket is re-scanned first; if a matching queue is
 * already present, that queue is returned with its refcnt incremented and
 * qp_in is flagged INET_FRAG_COMPLETE and released via inet_frag_put().
 * Otherwise qp_in has its timer set, is added to the head of the chain
 * and is returned.
 */
static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in,
struct inet_frags *f,
void *arg)
{
/* NOTE(review): the helper presumably returns the bucket with
 * chain_lock held, since both exits below unlock it — confirm.
 */
struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
 * such entry could have been created on other cpu before
 * we acquired hash bucket lock.
 */
hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
/* Existing queue wins: reference it and discard qp_in. */
atomic_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock);
qp_in->flags |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
}
}
#endif
qp = qp_in;
/* Set the expiry timer to nf->timeout from now; an extra reference is
 * taken when mod_timer() returns 0 (presumably meaning the timer was
 * not already pending — confirm against the timer API).
 */
if (!mod_timer(&qp->timer, jiffies + nf->timeout))
atomic_inc(&qp->refcnt);
/* Take one more reference, then link the queue into the hash chain of
 * the fragment management structure.
 */
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
spin_unlock(&hb->chain_lock);
return qp;
}
View Code
1.2.3 分片数据包加入重组数据包
/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct sk_buff *prev, *next;
struct net_device *dev;
unsigned int fragsize;
int flags, offset;
int ihl, end;
int err = -ENOENT;
u8 ecn;
if (qp->q.flags & INET_FRAG_COMPLETE) //分段队列接收完成则释放此分⽚返回goto err;
/*数据包没有分段标志or 分段队列间隔过⼤
//重现调整分段队列是否出错
如果不是本地⽣成的分⽚,则调⽤ip_frag_too_far 检测
是否存在 dos攻击,存在攻击则调⽤邋ip_frag_reinit释放