|
表驱动IP过滤器的基本工作过程 (1) Linux的IP过滤器主要采用了表驱动方法, 表驱动入口函数为ipt_do_table(), 驱动表称为IP表, 用ipt_table结构描述. IP表是位置无关的数据块, 它由表头(ipt_table_info)和变长的规则链组成. 表头包含了IP表的尺寸,规则数量和不同的过滤编号对应的规则项的起始位置. 规则链是由多个变长的规则项组成, 每个规则项可分为匹配链和匹配靶两部分, 匹配链由IP匹配模板(ipt_entry)和可选的多个扩展匹配操作项组成, 匹配项由匹配结构(ipt_entry_match)和变长参数区组成, 匹配结构包含指向匹配器(ipt_match)模块的指针. 匹配靶则由靶结构(ipt_entry_target)和变长参数区组成, 靶结构包含指向靶处理(ipt_target)模块的指针. (2) ipt_do_table()对输入的IP包首先沿着IP表规则项的链头IP模板进行匹配, 当规则头匹配成功时, 如果存在后继扩展匹配链, 则进行链式扩展匹配. 当所有的匹配都成功后, 则运行相应的规则靶, 如果规则靶函数返回IPT_CONTINUE, 则继续表中下一规则项匹配, 否则过滤过程结束并返回. 如果靶函数为空, 则说明它是一个标准靶(ipt_standard_target), 标准靶的参数区具有一个判别字(verdict), 它的值如果非负则表示它是一个转移靶, 指向另一个规则项. 如果判别字为负且等于IPT_RETURN, 则说明标准靶是一转移返回靶, 否则将判别字取负后减一, 即 (unsigned)(-verdict) - 1, 作为过滤器的返回值中止过滤过程. ; include/linux/netfilter_ipv4/ip_tables.h: #define IPT_TABLE_MAXNAMELEN 32 /* Furniture shopping... */ struct ipt_table 驱动表描述结构 { struct list_head list; /* A unique name... */ char name[IPT_TABLE_MAXNAMELEN]; /* Seed table: copied in register_table */ struct ipt_replace *table; /* What hooks you will enter on */ unsigned int valid_hooks; /* Lock for the curtain */ rwlock_t lock; /* Man behind the curtain... */ struct ipt_table_info *private; 指向IP表 }; /* This structure defines each of the firewall rules. Consists of 3 parts which are 1) general IP header stuff 2) match specific stuff 3) the target to perform if the rule matches */ struct ipt_entry 规则项起始匹配单元 { struct ipt_ip ip; IP包匹配模板 /* Mark with fields that we care about. */ unsigned int nfcache; /* Size of ipt_entry + matches */ u_int16_t target_offset; 匹配链尺寸 /* Size of ipt_entry + matches + target */ u_int16_t next_offset; 规则项尺寸 /* Back pointer */ unsigned int comefrom; /* Packet and byte counters. */ struct ipt_counters counters; /* The matches (if any), then the target. */ unsigned char elems[0]; }; struct ipt_counters { u_int64_t pcnt, bcnt; /* Packet and byte counters */ }; /* Yes, Virginia, you have to zero the padding. 
*/ struct ipt_ip { IP头匹配模板 /* Source and destination IP addr */ struct in_addr src, dst; /* Mask for src and dest IP addr */ struct in_addr smsk, dmsk; char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; /* Protocol, 0 = ANY */ u_int16_t proto; /* Flags word */ u_int8_t flags; /* Inverse flags */ u_int8_t invflags; 反匹配标志集 }; #define IPT_FUNCTION_MAXNAMELEN 30 struct ipt_entry_match 匹配操作结构 { union { struct { u_int16_t match_size; /* Used by userspace */ char name[IPT_FUNCTION_MAXNAMELEN]; } user; struct { u_int16_t match_size; /* Used inside the kernel */ struct ipt_match *match; } kernel; /* Total length */ u_int16_t match_size; } u; unsigned char data[0]; }; struct ipt_match 匹配器 { struct list_head list; const char name[IPT_FUNCTION_MAXNAMELEN]; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ int (*match)(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop); /* Called when user tries to insert an entry of this type. */ /* Should return true or false. */ int (*checkentry)(const char *tablename, const struct ipt_ip *ip, void *matchinfo, unsigned int matchinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(void *matchinfo, unsigned int matchinfosize); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; }; struct ipt_entry_target 匹配靶 { union { struct { u_int16_t target_size; /* Used by userspace */ char name[IPT_FUNCTION_MAXNAMELEN]; } user; struct { u_int16_t target_size; /* Used inside the kernel */ struct ipt_target *target; } kernel; /* Total length */ u_int16_t target_size; } u; unsigned char data[0]; }; /* Registration hooks for targets. */ struct ipt_target 靶处理器 { struct list_head list; const char name[IPT_FUNCTION_MAXNAMELEN]; /* Returns verdict. 
*/ unsigned int (*target)(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, const void *targinfo, void *userdata); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be called. */ /* Should return true or false. */ int (*checkentry)(const char *tablename, const struct ipt_entry *e, void *targinfo, unsigned int targinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ void (*destroy)(void *targinfo, unsigned int targinfosize); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; }; struct ipt_standard_target 标准靶 { struct ipt_entry_target target; int verdict; 判别字 }; /* The table itself */ struct ipt_table_info IP表表头结构 { /* Size per table */ unsigned int size; /* Number of entries: FIXME. --RR */ unsigned int number; /* Entry points and underflows */ unsigned int hook_entry[NF_IP_NUMHOOKS]; unsigned int underflow[NF_IP_NUMHOOKS]; /* ipt_entry tables: one per CPU */ char entries[0] __attribute__((aligned(SMP_CACHE_BYTES))); }; /* fn returns 0 to continue iteration */ #define IPT_MATCH_ITERATE(e, fn, args...) ({ unsigned int __i; int __ret = 0; struct ipt_entry_match *__m; for (__i = sizeof(struct ipt_entry); __i < (e)->target_offset; __i += __m->u.match_size) { __m = (void *)(e) + __i; __ret = fn(__m , ## args); if (__ret != 0) break; } __ret; }) /* Standard return verdict, or do jump. */ #define IPT_STANDARD_TARGET "" /* Error verdict. */ #define IPT_ERROR_TARGET "ERROR" ; net/ipv4/netfilter/ip_tables.c: static LIST_HEAD(ipt_target); static LIST_HEAD(ipt_match); static LIST_HEAD(ipt_tables); /* The built-in targets: standard (NULL) and error. 
*/ static struct ipt_target ipt_standard_target = { { NULL, NULL }, IPT_STANDARD_TARGET, NULL, NULL, NULL }; static struct ipt_target ipt_error_target = { { NULL, NULL }, IPT_ERROR_TARGET, ipt_error, NULL, NULL }; static struct nf_sockopt_ops ipt_sockopts = { { NULL, NULL }, PF_INET, IPT_BASE_CTL, IPT_SO_SET_MAX+1, do_ipt_set_ctl, IPT_BASE_CTL, IPT_SO_GET_MAX+1, do_ipt_get_ctl, 0, NULL }; static struct ipt_match tcp_matchstruct = { { NULL, NULL }, "tcp", &tcp_match, &tcp_checkentry, NULL }; static struct ipt_match udp_matchstruct = { { NULL, NULL }, "udp", &udp_match, &udp_checkentry, NULL }; static struct ipt_match icmp_matchstruct = { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL }; static int __init init(void) { int ret; /* Noone else will be downing sem now, so we won't sleep */ down(&ipt_mutex); list_append(&ipt_target, &ipt_standard_target); 注册标准靶处理器 list_append(&ipt_target, &ipt_error_target); 注册出错靶处理器 list_append(&ipt_match, &tcp_matchstruct); 注册TCP包匹配器 list_append(&ipt_match, &udp_matchstruct); 注册UDP包匹配器 list_append(&ipt_match, &icmp_matchstruct); 注册ICMP包匹配器 up(&ipt_mutex); /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); 注册IP表用户接口 if (ret < 0) { duprintf("Unable to register sockopts. "); return ret; } ... return 0; } [编者注: 自 "if (ret" 之后至 "ip = (*pskb)->nh.iph" 之前的原文在提取时丢失, 此处按Linux 2.4内核的 net/ipv4/netfilter/ip_tables.c 补全, 细节可能随内核版本略有不同] /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, struct ipt_table *table, void *userdata) { static const char nulldevname[IFNAMSIZ] = { 0 }; u_int16_t offset; struct iphdr *ip; void *protohdr; u_int16_t datalen; int hotdrop = 0; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; void *table_base; struct ipt_entry *e, *back; /* Initialization */ ip = (*pskb)->nh.iph; 取IP头 protohdr = (u_int32_t *)ip + ip->ihl; 取传输头 datalen = (*pskb)->len - ip->ihl * 4; 取传输块长度 indev = in ? in->name : nulldevname; 取输入设备名称 outdev = out ? out->name : nulldevname; 取输出设备名称 /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ offset = ntohs(ip->frag_off) & IP_OFFSET; 取IP包片段所在的偏移 read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); table_base = (void *)table->private->entries + TABLE_OFFSET(table->private, cpu_number_map(smp_processor_id())); 取过滤表的规则区 e = get_entry(table_base, table->private->hook_entry[hook]); 取过滤编号对应的起始规则项 #ifdef CONFIG_NETFILTER_DEBUG /* Check noone else using our table */ if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) { printk("ASSERT: CPU #%u, %s comefrom(%p) = %X ", smp_processor_id(), table->name, &((struct ipt_entry *)table_base)->comefrom, ((struct ipt_entry *)table_base)->comefrom); } ((struct ipt_entry *)table_base)->comefrom = 0x57acc001; #endif /* For return from builtin chain */ back = get_entry(table_base, table->private->underflow[hook]); do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); (*pskb)->nfcache |= e->nfcache; if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { 匹配IP头成功 struct ipt_entry_target *t; if (IPT_MATCH_ITERATE(e, do_match, 对匹配链进行链式匹配 *pskb, in, out, offset, protohdr, datalen, &hotdrop) != 0) goto no_match; ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); 刷新规则项中包和字节计数器 t = ipt_get_target(e); 取规则项的靶结构 IP_NF_ASSERT(t->u.kernel.target); /* Standard target? */ if (!t->u.kernel.target->target) { 如果是标准靶 int v; v = ((struct ipt_standard_target *)t)->verdict; if (v < 0) { /* Pop from stack? */ if (v != IPT_RETURN) { verdict = (unsigned)(-v) - 1; break; } e = back; back = get_entry(table_base, back->comefrom); continue; } if (table_base + v != (void *)e + e->next_offset) { 如果是转移靶 /* Save old back ptr in next entry */ struct ipt_entry *next = (void *)e + e->next_offset; next->comefrom = (void *)back - table_base; /* set back pointer to next entry */ back = next; 设置返回点 } e = get_entry(table_base, v); } else { /* Targets which reenter must return abs. 
verdicts */ #ifdef CONFIG_NETFILTER_DEBUG ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif verdict = t->u.kernel.target->target(pskb, hook, in, out, t->data, userdata); 调用靶函数 #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) { printk("Target %s reentered! ", t->u.kernel.target->name); verdict = NF_DROP; } ((struct ipt_entry *)table_base)->comefrom = 0x57acc001; #endif /* Target might have changed stuff. */ ip = (*pskb)->nh.iph; protohdr = (u_int32_t *)ip + ip->ihl; datalen = (*pskb)->len - ip->ihl * 4; if (verdict == IPT_CONTINUE) 继续下一规则项匹配 e = (void *)e + e->next_offset; else /* Verdict */ break; } } else { 规则项匹配不成功 no_match: e = (void *)e + e->next_offset; 取下一规则项 } } while (!hotdrop); #ifdef CONFIG_NETFILTER_DEBUG ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac; #endif read_unlock_bh(&table->lock); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else if (hotdrop) return NF_DROP; else return verdict; #endif } /* Returns whether matches rule or not. */ static inline int ip_packet_match(const struct iphdr *ip, const char *indev, const char *outdev, const struct ipt_ip *ipinfo, int isfrag) IP头匹配器 { size_t i; unsigned long ret; #define FWINV(bool,inv) ((bool) ^ !!(ipinfo->invflags & inv)) 如果模板中具有inv标志, 则bool值取反 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, IPT_INV_SRCIP) 匹配源地址模板 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, 匹配目标地址模板 IPT_INV_DSTIP)) { dprintf("Source or dest mismatch. "); dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s ", NIPQUAD(ip->saddr), NIPQUAD(ipinfo->smsk.s_addr), NIPQUAD(ipinfo->src.s_addr), ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s ", NIPQUAD(ip->daddr), NIPQUAD(ipinfo->dmsk.s_addr), NIPQUAD(ipinfo->dst.s_addr), ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); return 0; } /* Look for ifname matches; this should unroll nicely. 
*/ for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { ret |= (((const unsigned long *)indev)[i] ^ ((const unsigned long *)ipinfo->iniface)[i]) & ((const unsigned long *)ipinfo->iniface_mask)[i]; } if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s ", indev, ipinfo->iniface, ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); return 0; } for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { ret |= (((const unsigned long *)outdev)[i] ^ ((const unsigned long *)ipinfo->outiface)[i]) & ((const unsigned long *)ipinfo->outiface_mask)[i]; } if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s ", outdev, ipinfo->outiface, ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); return 0; } /* Check specific protocol */ if (ipinfo->proto 匹配IP传输协议码 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { dprintf("Packet protocol %hi does not match %hi.%s ", ip->protocol, ipinfo->proto, ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); return 0; } /* If we have a fragment rule but the packet is not a fragment * then we return zero */ 匹配IP包是否允许分片 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { dprintf("Fragment rule but not fragment.%s ", ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : ""); return 0; } return 1; } static inline int do_match(struct ipt_entry_match *m, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) 链式匹配中的匹配器接口 { /* Stop iteration if it doesn't match */ if (!m->u.kernel.match->match(skb, in, out, m->data, /* m->data为匹配项中的参数区 */ offset, hdr, datalen, hotdrop)) return 1; else return 0; } static unsigned int ipt_error(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, const void *targinfo, void *userinfo) 错误靶函数 { if (net_ratelimit()) printk("ip_tables: error: `%s' ", (char *)targinfo); return NF_DROP; } /* TCP matching stuff */ struct ipt_tcp TCP匹配操作项参数区 { u_int16_t spts[2]; /* Source port range. */ u_int16_t dpts[2]; /* Destination port range. 
*/ u_int8_t option; /* TCP Option iff non-zero*/ u_int8_t flg_mask; /* TCP flags mask byte */ u_int8_t flg_cmp; /* TCP flags compare byte */ u_int8_t invflags; /* Inverse flags */ }; static int tcp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) TCP匹配器函数 { const struct tcphdr *tcp = hdr; const struct ipt_tcp *tcpinfo = matchinfo; /* To quote Alan: Don't allow a fragment of TCP 8 bytes in. Nobody normal causes this. Its a cracker trying to break in by doing a flag overwrite to pass the direction checks. */ if (offset == 1) { duprintf("Dropping evil TCP offset=1 frag. "); *hotdrop = 1; return 0; } else if (offset == 0 && datalen < sizeof(struct tcphdr)) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil TCP offset=0 tinygram. "); *hotdrop = 1; return 0; } /* FIXME: Try tcp doff >> packet len against various stacks --RR */ #define FWINVTCP(bool,inv) ((bool) ^ !!(tcpinfo->invflags & inv)) /* Must not be a fragment. */ return !offset && port_match(tcpinfo->spts[0], tcpinfo->spts[1], ntohs(tcp->source), !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)) TCP源端口号是否在范围内 && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1], ntohs(tcp->dest), !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)) TCP目的端口号是否在范围内 && FWINVTCP((((unsigned char *)tcp)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp, IPT_TCP_INV_FLAGS) 匹配TCP标志码 && (!tcpinfo->option || tcp_find_option(tcpinfo->option, tcp, datalen, tcpinfo->invflags & IPT_TCP_INV_OPTION, hotdrop)); 是否允许特定的TCP选项 } /* Called when user tries to insert an entry of this type. */ static int tcp_checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo, unsigned int matchsize, unsigned int hook_mask) TCP匹配器参数初始化校验函数 { const struct ipt_tcp *tcpinfo = matchinfo; /* Must specify proto == TCP, and no unknown invflags */ return ip->proto == IPPROTO_TCP && !(ip->invflags & IPT_INV_PROTO) && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp)) && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK); } /* UDP matching stuff */ struct ipt_udp { u_int16_t spts[2]; /* Source port range. 
*/ u_int16_t dpts[2]; /* Destination port range. */ u_int8_t invflags; /* Inverse flags */ }; static int udp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) UDP匹配器函数 { const struct udphdr *udp = hdr; const struct ipt_udp *udpinfo = matchinfo; if (offset == 0 && datalen < sizeof(struct udphdr)) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil UDP tinygram. "); *hotdrop = 1; return 0; } /* Must not be a fragment. */ return !offset && port_match(udpinfo->spts[0], udpinfo->spts[1], ntohs(udp->source), !!(udpinfo->invflags & IPT_UDP_INV_SRCPT)) && port_match(udpinfo->dpts[0], udpinfo->dpts[1], ntohs(udp->dest), !!(udpinfo->invflags & IPT_UDP_INV_DSTPT)); } /* Called when user tries to insert an entry of this type. */ static int udp_checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo, unsigned int matchinfosize, unsigned int hook_mask) { const struct ipt_udp *udpinfo = matchinfo; /* Must specify proto == UDP, and no unknown invflags */ if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) { duprintf("ipt_udp: Protocol %u != %u ", ip->proto, IPPROTO_UDP); return 0; } if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) { duprintf("ipt_udp: matchsize %u != %u ", matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp))); return 0; } if (udpinfo->invflags & ~IPT_UDP_INV_MASK) { duprintf("ipt_udp: unknown flags %X ", udpinfo->invflags); return 0; } return 1; } /* ICMP matching stuff */ struct ipt_icmp { u_int8_t type; /* type to match */ u_int8_t code[2]; /* range of code */ u_int8_t invflags; /* Inverse flags */ }; static int icmp_match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) { const struct icmphdr *icmp = hdr; const struct ipt_icmp *icmpinfo = matchinfo; if (offset == 0 && datalen < 2) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram. "); *hotdrop = 1; return 0; } /* Must not be a fragment. */ return !offset && icmp_type_code_match(icmpinfo->type, icmpinfo->code[0], icmpinfo->code[1], icmp->type, icmp->code, !!(icmpinfo->invflags&IPT_ICMP_INV)); 匹配具有特定类型,控制码在某个范围内的ICMP包 } /* Returns 1 if the type and code is matched by the range, 0 
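otherwise */ static inline int icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, u_int8_t type, u_int8_t code, int invert) { return (type == test_type && code >= min_code && code <= max_code) ^ invert; } /* Called when user tries to insert an entry of this type. */ static int icmp_checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo, unsigned int matchsize, unsigned int hook_mask) { const struct ipt_icmp *icmpinfo = matchinfo; /* Must specify no unknown invflags */ return ip->proto == IPPROTO_ICMP && !(ip->invflags & IPT_INV_PROTO) && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp)) && !(icmpinfo->invflags & ~IPT_ICMP_INV); } /* Returns 1 if the port is matched by the range, 0 otherwise */ static inline int port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert) { int ret; ret = (port >= min && port <= max) ^ invert; return ret; } static int tcp_find_option(u_int8_t option, const struct tcphdr *tcp, u_int16_t datalen, int invert, int *hotdrop) { unsigned int i = sizeof(struct tcphdr); const u_int8_t *opt = (u_int8_t *)tcp; duprintf("tcp_match: finding option "); /* If we don't have the whole header, drop packet. */ if (tcp->doff * 4 > datalen) { *hotdrop = 1; return 0; } while (i < tcp->doff * 4) { if (opt[i] == option) return !invert; if (opt[i] < 2) i++; else i += opt[i+1]?:1; } return invert; } [编者注: 以上各处被截断的代码按Linux 2.4内核的 net/ipv4/netfilter/ip_tables.c 补全; 此处之后原文另有一段内容在提取时丢失, 仅残留下面的片段] prev); }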
|