Kernel full NAT: a brief analysis

    DPVS follows the LVS design, whose core sits in LVS's netfilter-based framework; the author once built a similar FULLNAT to support layer-3 portal authentication.

       The IPVS source lives under net/netfilter/ipvs in the kernel tree. Since LVS is built on the netfilter framework, first look at which hook points LVS attaches its handlers to. IPVS registers handlers at three hooks: NF_INET_LOCAL_IN, NF_INET_FORWARD, and NF_INET_POST_ROUTING, and supports both IPv4 and IPv6.

    static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        /* After packet filtering, forward packet through VS/DR, VS/TUN,
         * or VS/NAT(change destination), so that filtering rules can be
         * applied to IPVS. */
        {
            .hook     = ip_vs_in,
            .owner    = THIS_MODULE,
            .pf       = PF_INET,
            .hooknum  = NF_INET_LOCAL_IN,
            .priority = 100,
        },
        /* After packet filtering, change source only for VS/NAT */
        {
            .hook     = ip_vs_out,
            .owner    = THIS_MODULE,
            .pf       = PF_INET,
            .hooknum  = NF_INET_FORWARD,
            .priority = 100,
        },
        /* After packet filtering (but before ip_vs_out_icmp), catch icmp
         * destined for 0.0.0.0/0, which is for incoming IPVS connections */
        {
            .hook     = ip_vs_forward_icmp,
            .owner    = THIS_MODULE,
            .pf       = PF_INET,
            .hooknum  = NF_INET_FORWARD,
            .priority = 99,
        },
        /* Before the netfilter connection tracking, exit from POST_ROUTING */
        {
            .hook     = ip_vs_post_routing,
            .owner    = THIS_MODULE,
            .pf       = PF_INET,
            .hooknum  = NF_INET_POST_ROUTING,
            .priority = NF_IP_PRI_NAT_SRC-1,
        },
    #ifdef CONFIG_IP_VS_IPV6
        /* After packet filtering, forward packet through VS/DR, VS/TUN,
         * or VS/NAT(change destination), so that filtering rules can be
         * applied to IPVS. */
        {
            .hook     = ip_vs_in,
            .owner    = THIS_MODULE,
            .pf       = PF_INET6,
            .hooknum  = NF_INET_LOCAL_IN,
            .priority = 100,
        },
        /* After packet filtering, change source only for VS/NAT */
        {
            .hook     = ip_vs_out,
            .owner    = THIS_MODULE,
            .pf       = PF_INET6,
            .hooknum  = NF_INET_FORWARD,
            .priority = 100,
        },
        /* After packet filtering (but before ip_vs_out_icmp), catch icmp
         * destined for 0.0.0.0/0, which is for incoming IPVS connections */
        {
            .hook     = ip_vs_forward_icmp_v6,
            .owner    = THIS_MODULE,
            .pf       = PF_INET6,
            .hooknum  = NF_INET_FORWARD,
            .priority = 99,
        },
        /* Before the netfilter connection tracking, exit from POST_ROUTING */
        {
            .hook     = ip_vs_post_routing,
            .owner    = THIS_MODULE,
            .pf       = PF_INET6,
            .hooknum  = NF_INET_POST_ROUTING,
            .priority = NF_IP6_PRI_NAT_SRC-1,
        },
    #endif
    };
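
    These ops are installed once at module load. In kernels of this vintage the registration in ip_vs_init() (net/netfilter/ipvs/ip_vs_core.c) is essentially a single call; a trimmed sketch (the surrounding init steps and error unwinding are omitted here):

    static int __init ip_vs_init(void)
    {
        int ret;

        /* ... connection table, protocol, app and control interface init ... */

        ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
        if (ret < 0)
            pr_err("can't register hooks.\n");
        return ret;
    }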

    4.2  The ip_vs_in function

      When a remote client sends traffic to the LVS server, the packet's destination IP is local to the director, so the packet first enters the NF_INET_LOCAL_IN hook, where ip_vs_in runs:
      
    static unsigned int
    ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
         const struct net_device *in, const struct net_device *out,
         int (*okfn)(struct sk_buff *))
    {
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_conn *cp;
        int ret, restart, af, pkts;
        /* determine whether this is IPv4 or IPv6 */
        af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6;
        /* extract the IP header */
        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
    
        /*
         *    Big tappo: only PACKET_HOST, including loopback for local client
         *    Don't handle local packets on IPv6 for now
         *    i.e. only packets destined for this host are handled */
        if (unlikely(skb->pkt_type != PACKET_HOST)) {
            IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
                 skb->pkt_type,
                 iph.protocol,
                 IP_VS_DBG_ADDR(af, &iph.daddr));
            return NF_ACCEPT;
        }
    /* IPv6 branch, not analyzed here */
    #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
            if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
                int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
    
                if (related)
                    return verdict;
                ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
            }
        } else
    #endif
        /* is this an ICMP packet? */
            if (unlikely(iph.protocol == IPPROTO_ICMP)) {
                int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
    
                if (related)
                    return verdict;
                ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
            }
    
    /* Protocol supported? LVS currently handles TCP, UDP, SCTP, AH and ESP */
        pp = ip_vs_proto_get(iph.protocol);
        if (unlikely(!pp))
            return NF_ACCEPT;
    
        /*
         * Check if the packet belongs to an existing connection entry.
         * For the first packet of a flow this returns NULL. The callback is
         * ip_vs_conn_in_get_proto, which matches on source IP, destination IP,
         * source port, destination port and protocol. */
        cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
    /* for a newly arrived packet no connection is found, cp is NULL, and we take the if branch below */
        if (unlikely(!cp)) {
            int v;
    
        /* For local client packets, it could be a response. For TCP the
         * callback here is ip_vs_conn_out_get_proto; newer kernels have
         * dropped this response handling, so it is not analyzed here. */
            cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
            if (cp)
            return handle_response(af, skb, pp, cp, iph.len); /* an outbound reply:
                handle_response() performs the SNAT on it */
        /* no connection found: create one; for TCP conn_schedule is tcp_conn_schedule */
            if (!pp->conn_schedule(af, skb, pp, &v, &cp))
                return v;
        }
    
        if (unlikely(!cp)) {
        /* sorry, all this trouble for a no-hit :) scheduling failed as well, so just accept the packet */
            IP_VS_DBG_PKT(12, pp, skb, 0,
                 "packet continues traversal as normal");
            return NF_ACCEPT;
        }
    
        IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");
    
    /* Check the server status: if cp->dest is set but the real server is unavailable, drop the packet */
        if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
            /* the destination server is not available */
    
            if (sysctl_ip_vs_expire_nodest_conn) {
                /* try to expire the connection immediately */
                ip_vs_conn_expire_now(cp);
            }
        /* don't restart its timer, and silently
         * drop the packet; __ip_vs_conn_put() releases our reference */
            __ip_vs_conn_put(cp);
            return NF_DROP;
        }
    /* update traffic statistics and state */
        ip_vs_in_stats(cp, skb);
    /* advance the state machine for direction IP_VS_DIR_INPUT; for TCP this ends up in tcp_state_transition */
        restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
        if (cp->packet_xmit)
        ret = cp->packet_xmit(skb, cp, pp); /* transmit; in NAT mode this is ip_vs_nat_xmit */
            /* do not touch skb anymore */
        else {
            IP_VS_DBG_RL("warning: packet_xmit is null");
            ret = NF_ACCEPT;
        }
    
        /* Increase its packet counter and check if it is needed
         * to be synchronized
         *
         * Sync connection if it is about to close to
     * encourage the standby servers to update the connection timeouts.
     * Bump the packet counter and sync the connection if needed. */
        pkts = atomic_add_return(1, &cp->in_pkts);
        if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
         cp->protocol == IPPROTO_SCTP) {
            if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
                (atomic_read(&cp->in_pkts) %
                 sysctl_ip_vs_sync_threshold[1]
                 == sysctl_ip_vs_sync_threshold[0])) ||
                    (cp->old_state != cp->state &&
                     ((cp->state == IP_VS_SCTP_S_CLOSED) ||
                     (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
                     (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
            ip_vs_sync_conn(cp); /* sync the connection to the backup LVS server (SCTP case) */
                goto out;
            }
        }
    
        if (af == AF_INET &&
         (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
         (((cp->protocol != IPPROTO_TCP ||
         cp->state == IP_VS_TCP_S_ESTABLISHED) &&
         (pkts % sysctl_ip_vs_sync_threshold[1]
         == sysctl_ip_vs_sync_threshold[0])) ||
         ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
         ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
         (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
         (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
        ip_vs_sync_conn(cp); /* sync the connection to the backup LVS server (TCP and other protocols) */
    
    out:
        cp->old_state = cp->state;
    
        ip_vs_conn_put(cp);
        return ret;
    }
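
    Worth noting: pp->conn_in_get (ip_vs_conn_in_get_proto) boils down to hashing the packet's tuple into the global ip_vs_conn_tab and walking the bucket chain. A minimal userspace sketch of that lookup pattern (the struct, field names and hash here are illustrative; the kernel hashes with jhash over protocol/address/port plus a random seed):

    #include <stdint.h>
    #include <stdio.h>

    #define CONN_TAB_SIZE 4096            /* the kernel sizes its table via a module parameter */

    struct conn {
        uint8_t  protocol;
        uint32_t caddr, vaddr;            /* client and virtual IPv4 addresses */
        uint16_t cport, vport;
        struct conn *next;                /* bucket chain, like the hlist in ip_vs_conn_tab */
    };

    static struct conn *conn_tab[CONN_TAB_SIZE];

    /* Illustrative hash, standing in for the kernel's jhash-based one. */
    static unsigned int conn_hash(uint8_t proto, uint32_t addr, uint16_t port)
    {
        return (proto ^ addr ^ ((uint32_t)port << 16 | port)) & (CONN_TAB_SIZE - 1);
    }

    /* Same idea as ip_vs_conn_in_get(): hash the incoming tuple, walk the
     * bucket, match every field. Returns NULL for the first packet of a
     * flow, which is what sends ip_vs_in() into conn_schedule(). */
    static struct conn *conn_in_get(uint8_t proto, uint32_t caddr, uint16_t cport,
                                    uint32_t vaddr, uint16_t vport)
    {
        struct conn *cp;

        for (cp = conn_tab[conn_hash(proto, caddr, cport)]; cp; cp = cp->next)
            if (cp->protocol == proto && cp->caddr == caddr && cp->cport == cport &&
                cp->vaddr == vaddr && cp->vport == vport)
                return cp;
        return NULL;
    }

    int main(void)
    {
        struct conn c = { .protocol = 6, .caddr = 0x0a000001, .vaddr = 0x0a000064,
                          .cport = 40000, .vport = 80 };

        conn_tab[conn_hash(c.protocol, c.caddr, c.cport)] = &c;  /* ip_vs_conn_hash() analogue */
        printf("lookup: %s\n",
               conn_in_get(6, 0x0a000001, 40000, 0x0a000064, 80) ? "hit" : "miss");
        return 0;
    }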

    4.2.1  tcp_conn_schedule

    This function invokes the configured scheduling policy to pick the actual real server. (Note: this and the following listings come from a newer kernel than the ip_vs_in above, which is why the signatures now carry netns_ipvs and ip_vs_iphdr parameters.)

    static int
    tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
              struct ip_vs_proto_data *pd,
              int *verdict, struct ip_vs_conn **cpp,
              struct ip_vs_iphdr *iph)
    {
        struct ip_vs_service *svc;
        struct tcphdr _tcph, *th;
        __be16 _ports[2], *ports = NULL;
    
        /* In the event of icmp, we're only guaranteed to have the first 8
         * bytes of the transport header, so we only check the rest of the
         * TCP packet for non-ICMP packets
         */
        if (likely(!ip_vs_iph_icmp(iph))) {
            th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
            if (th) {
                if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
                    return 1;
                ports = &th->source;
            }
        } else {
            ports = skb_header_pointer(
                skb, iph->len, sizeof(_ports), &_ports);
        }
    
        if (!ports) {
            *verdict = NF_DROP;
            return 0;
        }
    
        /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
        rcu_read_lock();
    
        if (likely(!ip_vs_iph_inverse(iph)))
            svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
                         &iph->daddr, ports[1]);
        else
            svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
                         &iph->saddr, ports[0]);
    
        if (svc) {
            int ignored;
    
            if (ip_vs_todrop(ipvs)) {
                /*
                 * It seems that we are very loaded.
                 * We have to drop this packet :(
                 */
                rcu_read_unlock();
                *verdict = NF_DROP;
                return 0;
            }
    
        /* The ip_vs_service instance was found above by protocol,
         * destination address and destination port.
             * Let the virtual server select a real server for the
             * incoming connection, and create a connection entry.
             */
            *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
            if (!*cpp && ignored <= 0) {
                if (!ignored)
                    *verdict = ip_vs_leave(svc, skb, pd, iph);
                else
                    *verdict = NF_DROP;
                rcu_read_unlock();
                return 0;
            }
        }
        rcu_read_unlock();
        /* NF_ACCEPT */
        return 1;
    }
    /*
     *  IPVS main scheduling function
     *  It selects a server according to the virtual service, and
     *  creates a connection entry.
     *  Protocols supported: TCP, UDP
     *
     *  Usage of *ignored
     *
     * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
     *       svc/scheduler decides that this packet should be accepted with
     *       NF_ACCEPT because it must not be scheduled.
     *
     * 0 :   scheduler can not find destination, so try bypass or
     *       return ICMP and then NF_DROP (ip_vs_leave).
     *
     * -1 :  scheduler tried to schedule but fatal error occurred, eg.
     *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
     *       failure such as missing Call-ID, ENOMEM on skb_linearize
     *       or pe_data. In this case we should return NF_DROP without
     *       any attempts to send ICMP with ip_vs_leave.
     */
    struct ip_vs_conn *
    ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
               struct ip_vs_proto_data *pd, int *ignored,
               struct ip_vs_iphdr *iph)
    {
        struct ip_vs_protocol *pp = pd->pp;
        struct ip_vs_conn *cp = NULL;
        struct ip_vs_scheduler *sched;
        struct ip_vs_dest *dest;
        __be16 _ports[2], *pptr, cport, vport;
        const void *caddr, *vaddr;
        unsigned int flags;
    
        *ignored = 1;
        /*
         * IPv6 frags, only the first hit here.
         */
        pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
        if (pptr == NULL)
            return NULL;
    
        if (likely(!ip_vs_iph_inverse(iph))) {
            cport = pptr[0];
            caddr = &iph->saddr;
            vport = pptr[1];
            vaddr = &iph->daddr;
        } else {
            cport = pptr[1];
            caddr = &iph->daddr;
            vport = pptr[0];
            vaddr = &iph->saddr;
        }
    
        /*
         * FTPDATA needs this check when using local real server.
         * Never schedule Active FTPDATA connections from real server.
         * For LVS-NAT they must be already created. For other methods
         * with persistence the connection is created on SYN+ACK.
         */
        if (cport == FTPDATA) {
            IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
                      "Not scheduling FTPDATA");
            return NULL;
        }
    
        /*
         *    Do not schedule replies from local real server.
         */
        if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
            iph->hdr_flags ^= IP_VS_HDR_INVERSE;
            cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph);
            iph->hdr_flags ^= IP_VS_HDR_INVERSE;
    
            if (cp) {
                IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
                          "Not scheduling reply for existing"
                          " connection");
                __ip_vs_conn_put(cp);
                return NULL;
            }
        }
    
        /*
         *    Persistent service
         */
        if (svc->flags & IP_VS_SVC_F_PERSISTENT)
            return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
                           iph);
    
        *ignored = 0;
    
        /*
         *    Non-persistent service
         */
        if (!svc->fwmark && vport != svc->port) {
            if (!svc->port)
                pr_err("Schedule: port zero only supported "
                       "in persistent services, "
                       "check your ipvs configuration\n");
            return NULL;
        }
    
        sched = rcu_dereference(svc->scheduler);
        if (sched) {
            /* read svc->sched_data after svc->scheduler */
            smp_rmb();
            dest = sched->schedule(svc, skb, iph);
        } else {
            dest = NULL;
        }
        if (dest == NULL) {
            IP_VS_DBG(1, "Schedule: no dest found.\n");
            return NULL;
        }
    
        flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
             && iph->protocol == IPPROTO_UDP) ?
            IP_VS_CONN_F_ONE_PACKET : 0;
    
        /*
         *    Create a connection entry.
         */
        {
            struct ip_vs_conn_param p;
    
            ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
                          caddr, cport, vaddr, vport, &p);
            cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
                        dest->port ? dest->port : vport,
                        flags, dest, skb->mark);
            if (!cp) {
                *ignored = -1;
                return NULL;
            }
        }
    
        IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
                  "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
                  ip_vs_fwd_tag(cp),
                  IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
                  IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
                  IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
                  cp->flags, atomic_read(&cp->refcnt));
    
        ip_vs_conn_stats(cp, svc);
        return cp;
    }
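
    sched->schedule() above is where the load-balancing policy plugs in. The simplest scheduler, round robin (ip_vs_rr), just advances a cursor over the destination list; a hedged userspace sketch of the idea (the real ip_vs_rr_schedule() also skips weight-0 and overloaded dests and runs under RCU, all omitted here):

    #include <stdio.h>

    struct dest { const char *rip; };

    /* Stand-in for svc->destinations plus the svc->sched_data cursor. */
    static struct dest rs_pool[] = { {"192.168.10.2"}, {"192.168.10.3"}, {"192.168.10.4"} };
    static unsigned int rr_cursor;

    /* Pick the next real server, round-robin. */
    static struct dest *rr_schedule(void)
    {
        struct dest *d = &rs_pool[rr_cursor];

        rr_cursor = (rr_cursor + 1) % (sizeof(rs_pool) / sizeof(rs_pool[0]));
        return d;
    }

    int main(void)
    {
        for (int i = 0; i < 5; i++)
            printf("new connection %d -> %s\n", i, rr_schedule()->rip);
        return 0;
    }
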
    /*
     *    Create a new connection entry and hash it into the ip_vs_conn_tab
     */
    struct ip_vs_conn *
    ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
               const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
               struct ip_vs_dest *dest, __u32 fwmark)
    {
        struct ip_vs_conn *cp;
        struct netns_ipvs *ipvs = p->ipvs;
        struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
                                   p->protocol);
    
        cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
        if (cp == NULL) {
            IP_VS_ERR_RL("%s(): no memory\n", __func__);
            return NULL;
        }
    
        INIT_HLIST_NODE(&cp->c_list);
        setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
        cp->ipvs       = ipvs;
        cp->af           = p->af;
        cp->daf           = dest_af;
        cp->protocol       = p->protocol;
        ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
        cp->cport       = p->cport;
        /* proto should only be IPPROTO_IP if p->vaddr is a fwmark */
        ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
                   &cp->vaddr, p->vaddr);
        cp->vport       = p->vport;
        ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
        cp->dport          = dport;
        cp->flags       = flags;
        cp->fwmark         = fwmark;
        if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
            ip_vs_pe_get(p->pe);
            cp->pe = p->pe;
            cp->pe_data = p->pe_data;
            cp->pe_data_len = p->pe_data_len;
        } else {
            cp->pe = NULL;
            cp->pe_data = NULL;
            cp->pe_data_len = 0;
        }
        spin_lock_init(&cp->lock);
    
        /*
         * Set the entry is referenced by the current thread before hashing
         * it in the table, so that other thread run ip_vs_random_dropentry
         * but cannot drop this entry.
         */
        atomic_set(&cp->refcnt, 1);
    
        cp->control = NULL;
        atomic_set(&cp->n_control, 0);
        atomic_set(&cp->in_pkts, 0);
    
        cp->packet_xmit = NULL;
        cp->app = NULL;
        cp->app_data = NULL;
        /* reset struct ip_vs_seq */
        cp->in_seq.delta = 0;
        cp->out_seq.delta = 0;
    
        atomic_inc(&ipvs->conn_count);
        if (flags & IP_VS_CONN_F_NO_CPORT)
            atomic_inc(&ip_vs_conn_no_cport_cnt);
    
        /* Bind the connection with a destination server */
        cp->dest = NULL;
        ip_vs_bind_dest(cp, dest);
    
        /* Set its state and timeout */
        cp->state = 0;
        cp->old_state = 0;
        cp->timeout = 3*HZ;
        cp->sync_endtime = jiffies & ~3UL;
    
        /* Bind its packet transmitter */
    #ifdef CONFIG_IP_VS_IPV6
        if (p->af == AF_INET6)
            ip_vs_bind_xmit_v6(cp);
        else
    #endif
            ip_vs_bind_xmit(cp);
    
        if (unlikely(pd && atomic_read(&pd->appcnt)))
            ip_vs_bind_app(cp, pd->pp);
    
        /*
         * Allow conntrack to be preserved. By default, conntrack
         * is created and destroyed for every packet.
         * Sometimes keeping conntrack can be useful for
         * IP_VS_CONN_F_ONE_PACKET too.
         */
    
        if (ip_vs_conntrack_enabled(ipvs))
            cp->flags |= IP_VS_CONN_F_NFCT;
    
        /* Hash it in the ip_vs_conn_tab finally */
        ip_vs_conn_hash(cp);
    
        return cp;
    }
    
    
    /*
     *    Bind a connection entry with the corresponding packet_xmit.
     *    Called by ip_vs_conn_new.
     */
    static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
    {
        switch (IP_VS_FWD_METHOD(cp)) {
        case IP_VS_CONN_F_MASQ:
            cp->packet_xmit = ip_vs_nat_xmit;
            break;
    
        case IP_VS_CONN_F_TUNNEL:
    #ifdef CONFIG_IP_VS_IPV6
            if (cp->daf == AF_INET6)
                cp->packet_xmit = ip_vs_tunnel_xmit_v6;
            else
    #endif
                cp->packet_xmit = ip_vs_tunnel_xmit;
            break;
    
        case IP_VS_CONN_F_DROUTE:
            cp->packet_xmit = ip_vs_dr_xmit;
            break;
    
        case IP_VS_CONN_F_LOCALNODE:
            cp->packet_xmit = ip_vs_null_xmit;
            break;
    
        case IP_VS_CONN_F_BYPASS:
            cp->packet_xmit = ip_vs_bypass_xmit;
            break;
        }
    }

     In NAT mode, ip_vs_nat_xmit is what forwards the packet on to the real server:

    /*
     *      NAT transmitter (only for outside-to-inside nat forwarding)
     *      Not used for related ICMP
     */
    int
    ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
               struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
    {
        struct rtable *rt;        /* Route to the other host */
        int local, rc, was_input;
    
        EnterFunction(10);
    
        rcu_read_lock();
        /* check if it is a connection of no-client-port */
        if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
            __be16 _pt, *p;
    
            p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
            if (p == NULL)
                goto tx_error;
            ip_vs_conn_fill_cport(cp, *p);
            IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
        }
    
        was_input = rt_is_input_route(skb_rtable(skb));
        local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
                       IP_VS_RT_MODE_LOCAL |
                       IP_VS_RT_MODE_NON_LOCAL |
                       IP_VS_RT_MODE_RDR, NULL, ipvsh);
        if (local < 0)
            goto tx_error;
        rt = skb_rtable(skb);
        /*
         * Avoid duplicate tuple in reply direction for NAT traffic
         * to local address when connection is sync-ed
         */
    #if IS_ENABLED(CONFIG_NF_CONNTRACK)
        if (cp->flags & IP_VS_CONN_F_SYNC && local) {
            enum ip_conntrack_info ctinfo;
            struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
    
            if (ct && !nf_ct_is_untracked(ct)) {
                IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
                         "ip_vs_nat_xmit(): "
                         "stopping DNAT to local address");
                goto tx_error;
            }
        }
    #endif
    
        /* From world but DNAT to loopback address? */
        if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
            IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
                     "ip_vs_nat_xmit(): stopping DNAT to loopback "
                     "address");
            goto tx_error;
        }
    
        /* copy-on-write the packet before mangling it */
        if (!skb_make_writable(skb, sizeof(struct iphdr)))
            goto tx_error;
    
        if (skb_cow(skb, rt->dst.dev->hard_header_len))
            goto tx_error;
    
        /* mangle the packet */
        if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
            goto tx_error;
    ip_hdr(skb)->daddr = cp->daddr.ip;    /* DNAT: rewrite the destination IP */
        ip_send_check(ip_hdr(skb));
    
        IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");
    
        /* FIXME: when application helper enlarges the packet and the length
           is larger than the MTU of outgoing device, there will be still
           MTU problem. */
    
        /* Another hack: avoid icmp_send in ip_fragment */
        skb->ignore_df = 1;
    
        rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
        rcu_read_unlock();
    
        LeaveFunction(10);
        return rc;
    
      tx_error:
        kfree_skb(skb);
        rcu_read_unlock();
        LeaveFunction(10);
        return NF_STOLEN;
    }
    /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
    static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
                         struct ip_vs_conn *cp, int local)
    {
        int ret = NF_STOLEN;
    
        skb->ipvs_property = 1;
        if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
            ip_vs_notrack(skb);
        else
            ip_vs_update_conntrack(skb, cp, 1);
    
        /* Remove the early_demux association unless it's bound for the
         * exact same port and address on this host after translation.
         */
        if (!local || cp->vport != cp->dport ||
            !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
            ip_vs_drop_early_demux_sk(skb);
    
        if (!local) {
            skb_forward_csum(skb);
            NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
                NULL, skb_dst(skb)->dev, dst_output);
        } else
            ret = NF_ACCEPT;
    
        return ret;
    }
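
    A note on the daddr rewrite above: after ip_vs_nat_xmit() overwrites ip_hdr(skb)->daddr, ip_send_check() must recompute the IP header checksum. That checksum is the standard RFC 1071 ones'-complement sum (the kernel uses the optimized ip_fast_csum()); a self-contained userspace version for reference:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    /* RFC 1071 ones'-complement checksum over the IP header. hdr must point
     * at ihl*4 bytes with the checksum field zeroed beforehand, which is
     * exactly what ip_send_check() does before calling ip_fast_csum(). */
    static uint16_t ip_checksum(const void *hdr, size_t len)
    {
        const uint16_t *p = hdr;
        uint32_t sum = 0;

        while (len > 1) {
            sum += *p++;
            len -= 2;
        }
        if (len)                       /* odd trailing byte */
            sum += *(const uint8_t *)p;
        while (sum >> 16)              /* fold the carries back in */
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;         /* stored back as-is, the result is byte-order safe */
    }

    int main(void)
    {
        /* A 20-byte IPv4 header with its checksum field (bytes 10-11) zeroed. */
        uint8_t iph[20] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46, 0x40, 0x00,
                            0x40, 0x06, 0x00, 0x00, 0xac, 0x10, 0x0a, 0x63,
                            0xac, 0x10, 0x0a, 0x0c };
        uint16_t csum = ip_checksum(iph, sizeof(iph));

        iph[10] = ((uint8_t *)&csum)[0];   /* 0xb1 for this well-known example header */
        iph[11] = ((uint8_t *)&csum)[1];   /* 0xe6 */
        printf("checksum bytes: %02x %02x\n", iph[10], iph[11]);
        return 0;
    }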

    In other words, the rewritten packet is handed back to the stack through the LOCAL_OUT hook (see ip_vs_nat_send_or_cont above). Reply traffic from the real server is caught by ip_vs_out, which performs the matching SNAT before the packet continues through the normal protocol stack.
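
    To connect this back to the FULLNAT mentioned at the top: plain NAT mode only rewrites the destination on the way in (with ip_vs_out reversing it on the way back), which forces the real servers to route replies through the director. FULLNAT, as implemented by DPVS and the Alibaba LVS patches, additionally rewrites the source to a local IP owned by the director, so replies come back to it naturally. A hedged sketch of the two inbound rewrites (illustrative struct and values, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Minimal 4-tuple; purely illustrative, not a kernel structure. */
    struct pkt { uint32_t saddr, daddr; uint16_t sport, dport; };

    /* NAT mode: what ip_vs_nat_xmit() does, rewriting the destination only. */
    static void nat_xmit(struct pkt *p, uint32_t rip, uint16_t rport)
    {
        p->daddr = rip;
        p->dport = rport;
    }

    /* FULLNAT: the DNAT above plus replacing the source with a director-owned
     * local IP/port, so the real server replies to the director directly and
     * needs no special return routing. */
    static void fullnat_xmit(struct pkt *p, uint32_t rip, uint16_t rport,
                             uint32_t lip, uint16_t lport)
    {
        nat_xmit(p, rip, rport);
        p->saddr = lip;
        p->sport = lport;
    }

    int main(void)
    {
        struct pkt p = { .saddr = 0xc0a80001 /* CIP */, .daddr = 0x0a000064 /* VIP */,
                         .sport = 40000, .dport = 80 };

        fullnat_xmit(&p, 0x0a00000a /* RIP */, 8080, 0x0a000001 /* LIP */, 50000);
        printf("after FULLNAT: %08x:%u -> %08x:%u\n", p.saddr, p.sport, p.daddr, p.dport);
        return 0;
    }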

Original article: https://www.cnblogs.com/codestack/p/15729694.html