From: Herbert Poetzl (herbert_at_13thfloor.at)
Date: Sun 29 Dec 2002 - 21:55:57 GMT
On Sun, Dec 29, 2002 at 04:42:09PM -0500, tedsuo wrote:
> i notice that Jacques got the same problems before releasing ctx16
>
> maybe the bind(any) for multiple IP is a bug...
>
>
> On Fri, 29 Nov 2002 10:26:01 -0500, Klavs Klavsen wrote
> > With tonnes of fixes :-)
> >
> > Jacques, how is your release schedule? have you got a ctx15 release
> > planned in the near future?
>
> Yes I have a ctx-15 which does bind(any) for multiple IP in a vserver,
> but it
> is still crashing on me.
Please do not get me wrong, I do not exclude the possibility
that the changes introduced by CTX-16 do crash your system.
Except for cosmetic changes, the appended diff shows all
differences between the ctx14/ctx16 patch sets, so the
crash, if it is code based, must be there ...
best,
Herbert
------ CUT HERE ------
diff -NurbP --minimal linux-2.4.20-ctx14/fs/proc/array.c linux-2.4.20-ctx16/fs/proc/array.c
--- linux-2.4.20-ctx14/fs/proc/array.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/fs/proc/array.c Sat Dec 28 04:16:19 2002
@@ -332,6 +332,8 @@
*buffer++ = '\n';
buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
,task->ip_info->v4_bcast);
+ buffer += sprintf (buffer,"ipv4root_refcnt: %d\n"
+ ,task->ip_info->refcount);
}else{
buffer += sprintf (buffer,"ipv4root: 0\n");
buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
diff -NurbP --minimal linux-2.4.20-ctx14/include/net/route.h linux-2.4.20-ctx16/include/net/route.h
--- linux-2.4.20-ctx14/include/net/route.h Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/include/net/route.h Sat Dec 28 04:16:19 2002
@@ -164,14 +164,20 @@
static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif)
{
int err;
- if (current->ip_info != NULL){
- __u32 ipv4root = current->ip_info->ipv4[0];
+ struct iproot_info *ip_info = current->ip_info;
+ if (ip_info != NULL){
+ __u32 ipv4root = ip_info->ipv4[0];
if (ipv4root != 0){
if (src == 0){
src = dst == 0x0100007f
? 0x0100007f: ipv4root;
- }else if (ipv4root != src){
- return -EPERM;
+ }else{
+ int n=ip_info->nbipv4;
+ int i;
+ for (i=0; i<n; i++){
+ if (ip_info->ipv4[i] == src) break;
+ }
+ if (i==n) return -EPERM;
}
if (dst == 0x0100007f && current->s_context != 0){
dst = ipv4root;
diff -NurbP --minimal linux-2.4.20-ctx14/include/net/sock.h linux-2.4.20-ctx16/include/net/sock.h
--- linux-2.4.20-ctx14/include/net/sock.h Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/include/net/sock.h Sat Dec 28 04:16:19 2002
@@ -508,6 +509,8 @@
unsigned char reuse; /* SO_REUSEADDR setting */
unsigned char shutdown;
atomic_t refcnt; /* Reference count */
+ struct iproot_info *ip_info;
+ /* End of common section with tcp_tw_bucket */
socket_lock_t lock; /* Synchronizer... */
int rcvbuf; /* Size of receive buffer in bytes */
@@ -525,7 +528,7 @@
__u32 saddr; /* Sending source */
unsigned int allocation; /* Allocation mode */
int sndbuf; /* Size of send buffer in bytes */
- __u32 bcast_addr; /* Local bcast addr, for ipv4root */
+ __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */
struct sock *prev;
/* Not all are volatile, but some are, so we might as well say they all are.
diff -NurbP --minimal linux-2.4.20-ctx14/include/net/tcp.h linux-2.4.20-ctx16/include/net/tcp.h
--- linux-2.4.20-ctx14/include/net/tcp.h Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/include/net/tcp.h Sat Dec 28 04:16:19 2002
@@ -172,6 +172,7 @@
unsigned char reuse,
rcv_wscale; /* It is also TW bucket specific */
atomic_t refcnt;
+ struct ipv4_info *ip_info;
/* And these are ours. */
int hashent;
diff -NurbP --minimal linux-2.4.20-ctx14/kernel/exit.c linux-2.4.20-ctx16/kernel/exit.c
--- linux-2.4.20-ctx14/kernel/exit.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/kernel/exit.c Sat Dec 28 04:16:19 2002
@@ -67,7 +67,7 @@
if (current->counter >= MAX_COUNTER)
current->counter = MAX_COUNTER;
sys_release_s_info(p);
- sys_release_ip_info(p);
+ sys_release_ip_info(p->ip_info);
p->pid = 0;
free_task_struct(p);
} else {
diff -NurbP --minimal linux-2.4.20-ctx14/kernel/fork.c linux-2.4.20-ctx16/kernel/fork.c
--- linux-2.4.20-ctx14/kernel/fork.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/kernel/fork.c Sat Dec 28 04:16:19 2002
@@ -618,7 +618,7 @@
goto bad_fork_free;
sys_assign_s_info (p);
- sys_assign_ip_info (p);
+ sys_assign_ip_info (p->ip_info);
atomic_inc(&p->user->__count);
atomic_inc(&p->user->processes);
diff -NurbP --minimal linux-2.4.20-ctx14/kernel/sys.c linux-2.4.20-ctx16/kernel/sys.c
--- linux-2.4.20-ctx14/kernel/sys.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/kernel/sys.c Sat Dec 28 04:16:19 2002
@@ -1088,31 +1088,33 @@
current->s_info = s_info;
}
}
+
/*
- Decrease the reference count on the ip_info member of a task
+ Decrease the reference count on the ip_info struct
Free the struct if the reference count reach 0.
*/
-void sys_release_ip_info (struct task_struct *p)
+void sys_release_ip_info (struct iproot_info *ip_info)
{
+ if (ip_info != NULL){
down_write (&uts_sem);
- if (p->ip_info != NULL){
- p->ip_info->refcount--;
- if (p->ip_info->refcount == 0){
+ ip_info->refcount--;
+ if (ip_info->refcount == 0){
// printk ("vfree s_info %d\n",p->pid);
- vfree (p->ip_info);
- p->ip_info = NULL;
- }
+ vfree (ip_info);
}
up_write (&uts_sem);
+ }
}
/*
Increase the reference count on the ip_info member of a task
*/
-void sys_assign_ip_info (struct task_struct *p)
+void sys_assign_ip_info (struct iproot_info *ip_info)
{
+ if (ip_info != NULL){
down_write (&uts_sem);
- if (p->ip_info != NULL) p->ip_info->refcount++;
+ ip_info->refcount++;
up_write (&uts_sem);
+ }
}
/*
@@ -1125,7 +1127,7 @@
// printk ("new s_info %d\n",current->pid);
memset (ip_info,0,sizeof(*ip_info));
ip_info->refcount = 1;
- sys_release_ip_info (current);
+ sys_release_ip_info (current->ip_info);
current->ip_info = ip_info;
}
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/af_inet.c linux-2.4.20-ctx16/net/ipv4/af_inet.c
--- linux-2.4.20-ctx14/net/ipv4/af_inet.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/af_inet.c Sat Dec 28 04:16:19 2002
@@ -177,6 +177,8 @@
if (sk->protinfo.af_inet.opt)
kfree(sk->protinfo.af_inet.opt);
+ sys_release_ip_info (sk->ip_info);
+ sk->ip_info = NULL;
dst_release(sk->dst_cache);
#ifdef INET_REFCNT_DEBUG
atomic_dec(&inet_sock_nr);
@@ -394,6 +396,7 @@
sk->protinfo.af_inet.mc_list = NULL;
sk->s_context = current->s_context;
+ sk->ip_info = NULL;
#ifdef INET_REFCNT_DEBUG
atomic_inc(&inet_sock_nr);
@@ -479,9 +482,11 @@
unsigned short snum;
int chk_addr_ret;
int err;
- __u32 s_addr;
- __u32 bcast_addr = 0xffffffffl;
- __u32 ipv4root;
+ __u32 s_addr; /* Address used for validation */
+ __u32 s_addr1;
+ __u32 s_addr2 = 0xffffffffl; /* Optional address of the socket */
+ /* bcast in ipv4root world */
+ struct iproot_info *ip_info;
/* If the socket has its own bind function then use it. (RAW) */
if(sk->prot->bind)
@@ -490,30 +495,40 @@
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
- s_addr = addr->sin_addr.s_addr;
- ipv4root = current->ip_info != NULL ? current->ip_info->ipv4[0] : 0;
- if (ipv4root != 0){
+ s_addr = s_addr1 = addr->sin_addr.s_addr;
+ ip_info = current->ip_info;
+ if (ip_info != NULL){
+ __u32 v4_bcast = ip_info->v4_bcast;
+ __u32 ipv4root = ip_info->ipv4[0];
+ int nbipv4 = ip_info->nbipv4;
// printk ("ipv4root0 %08lx %08x\n",ipv4root,s_addr);
- __u32 v4_bcast = current->ip_info->v4_bcast;
if (s_addr == 0){
s_addr = ipv4root;
- bcast_addr = v4_bcast;
+ if (nbipv4 > 1){
+ s_addr1 = 0;
+ }else{
+ s_addr1 = ipv4root;
+ ip_info = NULL;
+ }
+ s_addr2 = v4_bcast;
}else if (s_addr == 0x0100007f){
- s_addr = ipv4root;
+ s_addr = s_addr1 = ipv4root;
+ ip_info = NULL;
}else if (s_addr != v4_bcast
&& s_addr != ipv4root){
int i;
- int nbipv4 = current->ip_info->nbipv4;
for (i=0; i<nbipv4; i++){
- if (s_addr == current->ip_info->ipv4[i]){
+ if (s_addr == ip_info->ipv4[i]){
break;
}
}
if (i == nbipv4) return -EADDRNOTAVAIL;
+ ip_info = NULL;
}
+ //printk ("bind: ip_info != NULL, s_addr %x, s_addr1 %x, s_addr2 %x\n"
+ // ,s_addr,s_addr1,s_addr2);
}
chk_addr_ret = inet_addr_type(s_addr);
- // printk ("ipv4root %08lx %08x %d\n",ipv4root,s_addr,chk_addr_ret);
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
@@ -549,14 +564,18 @@
(sk->num != 0))
goto out;
- sk->rcv_saddr = sk->saddr = s_addr;
- sk->bcast_addr = bcast_addr;
+ sk->rcv_saddr = sk->saddr = s_addr1;
+ sk->rcv_saddr2 = s_addr2;
+ sk->ip_info = ip_info;
+ if (ip_info != NULL) sys_assign_ip_info (ip_info);
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
sk->saddr = 0; /* Use device */
/* Make sure we are allowed to bind here. */
if (sk->prot->get_port(sk, snum) != 0) {
sk->saddr = sk->rcv_saddr = 0;
+ sk->ip_info = NULL;
+ sys_release_ip_info (ip_info);
err = -EADDRINUSE;
goto out;
}
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/raw.c linux-2.4.20-ctx16/net/ipv4/raw.c
--- linux-2.4.20-ctx14/net/ipv4/raw.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/raw.c Sat Dec 28 04:16:19 2002
@@ -96,16 +96,48 @@
write_unlock_bh(&raw_v4_lock);
}
+
+/*
+ Check if an address is in the list
+*/
+static inline int raw_addr_in_list (
+ u32 rcv_saddr1,
+ u32 rcv_saddr2,
+ u32 loc_addr,
+ struct iproot_info *ip_info)
+{
+ int ret = 0;
+ if (loc_addr != 0
+ && (rcv_saddr1 == loc_addr || rcv_saddr2 == loc_addr)){
+ ret = 1;
+ }else if (rcv_saddr1 == 0){
+ /* Accept any address or only the one in the list */
+ if (ip_info == NULL){
+ ret = 1;
+ }else{
+ int n = ip_info->nbipv4;
+ int i;
+ for (i=0; i<n; i++){
+ if (ip_info->ipv4[i] == loc_addr){
+ ret = 1;
+ break;
+ }
+ }
+ }
+ }
+ return ret;
+}
+
struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
unsigned long raddr, unsigned long laddr,
int dif)
{
struct sock *s = sk;
-
for (s = sk; s; s = s->next) {
if (s->num == num &&
!(s->daddr && s->daddr != raddr) &&
- !(s->rcv_saddr && s->rcv_saddr != laddr) &&
+ raw_addr_in_list(s->rcv_saddr,s->rcv_saddr2,laddr,s->ip_info) &&
+// !(s->rcv_saddr && s->rcv_saddr != laddr) &&
!(s->bound_dev_if && s->bound_dev_if != dif))
break; /* gotcha */
}
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/tcp_ipv4.c linux-2.4.20-ctx16/net/ipv4/tcp_ipv4.c
--- linux-2.4.20-ctx14/net/ipv4/tcp_ipv4.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/tcp_ipv4.c Sat Dec 28 04:16:19 2002
@@ -174,6 +174,56 @@
sk->prev = (struct sock *) tb;
}
+/*
+ Return 1 if addr match the socket IP list
+ or the socket is INADDR_ANY
+*/
+static inline int tcp_in_list (struct sock *sk, u32 addr)
+{
+ int ret = 0;
+ struct iproot_info *ip_info = sk->ip_info;
+ if (ip_info != NULL){
+ int n = ip_info->nbipv4;
+ int i;
+ for (i=0; i<n; i++){
+ if (ip_info->ipv4[i] == addr){
+ ret = 1;
+ break;
+ }
+ }
+ }else if (!sk->rcv_saddr || sk->rcv_saddr == addr){
+ ret = 1;
+ }
+ return ret;
+}
+
+/*
+ Check if the addresses in sk1 conflict with those in sk2
+*/
+int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2)
+{
+ int ret = 0;
+ if (sk1->rcv_saddr){
+ /* Bind to one address only */
+ ret = tcp_in_list (sk2,sk1->rcv_saddr);
+ }else if (sk1->ip_info != NULL){
+ /* A restricted bind(any) */
+ struct iproot_info *ip_info = sk1->ip_info;
+ int n = ip_info->nbipv4;
+ int i;
+ for (i=0; i<n; i++){
+ if (tcp_in_list (sk2,ip_info->ipv4[i])){
+ ret = 1;
+ break;
+ }
+ }
+ }else{
+ /* A bind(any) do not allow other bind on the same port */
+ ret = 1;
+ }
+ return ret;
+}
+
static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
{
struct sock *sk2 = tb->owners;
@@ -186,9 +236,7 @@
if (!sk_reuse ||
!sk2->reuse ||
sk2->state == TCP_LISTEN) {
- if (!sk2->rcv_saddr ||
- !sk->rcv_saddr ||
- (sk2->rcv_saddr == sk->rcv_saddr))
+ if (tcp_ipv4_addr_conflict(sk,sk2))
break;
}
}
@@ -407,6 +455,37 @@
wake_up(&tcp_lhash_wait);
}
+/*
+ Check if an address is in the list
+*/
+static inline int tcp_addr_in_list (
+ u32 rcv_saddr,
+ u32 daddr,
+ struct iproot_info *ip_info)
+{
+ int ret = 0;
+ if (rcv_saddr == daddr){
+ ret = 1;
+ }else if (rcv_saddr == 0){
+ /* Accept any address or only the one in the list */
+ if (ip_info == NULL){
+ ret = 1;
+ }else{
+ int n = ip_info->nbipv4;
+ int i;
+ for (i=0; i<n; i++){
+ if (ip_info->ipv4[i] == daddr){
+ ret = 1;
+ break;
+ }
+ }
+ }
+ }
+ return ret;
+}
+
+
+
/* Don't inline this cruft. Here are some nice properties to
* exploit here. The BSD API does not allow a listening TCP
* to specify the remote port nor the remote address for the
@@ -424,10 +503,10 @@
__u32 rcv_saddr = sk->rcv_saddr;
score = 1;
- if(rcv_saddr) {
- if (rcv_saddr != daddr)
- continue;
+ if (tcp_addr_in_list(rcv_saddr,daddr,sk->ip_info)){
score++;
+ }else{
+ continue;
}
if (sk->bound_dev_if) {
if (sk->bound_dev_if != dif)
@@ -455,7 +534,7 @@
if (sk) {
if (sk->num == hnum &&
sk->next == NULL &&
- (!sk->rcv_saddr || sk->rcv_saddr == daddr) &&
+ tcp_addr_in_list(sk->rcv_saddr,daddr,sk->ip_info) &&
!sk->bound_dev_if)
goto sherry_cache;
sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif);
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/tcp_minisocks.c linux-2.4.20-ctx16/net/ipv4/tcp_minisocks.c
--- linux-2.4.20-ctx14/net/ipv4/tcp_minisocks.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/tcp_minisocks.c Sat Dec 28 04:16:19 2002
@@ -381,6 +381,7 @@
tw->pprev_death = NULL;
tw->s_context = sk->s_context;
+ tw->ip_info = NULL;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
if(tw->family == PF_INET6) {
@@ -651,6 +652,7 @@
#endif
memcpy(newsk, sk, sizeof(*newsk));
+ sys_assign_ip_info (newsk->ip_info);
newsk->state = TCP_SYN_RECV;
/* SANITY */
diff -NurbP --minimal linux-2.4.20-ctx14/net/ipv4/udp.c linux-2.4.20-ctx16/net/ipv4/udp.c
--- linux-2.4.20-ctx14/net/ipv4/udp.c Sat Dec 28 04:15:57 2002
+++ linux-2.4.20-ctx16/net/ipv4/udp.c Sat Dec 28 04:16:19 2002
@@ -106,6 +106,9 @@
/* Shared by v4/v6 udp. */
int udp_port_rover;
+int tcp_ipv4_addr_conflict (struct sock *sk1, struct sock *sk2);
+
+
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
write_lock_bh(&udp_hash_lock);
@@ -160,9 +163,7 @@
if (sk2->num == snum &&
sk2 != sk &&
sk2->bound_dev_if == sk->bound_dev_if &&
- (!sk2->rcv_saddr ||
- !sk->rcv_saddr ||
- sk2->rcv_saddr == sk->rcv_saddr) &&
+ tcp_ipv4_addr_conflict (sk2,sk) &&
(!sk2->reuse || !sk->reuse))
goto fail;
}
@@ -205,6 +206,20 @@
write_unlock_bh(&udp_hash_lock);
}
+static int udp_in_list (struct iproot_info *ip_info, u32 addr)
+{
+ int ret = 0;
+ int n = ip_info->nbipv4;
+ int i;
+ for (i=0; i<n; i++){
+ if (ip_info->ipv4[i] == addr){
+ ret = 1;
+ break;
+ }
+ }
+ return ret;
+}
+
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
@@ -221,6 +236,12 @@
if(sk->rcv_saddr != daddr)
continue;
score++;
+ }else if (sk->ip_info != NULL){
+ if (udp_in_list (sk->ip_info,daddr)){
+ score++;
+ }else{
+ continue;
+ }
}
if(sk->daddr) {
if(sk->daddr != saddr)
@@ -272,7 +294,7 @@
if ((s->num != hnum) ||
(s->daddr && s->daddr!=rmt_addr) ||
(s->dport != rmt_port && s->dport != 0) ||
- (s->rcv_saddr && s->rcv_saddr != loc_addr && s->bcast_addr != loc_addr) ||
+ (s->rcv_saddr && s->rcv_saddr != loc_addr && s->rcv_saddr2 != loc_addr) ||
(s->bound_dev_if && s->bound_dev_if != dif))
continue;
break;
@@ -517,6 +539,24 @@
rt = (struct rtable*)sk_dst_check(sk, 0);
if (rt == NULL) {
+ struct iproot_info *ip_info = current->ip_info;
+ if (ip_info != NULL) {
+ __u32 ipv4root = ip_info->ipv4[0];
+ if (ipv4root != 0){
+ if (daddr == 0x0100007f && current->s_context != 0){
+ daddr = ipv4root;
+ }
+ if (ufh.saddr == 0){
+ ufh.saddr = ipv4root;
+ }
+ #if 0
+ else if (!udp_in_list(ip_info,ufh.saddr)){
+ err = EADDRNOTAVAIL;
+ goto out;
+ }
+ #endif
+ }
+ }
err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
if (err)
goto out;