clone.2: CLONE_CHILD_SETTID has effect before clone() returns *in the child*

CLONE_CHILD_SETTID may not have had effect by the time clone()
returns in the parent, which could bre relevant if the
CLONE_VM flag is employed. The relevant kernel code is in
schedule_tail(), which is called in ret_from_fork()
in the child.

See https://bugzilla.kernel.org/show_bug.cgi?id=203105

Demonstration using the program shown below (inspired by a simpler
example from Jakub):

$ ./a.out
parent start: ctid =     0    ptid =  6212
child start:  ctid =  6212    ptid =  6212
child later:  ctid =  6212    ptid =  6212
cat parent later: ctid =  6212    ptid =  6212
f.child -- bye

$ cat prog.c

static volatile pid_t ctid, ptid;

static int
child_fn(void *arg)
{
    printf("child start:  ctid = %5d    ptid = %5d\n", ctid, ptid);
    sleep(1);
    printf("child later:  ctid = %5d    ptid = %5d\n", ctid, ptid);
    sleep(2);
    printf("child -- bye\n");
    return 0;
}

int
main(void)
{
    void *stack = malloc(0x1000);
    char *stack_top = (char *) stack + 0x1000;
    int flags =  SIGCHLD | CLONE_VM |
                 // CLONE_VFORK |
                 CLONE_PARENT_SETTID | CLONE_CHILD_SETTID;

    if (clone(child_fn, stack_top, flags, NULL, &ptid, NULL, &ctid) == -1) {
        perror("clone");
        exit(EXIT_SUCCESS);
    }

    fprintf(stderr, "parent start: ctid = %5d    ptid = %5d\n", ctid, ptid);
    sleep(2);
    fprintf(stderr, "parent later: ctid = %5d    ptid = %5d\n", ctid, ptid);

    if (wait(NULL) == -1) {
        perror("wait");
        exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}

Reported-by: Jakub Nowak <jakub.jakub.nowak@gmail.com>
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
1 file changed