Newer
Older
* We can do this unlocked here. The futex code uses this flag
* just to verify whether the pi state cleanup has been done
* or not. In the worst case it loops once more.
*/
tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
if (tsk->splice_pipe)
__free_pipe_info(tsk->splice_pipe);
/* causes final put_task_struct in finish_task_switch(). */
schedule();
BUG();
/* Avoid "noreturn function does return". */
for (;;)
cpu_relax(); /* For when BUG is null */
EXPORT_SYMBOL_GPL(do_exit);
NORET_TYPE void complete_and_exit(struct completion *comp, long code)
{
if (comp)
complete(comp);
do_exit(code);
}
EXPORT_SYMBOL(complete_and_exit);
/*
 * exit(2): only the low eight bits of @error_code are kept; they are
 * shifted up by eight before being handed to do_exit().
 */
SYSCALL_DEFINE1(exit, int, error_code)
{
	const int low_byte = error_code & 0xff;

	do_exit(low_byte << 8);
}
/*
 * Terminate every thread of the caller's thread group.  Used for fatal
 * signals and by sys_exit_group.
 *
 * If a group exit is already under way, its recorded exit code wins over
 * @exit_code; otherwise, when other threads exist, this thread records
 * @exit_code, flags the group as exiting and zaps the other threads.
 */
NORET_TYPE void
do_group_exit(int exit_code)
{
	struct signal_struct *sig = current->signal;

	BUG_ON(exit_code & 0x80); /* core dumps don't get here */

	if (signal_group_exit(sig)) {
		exit_code = sig->group_exit_code;
	} else if (!thread_group_empty(current)) {
		struct sighand_struct *const sh = current->sighand;

		spin_lock_irq(&sh->siglock);
		if (!signal_group_exit(sig)) {
			/* We are first: publish the code and kill the rest. */
			sig->group_exit_code = exit_code;
			sig->flags = SIGNAL_GROUP_EXIT;
			zap_other_threads(current);
		} else {
			/* Another thread got here before we took the lock. */
			exit_code = sig->group_exit_code;
		}
		spin_unlock_irq(&sh->siglock);
	}

	do_exit(exit_code);
	/* NOTREACHED */
}
/*
 * this kills every thread in the thread group. Note that any externally
 * wait4()-ing process will get the correct exit code - even if this
 * thread is not the thread group leader.
 *
 * The low eight bits of @error_code are shifted up by eight, the same
 * encoding the exit syscall uses, and passed to do_group_exit().
 */
SYSCALL_DEFINE1(exit_group, int, error_code)
{
	do_group_exit((error_code & 0xff) << 8);
	/* NOTREACHED */
	return 0;
}
/*
 * Look up the struct pid of kind @type for @task.
 *
 * PIDTYPE_PID is read from @task itself; every other type below
 * PIDTYPE_MAX is read from the thread group leader.  Any other @type
 * yields NULL.
 */
static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
{
	if (type == PIDTYPE_PID)
		return task->pids[type].pid;

	if (type < PIDTYPE_MAX)
		return task->group_leader->pids[type].pid;

	return NULL;
}
static int eligible_child(enum pid_type type, struct pid *pid, int options,
struct task_struct *p)
if (type < PIDTYPE_MAX) {
if (task_pid_type(p, type) != pid)
return 0;
}
/* Wait for all children (clone and not) if __WALL is set;
* otherwise, wait for clone children *only* if __WCLONE is
* set; otherwise, wait for non-clone children *only*. (Note:
* A "clone" child here is one that reports to its parent
* using a signal other than SIGCHLD.) */
if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
&& !(options & __WALL))
return 0;
err = security_task_wait(p);
if (err)
return err;
return 1;
/*
 * Copy the result of a wait that does not reap @p out to user space.
 *
 * Fills @rusagep (when non-NULL) via getrusage(), drops the caller's
 * reference on @p, then stores SIGCHLD, 0, @why, @pid, @uid and @status
 * into the matching fields of @infop.  The sequence stops at the first
 * failing step.
 *
 * Returns @pid when every step succeeded, otherwise the first non-zero
 * value returned by getrusage() or put_user().
 */
static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
			       int why, int status,
			       struct siginfo __user *infop,
			       struct rusage __user *rusagep)
{
	int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;

	/* @p is not touched again below, so the reference can go now. */
	put_task_struct(p);
	if (!retval)
		retval = put_user(SIGCHLD, &infop->si_signo);
	if (!retval)
		retval = put_user(0, &infop->si_errno);
	if (!retval)
		retval = put_user((short)why, &infop->si_code);
	if (!retval)
		retval = put_user(pid, &infop->si_pid);
	if (!retval)
		retval = put_user(uid, &infop->si_uid);
	if (!retval)
		retval = put_user(status, &infop->si_status);
	if (!retval)
		retval = pid;
	return retval;
}
/*
 * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
 * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 *
 * With WNOWAIT the task is only reported (through wait_noreap_copyout())
 * and left as a zombie.  Otherwise the task is moved to EXIT_DEAD, its
 * resource counters are folded into the parent's signal_struct (unless
 * it was reparented by ptrace), the status is copied to user space and
 * the task is released.
 */
static int wait_task_zombie(struct task_struct *p, int options,
			    struct siginfo __user *infop,
			    int __user *stat_addr, struct rusage __user *ru)
{
	unsigned long state;
	int retval, status, traced;
	pid_t pid = task_pid_vnr(p);
	uid_t uid = __task_cred(p)->uid;

	if (!likely(options & WEXITED))
		return 0;

	if (unlikely(options & WNOWAIT)) {
		int exit_code = p->exit_code;
		int why, status;

		/* Pin @p: tasklist_lock is dropped before the copy-out. */
		get_task_struct(p);
		read_unlock(&tasklist_lock);
		if ((exit_code & 0x7f) == 0) {
			why = CLD_EXITED;
			status = exit_code >> 8;
		} else {
			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
			status = exit_code & 0x7f;
		}
		return wait_noreap_copyout(p, pid, uid, why,
					   status, infop, ru);
	}

	/*
	 * Try to move the task's state to DEAD
	 * only one thread is allowed to do this:
	 */
	state = xchg(&p->exit_state, EXIT_DEAD);
	if (state != EXIT_ZOMBIE) {
		BUG_ON(state != EXIT_DEAD);
		return 0;
	}

	traced = ptrace_reparented(p);

	if (likely(!traced)) {
		struct signal_struct *psig;
		struct signal_struct *sig;

		/*
		 * The resource counters for the group leader are in its
		 * own task_struct.  Those for dead threads in the group
		 * are in its signal_struct, as are those for the child
		 * processes it has previously reaped.  All these
		 * accumulate in the parent's signal_struct c* fields.
		 *
		 * We don't bother to take a lock here to protect these
		 * p->signal fields, because they are only touched by
		 * __exit_signal, which runs with tasklist_lock
		 * write-locked anyway, and so is excluded here.  We do
		 * need to protect the access to parent->signal fields,
		 * as other threads in the parent group can be right
		 * here reaping other children at the same time.
		 */
		spin_lock_irq(&p->real_parent->sighand->siglock);
		psig = p->real_parent->signal;
		sig = p->signal;
		psig->cutime =
			cputime_add(psig->cutime,
			cputime_add(p->utime,
			cputime_add(sig->utime,
				    sig->cutime)));
		psig->cstime =
			cputime_add(psig->cstime,
			cputime_add(p->stime,
			cputime_add(sig->stime,
				    sig->cstime)));
		psig->cgtime =
			cputime_add(psig->cgtime,
			cputime_add(p->gtime,
			cputime_add(sig->gtime,
				    sig->cgtime)));
		psig->cmin_flt +=
			p->min_flt + sig->min_flt + sig->cmin_flt;
		psig->cmaj_flt +=
			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
		psig->cnvcsw +=
			p->nvcsw + sig->nvcsw + sig->cnvcsw;
		psig->cnivcsw +=
			p->nivcsw + sig->nivcsw + sig->cnivcsw;
		psig->cinblock +=
			task_io_get_inblock(p) +
			sig->inblock + sig->cinblock;
		psig->coublock +=
			task_io_get_oublock(p) +
			sig->oublock + sig->coublock;
		task_io_accounting_add(&psig->ioac, &p->ioac);
		task_io_accounting_add(&psig->ioac, &sig->ioac);
		spin_unlock_irq(&p->real_parent->sighand->siglock);
	}

	/*
	 * Now we are sure this task is interesting, and no other
	 * thread can reap it because we set its state to EXIT_DEAD.
	 */
	read_unlock(&tasklist_lock);

	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
		? p->signal->group_exit_code : p->exit_code;
	if (!retval && stat_addr)
		retval = put_user(status, stat_addr);
	if (!retval && infop)
		retval = put_user(SIGCHLD, &infop->si_signo);
	if (!retval && infop)
		retval = put_user(0, &infop->si_errno);
	if (!retval && infop) {
		int why;

		if ((status & 0x7f) == 0) {
			why = CLD_EXITED;
			status >>= 8;
		} else {
			why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
			status &= 0x7f;
		}
		retval = put_user((short)why, &infop->si_code);
		if (!retval)
			retval = put_user(status, &infop->si_status);
	}
	if (!retval && infop)
		retval = put_user(pid, &infop->si_pid);
	/* Guarded like the stores above so an earlier error is not lost. */
	if (!retval && infop)
		retval = put_user(uid, &infop->si_uid);
	if (!retval)
		retval = pid;

	if (traced) {
		write_lock_irq(&tasklist_lock);
		/* We dropped tasklist, ptracer could die and untrace */
		ptrace_unlink(p);
		/*
		 * If this is not a detached task, notify the parent.
		 * If it's still not detached after that, don't release
		 * it now.
		 */
		if (!task_detached(p)) {
			do_notify_parent(p, p->exit_signal);
			if (!task_detached(p)) {
				p->exit_state = EXIT_ZOMBIE;
				p = NULL;
			}
		}
		write_unlock_irq(&tasklist_lock);
	}
	if (p != NULL)
		release_task(p);

	return retval;
}
/*
 * Return a pointer to the stop code that a waiter should read for @p,
 * or NULL when @p has nothing to report.
 *
 * For a ptrace wait the code is p->exit_code and is offered when
 * task_is_stopped_or_traced(p) holds; otherwise it is the group-wide
 * group_exit_code, offered while SIGNAL_STOP_STOPPED is set.
 */
static int *task_stopped_code(struct task_struct *p, bool ptrace)
{
	if (!ptrace)
		return (p->signal->flags & SIGNAL_STOP_STOPPED) ?
			&p->signal->group_exit_code : NULL;

	return task_is_stopped_or_traced(p) ? &p->exit_code : NULL;
}
/*
 * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
 * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
 * the lock and this task is uninteresting.  If we return nonzero, we have
 * released the lock and the system call should return.
 *
 * The stop code located by task_stopped_code() is consumed (reset to 0)
 * unless WNOWAIT is set, so a stop is reported once.
 */
static int wait_task_stopped(int ptrace, struct task_struct *p,
			     int options, struct siginfo __user *infop,
			     int __user *stat_addr, struct rusage __user *ru)
{
	int retval, exit_code, *p_code, why;
	uid_t uid = 0; /* unneeded, required by compiler */
	pid_t pid;

	/*
	 * Traditionally we see ptrace'd stopped tasks regardless of options.
	 */
	if (!ptrace && !(options & WUNTRACED))
		return 0;

	exit_code = 0;
	spin_lock_irq(&p->sighand->siglock);

	p_code = task_stopped_code(p, ptrace);
	if (unlikely(!p_code))
		goto unlock_sig;

	exit_code = *p_code;
	if (!exit_code)
		goto unlock_sig;

	if (!unlikely(options & WNOWAIT))
		*p_code = 0;

	/* don't need the RCU readlock here as we're holding a spinlock */
	uid = __task_cred(p)->uid;
unlock_sig:
	spin_unlock_irq(&p->sighand->siglock);
	if (!exit_code)
		return 0;

	/*
	 * Now we are pretty sure this task is interesting.
	 * Make sure it doesn't get reaped out from under us while we
	 * give up the lock and then examine it below.  We don't want to
	 * keep holding onto the tasklist_lock while we call getrusage and
	 * possibly take page faults for user memory.
	 */
	get_task_struct(p);
	pid = task_pid_vnr(p);
	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
	read_unlock(&tasklist_lock);

	if (unlikely(options & WNOWAIT))
		return wait_noreap_copyout(p, pid, uid,
					   why, exit_code,
					   infop, ru);

	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
	if (!retval && stat_addr)
		retval = put_user((exit_code << 8) | 0x7f, stat_addr);
	if (!retval && infop)
		retval = put_user(SIGCHLD, &infop->si_signo);
	if (!retval && infop)
		retval = put_user(0, &infop->si_errno);
	if (!retval && infop)
		retval = put_user((short)why, &infop->si_code);
	if (!retval && infop)
		retval = put_user(exit_code, &infop->si_status);
	if (!retval && infop)
		retval = put_user(pid, &infop->si_pid);
	/* Guarded like the stores above so an earlier error is not lost. */
	if (!retval && infop)
		retval = put_user(uid, &infop->si_uid);
	if (!retval)
		retval = pid;
	put_task_struct(p);

	/* Zero would tell the caller that tasklist_lock is still held. */
	BUG_ON(!retval);
	return retval;
}
/*
 * Handle do_wait work for one task in a live, non-stopped state.
 * We hold read_lock(&tasklist_lock) on entry.  If we return zero, we
 * still hold the lock and this task is uninteresting.  If we return
 * nonzero, we have released the lock and the system call should return.
 *
 * A task is reported only when WCONTINUED was requested and its
 * signal_struct carries SIGNAL_STOP_CONTINUED.  The flag is cleared on
 * report unless WNOWAIT is set.
 */
static int wait_task_continued(struct task_struct *p, int options,
			       struct siginfo __user *infop,
			       int __user *stat_addr, struct rusage __user *ru)
{
	int retval;
	pid_t pid;
	uid_t uid;

	if (!unlikely(options & WCONTINUED))
		return 0;

	/* Cheap unlocked test first; repeated under siglock below. */
	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
		return 0;

	spin_lock_irq(&p->sighand->siglock);
	/* Re-check with the lock held. */
	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
		spin_unlock_irq(&p->sighand->siglock);
		return 0;
	}
	if (!unlikely(options & WNOWAIT))
		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
	uid = __task_cred(p)->uid;
	/* Every path that took siglock must drop it before copying out. */
	spin_unlock_irq(&p->sighand->siglock);

	pid = task_pid_vnr(p);
	/* Pin @p: tasklist_lock is dropped before touching user memory. */
	get_task_struct(p);
	read_unlock(&tasklist_lock);

	if (!infop) {
		retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
		put_task_struct(p);
		if (!retval && stat_addr)
			retval = put_user(0xffff, stat_addr);
		if (!retval)
			retval = pid;
	} else {
		/* wait_noreap_copyout() drops the reference on @p itself. */
		retval = wait_noreap_copyout(p, pid, uid,
					     CLD_CONTINUED, SIGCONT,
					     infop, ru);
		BUG_ON(retval == 0);
	}

	return retval;
}
/*
* Consider @p for a wait by @parent.
*
* -ECHILD should be in *@notask_error before the first call.
* Returns nonzero for a final return, when we have unlocked tasklist_lock.
* Returns zero if the search for a child should continue;
* then *@notask_error is 0 if @p is an eligible child,
* or another error from security_task_wait(), or still -ECHILD.
static int wait_consider_task(struct task_struct *parent, int ptrace,
struct task_struct *p, int *notask_error,
enum pid_type type, struct pid *pid, int options,
struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
int ret = eligible_child(type, pid, options, p);
if (!ret)
if (unlikely(ret < 0)) {
/*
* If we have not yet seen any eligible child,
* then let this error code replace -ECHILD.
* A permission error will give the user a clue
* to look for security policy problems, rather
* than for mysterious wait bugs.
*/
if (*notask_error)
*notask_error = ret;
}
if (likely(!ptrace) && unlikely(task_ptrace(p))) {
/*
* This child is hidden by ptrace.
* We aren't allowed to see it now, but eventually we will.
*/
*notask_error = 0;
return 0;
}
if (p->exit_state == EXIT_DEAD)
return 0;
/*
* We don't reap group leaders with subthreads.
*/
if (p->exit_state == EXIT_ZOMBIE && !delay_group_leader(p))
return wait_task_zombie(p, options, infop, stat_addr, ru);
/*
* It's stopped or running now, so it might
* later continue, exit, or stop again.
*/
*notask_error = 0;
if (task_stopped_code(p, ptrace))
return wait_task_stopped(ptrace, p, options,
infop, stat_addr, ru);
return wait_task_continued(p, options, infop, stat_addr, ru);
}
/*
 * Run the do_wait() search over the children of one thread, @tsk.
 *
 * *@notask_error must hold -ECHILD before the first call.
 * A nonzero return is final and means tasklist_lock has been unlocked.
 * A zero return means the caller should keep searching; in that case
 * *@notask_error is 0 if any eligible child was seen, or an error from
 * security_task_wait(), or still -ECHILD.
 */
static int do_wait_thread(struct task_struct *tsk, int *notask_error,
			  enum pid_type type, struct pid *pid, int options,
			  struct siginfo __user *infop, int __user *stat_addr,
			  struct rusage __user *ru)
{
	struct task_struct *child;

	list_for_each_entry(child, &tsk->children, sibling) {
		int result;

		/* Detached threads are never reported to a waiter. */
		if (task_detached(child))
			continue;

		result = wait_consider_task(tsk, 0, child, notask_error,
					    type, pid, options,
					    infop, stat_addr, ru);
		if (result != 0)
			return result;
	}

	return 0;
}
/*
 * Run the do_wait() search over the tasks on @tsk's ptraced list,
 * passing ptrace=1 to wait_consider_task() for each.
 *
 * Returns the first nonzero result from wait_consider_task(), or 0 when
 * the whole list was walked without a final result.
 */
static int ptrace_do_wait(struct task_struct *tsk, int *notask_error,
			  enum pid_type type, struct pid *pid, int options,
			  struct siginfo __user *infop, int __user *stat_addr,
			  struct rusage __user *ru)
{
	struct task_struct *tracee;

	list_for_each_entry(tracee, &tsk->ptraced, ptrace_entry) {
		int result = wait_consider_task(tsk, 1, tracee, notask_error,
						type, pid, options,
						infop, stat_addr, ru);

		if (result != 0)
			return result;
	}

	return 0;
}
/*
 * Common back end of the wait family of system calls.
 *
 * Walks the caller's thread group (only the caller itself when
 * __WNOTHREAD is set), offering each thread's children and ptraced
 * tasks to wait_consider_task().  When an eligible child exists but has
 * nothing to report yet, the caller sleeps on signal->wait_chldexit and
 * retries, unless WNOHANG is set or a signal is pending.
 *
 * Returns a positive value or an error produced by the per-task helpers,
 * -ECHILD (or a security_task_wait() error) when nothing can match,
 * -ERESTARTSYS when interrupted by a signal, or 0 for a WNOHANG return.
 * When @infop is non-NULL a positive result is converted to 0, and on a
 * 0 result the siginfo fields are cleared.
 */
static long do_wait(enum pid_type type, struct pid *pid, int options,
		    struct siginfo __user *infop, int __user *stat_addr,
		    struct rusage __user *ru)
{
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;
	int retval;

	trace_sched_process_wait(pid);

	add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
	/*
	 * If there is nothing that can match our criteria just get out.
	 * We will clear @retval to zero if we see any child that might later
	 * match our criteria, even if we are not able to reap it yet.
	 */
	retval = -ECHILD;
	if ((type < PIDTYPE_MAX) && (!pid || hlist_empty(&pid->tasks[type])))
		goto end;

	/* Set the sleeping state before scanning, ahead of schedule(). */
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		int tsk_result = do_wait_thread(tsk, &retval,
						type, pid, options,
						infop, stat_addr, ru);
		if (!tsk_result)
			tsk_result = ptrace_do_wait(tsk, &retval,
						    type, pid, options,
						    infop, stat_addr, ru);
		if (tsk_result) {
			/*
			 * tasklist_lock is unlocked and we have a final result.
			 */
			retval = tsk_result;
			goto end;
		}

		if (options & __WNOTHREAD)
			break;
		tsk = next_thread(tsk);
		BUG_ON(tsk->signal != current->signal);
	} while (tsk != current);
	read_unlock(&tasklist_lock);

	/*
	 * retval == 0 means some child may match later.  Sleep and retry
	 * unless the caller asked not to block or a signal is pending.
	 */
	if (!retval && !(options & WNOHANG)) {
		retval = -ERESTARTSYS;
		if (!signal_pending(current)) {
			schedule();
			goto repeat;
		}
	}

end:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->signal->wait_chldexit,&wait);
	if (infop) {
		if (retval > 0)
			retval = 0;
		else {
			/*
			 * For a WNOHANG return, clear out all the fields
			 * we would set so the user can easily tell the
			 * difference.
			 */
			if (!retval)
				retval = put_user(0, &infop->si_signo);
			if (!retval)
				retval = put_user(0, &infop->si_errno);
			if (!retval)
				retval = put_user(0, &infop->si_code);
			if (!retval)
				retval = put_user(0, &infop->si_pid);
			if (!retval)
				retval = put_user(0, &infop->si_uid);
			if (!retval)
				retval = put_user(0, &infop->si_status);
		}
	}
	return retval;
}
/*
 * waitid(2) entry point.
 *
 * Rejects unknown option bits and requests that select none of
 * WEXITED, WSTOPPED or WCONTINUED.  @which chooses how @upid is
 * interpreted: P_ALL waits for any child, P_PID for one process and
 * P_PGID for a process group; the latter two require a positive @upid.
 * The result is delivered through @infop (and @ru) by do_wait().
 */
SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
		infop, int, options, struct rusage __user *, ru)
{
	struct pid *pid = NULL;
	enum pid_type type;
	long ret;

	if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
		return -EINVAL;
	if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
		return -EINVAL;

	switch (which) {
	case P_ALL:
		/* PIDTYPE_MAX means "no pid filter" to the code below. */
		type = PIDTYPE_MAX;
		break;
	case P_PID:
		type = PIDTYPE_PID;
		if (upid <= 0)
			return -EINVAL;
		break;
	case P_PGID:
		type = PIDTYPE_PGID;
		if (upid <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (type < PIDTYPE_MAX)
		pid = find_get_pid(upid);
	ret = do_wait(type, pid, options, infop, NULL, ru);
	put_pid(pid);

	asmlinkage_protect(5, ret, which, upid, infop, options, ru);
	return ret;
}
/*
 * wait4(2) entry point.
 *
 * @upid selects the children to wait for:
 *   -1   any child (no pid filter),
 *   < -1 any child in process group -@upid,
 *   0    any child in the caller's own process group,
 *   > 0  the child with that pid.
 * WEXITED is always added to @options before calling do_wait(); the
 * status is returned through @stat_addr and @ru.
 */
SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
		int, options, struct rusage __user *, ru)
{
	struct pid *pid = NULL;
	enum pid_type type;
	long ret;

	if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
			__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	if (upid == -1)
		type = PIDTYPE_MAX;
	else if (upid < 0) {
		type = PIDTYPE_PGID;
		pid = find_get_pid(-upid);
	} else if (upid == 0) {
		type = PIDTYPE_PGID;
		pid = get_task_pid(current, PIDTYPE_PGID);
	} else /* upid > 0 */ {
		type = PIDTYPE_PID;
		pid = find_get_pid(upid);
	}

	ret = do_wait(type, pid, options | WEXITED, NULL, stat_addr, ru);
	put_pid(pid);

	asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
	return ret;
}
#ifdef __ARCH_WANT_SYS_WAITPID
/*
 * Compatibility entry point: sys_waitpid() simply forwards to
 * sys_wait4() with a NULL rusage pointer.  New code should have libc
 * implement waitpid() on top of sys_wait4() instead.
 */
SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
{
	struct rusage __user *no_rusage = NULL;

	return sys_wait4(pid, stat_addr, options, no_rusage);
}
#endif