Newer
Older
sizeof(newsighand->action));
write_lock_irq(&tasklist_lock);
spin_lock(&oldsighand->siglock);
rcu_assign_pointer(tsk->sighand, newsighand);
spin_unlock(&oldsighand->siglock);
write_unlock_irq(&tasklist_lock);
BUG_ON(!thread_group_leader(tsk));
/*
* These functions flushes out all traces of the currently running executable
* so that a new one can be started
*/
static void flush_old_files(struct files_struct * files)
spin_lock(&files->file_lock);
for (;;) {
unsigned long set, i;
j++;
i = j * __NFDBITS;
if (i >= fdt->max_fds)
set = fdt->close_on_exec->fds_bits[j];
fdt->close_on_exec->fds_bits[j] = 0;
spin_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
sys_close(i);
}
}
spin_lock(&files->file_lock);
}
spin_unlock(&files->file_lock);
}
char *get_task_comm(char *buf, struct task_struct *tsk)
{
/* buf must be at least sizeof(tsk->comm) in size */
task_lock(tsk);
strncpy(buf, tsk->comm, sizeof(tsk->comm));
task_unlock(tsk);
void set_task_comm(struct task_struct *tsk, char *buf)
{
task_lock(tsk);
trace_task_rename(tsk, buf);
/*
* Threads may access current->comm without holding
* the task lock, so write the string carefully.
* Readers without a lock may see incomplete new
* names but are safe from non-terminating string reads.
*/
memset(tsk->comm, 0, TASK_COMM_LEN);
wmb();
strlcpy(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk);
static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
{
int i, ch;
/* Copies the binary name from after last slash */
for (i = 0; (ch = *(fn++)) != '\0';) {
if (ch == '/')
i = 0; /* overwrite what we wrote */
else
if (i < len - 1)
tcomm[i++] = ch;
}
tcomm[i] = '\0';
}
int flush_old_exec(struct linux_binprm * bprm)
{
/*
* Make sure we have a private signal table and that
* we are unassociated from the previous thread group.
*/
retval = de_thread(current);
if (retval)
goto out;
filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
acct_arg_size(bprm, 0);
goto out;
set_fs(USER_DS);
current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
flush_thread();
current->personality &= ~bprm->per_clear;
return 0;
out:
return retval;
}
EXPORT_SYMBOL(flush_old_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
}
EXPORT_SYMBOL(would_dump);
void setup_new_exec(struct linux_binprm * bprm)
{
arch_pick_mmap_layout(current->mm);
/* This is the point of no return */
current->sas_ss_sp = current->sas_ss_size = 0;
if (current_euid() == current_uid() && current_egid() == current_gid())
set_dumpable(current->mm, 1);
set_dumpable(current->mm, suid_dumpable);
set_task_comm(current, bprm->tcomm);
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
* some architectures like powerpc
*/
current->mm->task_size = TASK_SIZE;
/* install the new credentials */
if (bprm->cred->uid != current_euid() ||
bprm->cred->gid != current_egid()) {
current->pdeath_signal = 0;
} else {
would_dump(bprm, bprm->file);
if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
set_dumpable(current->mm, suid_dumpable);
/*
* Flush performance counters when crossing a
* security domain:
*/
if (!get_dumpable(current->mm))
perf_event_exit_task(current);
/* An exec changes our domain. We are no longer part of the thread
group */
current->self_exec_id++;
flush_signal_handlers(current, 0);
flush_old_files(current->files);
}
EXPORT_SYMBOL(setup_new_exec);
/*
* Prepare credentials and lock ->cred_guard_mutex.
* install_exec_creds() commits the new creds and drops the lock.
* Or, if exec fails before, free_bprm() should release ->cred and
* and unlock.
*/
int prepare_bprm_creds(struct linux_binprm *bprm)
{
if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex))
return -ERESTARTNOINTR;
bprm->cred = prepare_exec_creds();
if (likely(bprm->cred))
return 0;
mutex_unlock(¤t->signal->cred_guard_mutex);
return -ENOMEM;
}
void free_bprm(struct linux_binprm *bprm)
{
free_arg_pages(bprm);
if (bprm->cred) {
mutex_unlock(¤t->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
kfree(bprm);
}
/*
* install the new credentials for this executable
*/
void install_exec_creds(struct linux_binprm *bprm)
{
security_bprm_committing_creds(bprm);
commit_creds(bprm->cred);
bprm->cred = NULL;
/*
* cred_guard_mutex must be held at least to this point to prevent
* ptrace_attach() from altering our determination of the task's
* credentials; any time after this it may be unlocked.
*/
security_bprm_committed_creds(bprm);
mutex_unlock(¤t->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
/*
* determine how safe it is to execute the proposed program
* - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH
*/
unsigned n_fs;
if (p->ptrace) {
if (p->ptrace & PT_PTRACE_CAP)
bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
else
bprm->unsafe |= LSM_UNSAFE_PTRACE;
}
for (t = next_thread(p); t != p; t = next_thread(t)) {
if (t->fs == p->fs)
n_fs++;
}
rcu_read_unlock();
if (p->fs->users > n_fs) {
bprm->unsafe |= LSM_UNSAFE_SHARE;
res = -EAGAIN;
if (!p->fs->in_exec) {
p->fs->in_exec = 1;
res = 1;
}
/*
* Fill the binprm structure from the inode.
* Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
*
* This may be called multiple times for binary chains (scripts for example).
*/
int prepare_binprm(struct linux_binprm *bprm)
{
umode_t mode;
struct inode * inode = bprm->file->f_path.dentry->d_inode;
int retval;
mode = inode->i_mode;
if (bprm->file->f_op == NULL)
return -EACCES;
/* clear any previous set[ug]id data from a previous binary */
bprm->cred->euid = current_euid();
bprm->cred->egid = current_egid();
if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->euid = inode->i_uid;
}
/* Set-gid? */
/*
* If setgid is set but no group execute bit then this
* is a candidate for mandatory locking, not a setgid
* executable.
*/
if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
bprm->cred->egid = inode->i_gid;
}
}
/* fill in binprm security blob */
retval = security_bprm_set_creds(bprm);
bprm->cred_prepared = 1;
memset(bprm->buf, 0, BINPRM_BUF_SIZE);
return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
/*
* Arguments are '\0' separated strings found at the location bprm->p
* points to; chop off the first by relocating brpm->p to right after
* the first '\0' encountered.
*/
int remove_arg_zero(struct linux_binprm *bprm)
int ret = 0;
unsigned long offset;
char *kaddr;
struct page *page;
do {
offset = bprm->p & ~PAGE_MASK;
page = get_arg_page(bprm, bprm->p, 0);
if (!page) {
ret = -EFAULT;
goto out;
}
kaddr = kmap_atomic(page, KM_USER0);
for (; offset < PAGE_SIZE && kaddr[offset];
offset++, bprm->p++)
;
kunmap_atomic(kaddr, KM_USER0);
put_arg_page(page);
if (offset == PAGE_SIZE)
free_arg_page(bprm, (bprm->p >> PAGE_SHIFT) - 1);
} while (offset == PAGE_SIZE);
bprm->p++;
bprm->argc--;
ret = 0;
}
EXPORT_SYMBOL(remove_arg_zero);
/*
* cycle the list of binary formats handler, until one recognizes the image
*/
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
unsigned int depth = bprm->recursion_depth;

Denys Vlasenko
committed
pid_t old_pid;
retval = security_bprm_check(bprm);
if (retval)
return retval;
retval = audit_bprm(bprm);
if (retval)
return retval;

Denys Vlasenko
committed
/* Need to fetch pid before load_binary changes it */
rcu_read_lock();
old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
rcu_read_unlock();
retval = -ENOENT;
for (try=0; try<2; try++) {
read_lock(&binfmt_lock);
list_for_each_entry(fmt, &formats, lh) {
int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
if (!fn)
continue;
if (!try_module_get(fmt->module))
continue;
read_unlock(&binfmt_lock);
retval = fn(bprm, regs);
/*
* Restore the depth counter to its starting value
* in this call, so we don't have to rely on every
* load_binary function to restore it on return.
*/
bprm->recursion_depth = depth;
if (depth == 0) {
trace_sched_process_exec(current, old_pid, bprm);
ptrace_event(PTRACE_EVENT_EXEC, old_pid);
}
put_binfmt(fmt);
allow_write_access(bprm->file);
if (bprm->file)
fput(bprm->file);
bprm->file = NULL;
current->did_exec = 1;
return retval;
}
read_lock(&binfmt_lock);
put_binfmt(fmt);
if (retval != -ENOEXEC || bprm->mm == NULL)
break;
if (!bprm->file) {
read_unlock(&binfmt_lock);
return retval;
}
}
read_unlock(&binfmt_lock);
#ifdef CONFIG_MODULES
if (retval != -ENOEXEC || bprm->mm == NULL) {
break;
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
if (printable(bprm->buf[0]) &&
printable(bprm->buf[1]) &&
printable(bprm->buf[2]) &&
printable(bprm->buf[3]))
break; /* -ENOEXEC */
if (try)
break; /* -ENOEXEC */
request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
}
#else
break;
#endif
}
return retval;
}
EXPORT_SYMBOL(search_binary_handler);
/*
* sys_execve() executes a new program.
*/
static int do_execve_common(const char *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
bool clear_in_exec;
const struct cred *cred = current_cred();
/*
* We move the actual failure in case of RLIMIT_NPROC excess from
* set*uid() to execve() because too many poorly written programs
* don't check setuid() return code. Here we additionally recheck
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
retval = -EAGAIN;
goto out_ret;
}
/* We're below the limit (still or again), so we don't want to make
* further execve() calls fail. */
current->flags &= ~PF_NPROC_EXCEEDED;
retval = unshare_files(&displaced);
if (retval)
goto out_ret;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
goto out_files;
retval = prepare_bprm_creds(bprm);
if (retval)
goto out_free;
if (retval < 0)
goto out_free;
clear_in_exec = retval;
current->in_execve = 1;
file = open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
sched_exec();
bprm->file = file;
bprm->filename = filename;
bprm->interp = filename;
retval = bprm_mm_init(bprm);
if (retval)
goto out_file;
bprm->argc = count(argv, MAX_ARG_STRINGS);
goto out;
bprm->envc = count(envp, MAX_ARG_STRINGS);
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
if ((retval = bprm->envc) < 0)
goto out;
retval = prepare_binprm(bprm);
if (retval < 0)
goto out;
retval = copy_strings_kernel(1, &bprm->filename, bprm);
if (retval < 0)
goto out;
bprm->exec = bprm->p;
retval = copy_strings(bprm->envc, envp, bprm);
if (retval < 0)
goto out;
retval = copy_strings(bprm->argc, argv, bprm);
if (retval < 0)
goto out;
retval = search_binary_handler(bprm,regs);
if (retval < 0)
goto out;
/* execve succeeded */
acct_update_integrals(current);
free_bprm(bprm);
if (displaced)
put_files_struct(displaced);
return retval;
if (bprm->mm) {
acct_arg_size(bprm, 0);
mmput(bprm->mm);
}
out_file:
if (bprm->file) {
allow_write_access(bprm->file);
fput(bprm->file);
}
if (clear_in_exec)
current->fs->in_exec = 0;
out_free:
out_files:
if (displaced)
reset_files_struct(displaced);
int do_execve(const char *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp,
struct pt_regs *regs)
{
struct user_arg_ptr argv = { .ptr.native = __argv };
struct user_arg_ptr envp = { .ptr.native = __envp };
return do_execve_common(filename, argv, envp, regs);
}
#ifdef CONFIG_COMPAT
int compat_do_execve(char *filename,
compat_uptr_t __user *__argv,
compat_uptr_t __user *__envp,
struct pt_regs *regs)
{
struct user_arg_ptr argv = {
.is_compat = true,
.ptr.compat = __argv,
};
struct user_arg_ptr envp = {
.is_compat = true,
.ptr.compat = __envp,
};
return do_execve_common(filename, argv, envp, regs);
}
void set_binfmt(struct linux_binfmt *new)
struct mm_struct *mm = current->mm;
if (mm->binfmt)
module_put(mm->binfmt->module);
mm->binfmt = new;
if (new)
__module_get(new->module);
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
static int expand_corename(struct core_name *cn)
{
char *old_corename = cn->corename;
cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
if (!cn->corename) {
kfree(old_corename);
return -ENOMEM;
}
return 0;
}
static int cn_printf(struct core_name *cn, const char *fmt, ...)
{
char *cur;
int need;
int ret;
va_list arg;
va_start(arg, fmt);
need = vsnprintf(NULL, 0, fmt, arg);
va_end(arg);
if (likely(need < cn->size - cn->used - 1))
goto out_printf;
ret = expand_corename(cn);
if (ret)
goto expand_fail;
out_printf:
cur = cn->corename + cn->used;
va_start(arg, fmt);
vsnprintf(cur, need + 1, fmt, arg);
va_end(arg);
cn->used += need;
return 0;
expand_fail:
return ret;
}
static void cn_escape(char *str)
{
for (; *str; str++)
if (*str == '/')
*str = '!';
}
static int cn_print_exe_file(struct core_name *cn)
{
struct file *exe_file;
int ret;
exe_file = get_mm_exe_file(current->mm);
if (!exe_file) {
char *commstart = cn->corename + cn->used;
ret = cn_printf(cn, "%s (path unknown)", current->comm);
cn_escape(commstart);
return ret;
}
pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
if (!pathbuf) {
ret = -ENOMEM;
goto put_exe_file;
}
path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
if (IS_ERR(path)) {
ret = PTR_ERR(path);
goto free_buf;
}
ret = cn_printf(cn, "%s", path);
free_buf:
kfree(pathbuf);
put_exe_file:
fput(exe_file);
return ret;
}
/* format_corename will inspect the pattern parameter, and output a
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
*/
static int format_corename(struct core_name *cn, long signr)
const struct cred *cred = current_cred();
const char *pat_ptr = core_pattern;
int ispipe = (*pat_ptr == '|');
int err = 0;
cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
cn->corename = kmalloc(cn->size, GFP_KERNEL);
cn->used = 0;
if (!cn->corename)
return -ENOMEM;
/* Repeat as long as we have more pattern to process and more output
space */
while (*pat_ptr) {
if (*pat_ptr != '%') {
if (*pat_ptr == 0)
err = cn_printf(cn, "%c", *pat_ptr++);
/* single % at the end, drop that */
case 0:
goto out;
/* Double percent, output one percent */
case '%':
err = cn_printf(cn, "%c", '%');
break;
/* pid */
case 'p':
pid_in_pattern = 1;
err = cn_printf(cn, "%d",
task_tgid_vnr(current));
err = cn_printf(cn, "%d", cred->uid);
err = cn_printf(cn, "%d", cred->gid);
break;
/* signal that caused the coredump */
case 's':
err = cn_printf(cn, "%ld", signr);
break;
/* UNIX time of coredump */
case 't': {
struct timeval tv;
do_gettimeofday(&tv);
err = cn_printf(cn, "%lu", tv.tv_sec);
case 'h': {
char *namestart = cn->corename + cn->used;
err = cn_printf(cn, "%s",
utsname()->nodename);
case 'e': {
char *commstart = cn->corename + cn->used;
err = cn_printf(cn, "%s", current->comm);
case 'E':
err = cn_print_exe_file(cn);
break;

Neil Horman
committed
/* core limit size */
case 'c':
err = cn_printf(cn, "%lu",
rlimit(RLIMIT_CORE));

Neil Horman
committed
break;
if (err)
return err;
/* Backward compatibility with core_uses_pid:
*
* If core_pattern does not include a %p (as is the default)
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
if (!ispipe && !pid_in_pattern && core_uses_pid) {
err = cn_printf(cn, ".%d", task_tgid_vnr(current));
if (err)
return err;
out:
return ispipe;
static int zap_process(struct task_struct *start, int exit_code)
{
struct task_struct *t;
start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
t = start;
do {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
if (t != current && t->mm) {
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
} while_each_thread(start, t);
static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
struct core_state *core_state, int exit_code)
int nr = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
if (!signal_group_exit(tsk->signal)) {
mm->core_state = core_state;
nr = zap_process(tsk, exit_code);
spin_unlock_irq(&tsk->sighand->siglock);
if (unlikely(nr < 0))
return nr;
if (atomic_read(&mm->mm_users) == nr + 1)
/*
* We should find and kill all tasks which use this mm, and we should
* count them correctly into ->nr_threads. We don't take tasklist
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
* lock, but this is safe wrt:
*
* fork:
* None of sub-threads can fork after zap_process(leader). All
* processes which were created before this point should be
* visible to zap_threads() because copy_process() adds the new
* process to the tail of init_task.tasks list, and lock/unlock
* of ->siglock provides a memory barrier.
*
* do_exit:
* The caller holds mm->mmap_sem. This means that the task which
* uses this mm can't pass exit_mm(), so it can't exit or clear
* its ->mm.
*
* de_thread:
* It does list_replace_rcu(&leader->tasks, ¤t->tasks),
* we must see either old or new leader, this does not matter.
* However, it can change p->sighand, so lock_task_sighand(p)
* must be used. Since p->mm != NULL and we hold ->mmap_sem
* it can't fail.
*
* Note also that "g" can be the old leader with ->mm == NULL
* and already unhashed and thus removed from ->thread_group.
* This is OK, __unhash_process()->list_del_rcu() does not
* clear the ->next pointer, we will find the new leader via
* next_thread().
*/
if (g == tsk->group_leader)
continue;
if (g->flags & PF_KTHREAD)
continue;
p = g;
do {
if (p->mm) {
if (unlikely(p->mm == mm)) {
lock_task_sighand(p, &flags);
nr += zap_process(p, exit_code);
unlock_task_sighand(p, &flags);
}
} while_each_thread(g, p);
atomic_set(&core_state->nr_threads, nr);
static int coredump_wait(int exit_code, struct core_state *core_state)
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
int core_waiters = -EBUSY;
init_completion(&core_state->startup);
core_state->dumper.task = tsk;
core_state->dumper.next = NULL;
down_write(&mm->mmap_sem);
if (!mm->core_state)
core_waiters = zap_threads(tsk, mm, core_state, exit_code);
wait_for_completion(&core_state->startup);
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
static void coredump_finish(struct mm_struct *mm)
{
struct core_thread *curr, *next;
struct task_struct *task;
next = mm->core_state->dumper.next;
while ((curr = next) != NULL) {
next = curr->next;
task = curr->task;
/*
* see exit_mm(), curr->task must not see
* ->task == NULL before we read ->next.
*/
smp_mb();
curr->task = NULL;
wake_up_process(task);
}
mm->core_state = NULL;
}
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
* these bits are not changed atomically. So get_dumpable can observe the
* intermediate state. To avoid doing unexpected behavior, get get_dumpable
* return either old dumpable or new one by paying attention to the order of
* modifying the bits.
*
* dumpable | mm->flags (binary)
* old new | initial interim final
* ---------+-----------------------
* 0 1 | 00 01 01
* 0 2 | 00 10(*) 11
* 1 0 | 01 00 00
* 1 2 | 01 11 11
* 2 0 | 11 10(*) 00
* 2 1 | 11 11 01
*
* (*) get_dumpable regards interim value of 10 as 11.
*/
void set_dumpable(struct mm_struct *mm, int value)
{
switch (value) {
case 0:
clear_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
case 1:
set_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
case 2:
set_bit(MMF_DUMP_SECURELY, &mm->flags);
smp_wmb();
set_bit(MMF_DUMPABLE, &mm->flags);
break;
}