zoukankan      html  css  js  c++  java
  • ulimit功能以及如何在内核中生效

     关键词:ulimit、getrlimit、setrlimit、RLIMIT_CPU、RLIMIT_CORE等等。

     内核资源限制通过ulimit进行读取和设置;ulimit进行资源设置之后,简单分析内核中是如何对系统行为进行限制的。

    1. 了解ulimit(busybox)

    以busybox中的ulimit为例,主要通过调用getrlimit()/setrlimit()设置系统的各种资源。

    ulimit设置和获取的资源主要有如下几种:

    #define RLIMIT_CPU        0    /* CPU time in sec */
    #define RLIMIT_FSIZE        1    /* Maximum filesize */
    #define RLIMIT_DATA        2    /* max data size */
    #define RLIMIT_STACK        3    /* max stack size */
    #define RLIMIT_CORE        4    /* max core file size */
    #define RLIMIT_RSS        5    /* max resident set size */
    #define RLIMIT_NPROC        6    /* max number of processes */
    #define RLIMIT_NOFILE        7    /* max number of open files */
    #define RLIMIT_MEMLOCK        8    /* max locked-in-memory address space */
    #define RLIMIT_AS        9    /* address space limit */
    #define RLIMIT_LOCKS        10    /* maximum file locks held */
    #define RLIMIT_SIGPENDING    11    /* max number of pending signals */
    #define RLIMIT_MSGQUEUE        12    /* maximum bytes in POSIX mqueues */
    #define RLIMIT_NICE        13    /* max nice prio allowed to raise to 0-39 for nice level 19 .. -20 */
    #define RLIMIT_RTPRIO        14    /* maximum realtime priority */
    #define RLIMIT_RTTIME        15    /* timeout for RT tasks in us */
    #define RLIM_NLIMITS        16

    用户空间对内核资源的限制通过getrlimit()/setrlimit()两个函数进行。

    其中resource就是如上的宏定义,struct rlimit是用户输入的阈值。

    /* Soft/hard limit pair exchanged with getrlimit()/setrlimit(). */
    struct rlimit {
        rlim_t rlim_cur;  /* Soft limit */
        rlim_t rlim_max;  /* Hard limit (ceiling for rlim_cur) */
    };
    
    #include <sys/time.h>
    #include <sys/resource.h>

    int getrlimit(int resource, struct rlimit *rlim);
    int setrlimit(int resource, const struct rlimit *rlim);
    int prlimit(pid_t pid, int resource, const struct rlimit *new_limit,
                struct rlimit *old_limit);

    内核resource类型和ulimit命令选项的对应关系,在busybox中通过limits_tbl[]关联起来。

    /*
     * Maps each ulimit option letter to a resource.
     * Columns, as used by the code that walks this table: resource id passed
     * to getrlimit()/setrlimit() (cmd), shift converting between display
     * units and raw values (factor_shift: 9 for 512-byte blocks, 10 for kb,
     * 0 for plain counts), option character, and display name.
     * NOTE(review): field order is inferred from usage; struct limits itself
     * is not shown here - confirm against its definition.
     */
    static const struct limits limits_tbl[] = {
        { RLIMIT_FSIZE,        9,    'f',    "file size (blocks)" },
        { RLIMIT_CPU,        0,    't',    "cpu time (seconds)" },
        { RLIMIT_DATA,        10,    'd',    "data seg size (kb)" },
        { RLIMIT_STACK,        10,    's',    "stack size (kb)" },
        { RLIMIT_CORE,        9,    'c',    "core file size (blocks)" },
        { RLIMIT_RSS,        10,    'm',    "resident set size (kb)" },
        { RLIMIT_MEMLOCK,    10,    'l',    "locked memory (kb)" },
        { RLIMIT_NPROC,        0,    'p',    "processes" },
        { RLIMIT_NOFILE,    0,    'n',    "file descriptors" },
        { RLIMIT_AS,        10,    'v',    "address space (kb)" },
        { RLIMIT_LOCKS,        0,    'w',    "locks" },
        { RLIMIT_NICE,        0,    'e',    "scheduling priority" },
        { RLIMIT_RTPRIO,    0,    'r',    "real-time priority" },
    };

    下面看看ulimit工具如何通过getrlimit()/setrlimit()对内核资源进行限制。

    int FAST_FUNC
    shell_builtin_ulimit(char **argv)
    {
        unsigned opts;
        unsigned argc;
    ...
        argc = string_array_len(argv);
    
        opts = 0;
        while (1) {
            struct rlimit limit;
            const struct limits *l;
            int opt_char = getopt(argc, argv, ulimit_opt_string);
    
            if (opt_char == -1)
                break;
            if (opt_char == 'H') {
                opts |= OPT_hard;
                continue;
            }
            if (opt_char == 'S') {
                opts |= OPT_soft;
                continue;
            }
    
            if (opt_char == 'a') {
                for (l = limits_tbl; l != &limits_tbl[ARRAY_SIZE(limits_tbl)]; l++) {
                    getrlimit(l->cmd, &limit);
                    printf("-%c: %-30s ", l->option, l->name);
                    printlim(opts, &limit, l);
                }
                continue;
            }
    
            if (opt_char == 1)
                opt_char = 'f';
            for (l = limits_tbl; l != &limits_tbl[ARRAY_SIZE(limits_tbl)]; l++) {----------------------------limits_tbl[]中是struct limits结构体的数组,对应每一个resource资源。
                if (opt_char == l->option) {-----------------------------------------------------------------选择和当前opt_char一致的limits_tbl[]成员,然后进行显示或者设置。
                    char *val_str;
    
                    getrlimit(l->cmd, &limit);---------------------------------------------------------------首先获取当前类型的resource。
    
                    val_str = optarg;
                    if (!val_str && argv[optind] && argv[optind][0] != '-')
                        val_str = argv[optind++]; /* ++ skips NN in "-c NN" case */
                    if (val_str) {---------------------------------------------------------------------------后面跟上参数的表示是设置,否则就是读取。
                        rlim_t val;
    
                        if (strcmp(val_str, "unlimited") == 0)
                            val = RLIM_INFINITY;-------------------------------------------------------------参数是unlimited类型。
                        else {
                            if (sizeof(val) == sizeof(int))
                                val = bb_strtou(val_str, NULL, 10);
                            else if (sizeof(val) == sizeof(long))
                                val = bb_strtoul(val_str, NULL, 10);
                            else
                                val = bb_strtoull(val_str, NULL, 10);
                            if (errno) {
                                bb_error_msg("invalid number '%s'", val_str);
                                return EXIT_FAILURE;
                            }
                            val <<= l->factor_shift;---------------------------------------------------------将参数转换成内核识别的值,这里面注意不同参数有factor_shift的区别,这是工具和内核之间的一个转换。
                        }
    //bb_error_msg("opt %c val_str:'%s' val:%lld", opt_char, val_str, (long long)val);
                        /* from man bash: "If neither -H nor -S
                         * is specified, both the soft and hard
                         * limits are set. */
                        if (!opts)---------------------------------------------------------------------------不指定-H/-S则两个都设置,否则单独设置。
                            opts = OPT_hard + OPT_soft;
                        if (opts & OPT_hard)
                            limit.rlim_max = val;
                        if (opts & OPT_soft)
                            limit.rlim_cur = val;
    //bb_error_msg("setrlimit(%d, %lld, %lld)", l->cmd, (long long)limit.rlim_cur, (long long)limit.rlim_max);
                        if (setrlimit(l->cmd, &limit) < 0) {-------------------------------------------------将指定类型的resource阈值设置到内核中。
                            bb_perror_msg("error setting limit");
                            return EXIT_FAILURE;
                        }
                    } else {
                        printlim(opts, &limit, l);
                    }
                    break;
                }
            } /* for (every possible opt) */
    
            if (l == &limits_tbl[ARRAY_SIZE(limits_tbl)]) {
                /* bad option. getopt already complained. */
                break;
            }
        } /* while (there are options) */
    
        return 0;
    }
    
    /*
     * Print one resource limit in ulimit's display units.
     *
     * opts:  when OPT_hard is set the hard limit (rlim_max) is printed,
     *        otherwise the soft limit (rlim_cur).
     * limit: limit pair to print from.
     * l:     table entry whose factor_shift scales the raw value down to
     *        display units.
     *
     * RLIM_INFINITY is printed as "unlimited".
     */
    static void printlim(unsigned opts, const struct rlimit *limit,
                const struct limits *l)
    {
        rlim_t val;
    
        val = limit->rlim_max;
        if (!(opts & OPT_hard))
            val = limit->rlim_cur;
    
        if (val == RLIM_INFINITY)
            puts("unlimited");
        else {
            val >>= l->factor_shift;
            /* Cast to exactly the type %llu expects. */
            printf("%llu\n", (unsigned long long) val);
        }
    }

    至此可以了解到ulimit是如何对内核resource产生影响的。 

    2. getrlimit()/setrlimit()内核调用

    getrlimit()/setrlimit()在内核中有同名的系统调用,实现如下,二者的核心都是do_prlimit()。

    /*
     * getrlimit system call: report the limit pair of @resource for the
     * calling task.
     *
     * The limits are fetched by do_prlimit() with no new value supplied and
     * then copied out to the user buffer @rlim.
     *
     * Returns the do_prlimit() error if it fails, -EFAULT if the copy to
     * user space fails, and 0 otherwise.
     */
    SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
    {
        struct rlimit value;
        int ret;
    
        ret = do_prlimit(current, resource, NULL, &value);
        if (!ret)
            ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
    
        return ret;
    }
    
    /*
     * setrlimit system call: install a new limit pair for @resource on the
     * calling task.
     *
     * The pair is first copied in from the user buffer @rlim; the update
     * itself is done by do_prlimit() with no old-value output requested.
     *
     * Returns -EFAULT if the copy from user space fails, otherwise whatever
     * do_prlimit() returns.
     */
    SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
    {
        struct rlimit new_rlim;
    
        if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
            return -EFAULT;
        return do_prlimit(current, resource, &new_rlim, NULL);
    }
    
    int do_prlimit(struct task_struct *tsk, unsigned int resource,
            struct rlimit *new_rlim, struct rlimit *old_rlim)
    {
        struct rlimit *rlim;
        int retval = 0;
    
        if (resource >= RLIM_NLIMITS)
            return -EINVAL;
        if (new_rlim) {
            if (new_rlim->rlim_cur > new_rlim->rlim_max)
                return -EINVAL;
            if (resource == RLIMIT_NOFILE &&
                    new_rlim->rlim_max > sysctl_nr_open)----------------------RLIMIT_NOFILE不能超过sysctl_nr_open数目。
                return -EPERM;
        }
    
        read_lock(&tasklist_lock);
        if (!tsk->sighand) {
            retval = -ESRCH;
            goto out;
        }
    
        rlim = tsk->signal->rlim + resource;
        task_lock(tsk->group_leader);
        if (new_rlim) {
            if (new_rlim->rlim_max > rlim->rlim_max &&
                    !capable(CAP_SYS_RESOURCE))
                retval = -EPERM;
            if (!retval)
                retval = security_task_setrlimit(tsk->group_leader,
                        resource, new_rlim);
            if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
                new_rlim->rlim_cur = 1;
            }
        }
    ...
        task_unlock(tsk->group_leader);
    
         if (!retval && new_rlim && resource == RLIMIT_CPU &&
                 new_rlim->rlim_cur != RLIM_INFINITY)
            update_rlimit_cpu(tsk, new_rlim->rlim_cur);--------------------------设置RLIMIT_CPU需要更新CPU相关信息。
    out:
        read_unlock(&tasklist_lock);
        return retval;
    }

    rlimit的设置比较简单,使用则分散在各处。

    获取当前系统resource限制接口有:

    /* Return the soft limit (rlim_cur) of resource @limit for task @tsk. */
    static inline unsigned long task_rlimit(const struct task_struct *tsk,
            unsigned int limit)
    {
        return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
    }
    
    /* Return the hard limit (rlim_max) of resource @limit for task @tsk. */
    static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
            unsigned int limit)
    {
        return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
    }
    
    /* Soft limit of resource @limit for the current task. */
    static inline unsigned long rlimit(unsigned int limit)
    {
        return task_rlimit(current, limit);
    }
    
    /* Hard limit of resource @limit for the current task. */
    static inline unsigned long rlimit_max(unsigned int limit)
    {
        return task_rlimit_max(current, limit);
    }

    新创建进程/线程的rlimit继承自父进程的rlimit。

    3. 资源分类

    下面对各种类型资源在内核中是如何进行限制简单分析。

    3.1 RLIMIT_CPU 0 /* CPU time in sec */

    RLIMIT_CPU表示进程CPU运行时间的最大值,单位是秒。

    RLIMIT_CPU规定了进程所使用的最大CPU时间,超过soft发送SIGXCPU信号,超过hard发送SIGKILL信号。

    static void check_process_timers(struct task_struct *tsk,
    				 struct list_head *firing)
    {
    ...
    	soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);----------------------------------获取系统资源soft值。
    	if (soft != RLIM_INFINITY) {
    		unsigned long psecs = cputime_to_secs(ptime);------------------------------表示当前进程所占用的CPU时间。
    		unsigned long hard =
    			READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);-------------------------获取系统资源hard值。
    		cputime_t x;
    		if (psecs >= hard) {
    			/*
    			 * At the hard limit, we just die.
    			 * No need to calculate anything else now.
    			 */
    			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);----------------如果进程CPU时间超过hard,则向进程发送SIGKILL信号,杀死进程。
    			return;
    		}
    		if (psecs >= soft) {
    			/*
    			 * At the soft limit, send a SIGXCPU every second.
    			 */
    			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);----------------如果进程CPU时间超过soft,则向进程发送SIGXCPU信号。
    			if (soft < hard) {
    				soft++;
    				sig->rlim[RLIMIT_CPU].rlim_cur = soft;
    			}
    		}
    		x = secs_to_cputime(soft);
    		if (!prof_expires || x < prof_expires) {
    			prof_expires = x;
    		}
    	}
    ...
    }

    3.2 RLIMIT_FSIZE 1 /* Maximum filesize */

    RLIMIT_FSIZE表示创建文件大小的最大值,超过此大小则发送SIGXFSZ。

    int inode_newsize_ok(const struct inode *inode, loff_t offset)
    {
        if (inode->i_size < offset) {
            unsigned long limit;
    
            limit = rlimit(RLIMIT_FSIZE);---------------------------------获取系统RLIMIT_FSIZE大小。
            if (limit != RLIM_INFINITY && offset > limit)
                goto out_sig;
            if (offset > inode->i_sb->s_maxbytes)
                goto out_big;
        } else {
            /*
             * truncation of in-use swapfiles is disallowed - it would
             * cause subsequent swapout to scribble on the now-freed
             * blocks.
             */
            if (IS_SWAPFILE(inode))
                return -ETXTBSY;
        }
    
        return 0;
    out_sig:
        send_sig(SIGXFSZ, current, 0);------------------------------------发送SIGXFSZ信号。
    out_big:
        return -EFBIG;
    }

    3.3 RLIMIT_DATA 2 /* max data size */

    RLIMIT_DATA用于限制数据段大小的最大值。

    may_expand_vm()用于判断是否允许进程扩大自己的vm空间,返回true表示允许,false表示禁止。

    bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
    {
        if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)------------------------首先检查进程的total_vm+pages是否大于RLIMIT_AS,如果超过则返回false,表示不允许扩大vm空间。
            return false;
    
        if (is_data_mapping(flags) &&
            mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
            /* Workaround for Valgrind */
            if (rlimit(RLIMIT_DATA) == 0 &&
                mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
                return true;
            if (!ignore_rlimit_data) {
                pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n",
                         current->comm, current->pid,
                         (mm->data_vm + npages) << PAGE_SHIFT,
                         rlimit(RLIMIT_DATA));
                return false;---------------------------------------------------------------如果区域大于RLIMIT_DATA,并且没有ignore_rlimit_data,返回false。
            }
        }
    
        return true;
    }

    3.4 RLIMIT_STACK 3 /* max stack size */

    RLIMIT_STACK表示一个线程/进程栈的最大尺寸。

    expand_stack()会对增加后的尺寸进行检查,确保符合RLIMIT_STACK等一系列限制。

    /*
     * Grow the stack VMA @vma so that it covers @address.
     * In this excerpt it is a thin wrapper that forwards to
     * expand_downwards() and returns its result.
     */
    int expand_stack(struct vm_area_struct *vma, unsigned long address)
    {
        return expand_downwards(vma, address);
    }
    
    int expand_downwards(struct vm_area_struct *vma,
                       unsigned long address)
    {
    ...
        /* Somebody else might have raced and expanded it already */
        if (address < vma->vm_start) {
            unsigned long size, grow;
    
            size = vma->vm_end - address;
            grow = (vma->vm_start - address) >> PAGE_SHIFT;
    
            error = -ENOMEM;
            if (grow <= vma->vm_pgoff) {
                error = acct_stack_growth(vma, size, grow);
                if (!error) {
    ...
                }
            }
        }
        anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
        return error;
    }
    
    static int acct_stack_growth(struct vm_area_struct *vma,
                     unsigned long size, unsigned long grow)
    {
        struct mm_struct *mm = vma->vm_mm;
        struct rlimit *rlim = current->signal->rlim;
        unsigned long new_start;
    
        /* address space limit tests */
        if (!may_expand_vm(mm, vma->vm_flags, grow))-------------首先检查内存空间是否够用。
            return -ENOMEM;
    
        /* Stack limit test */
        if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
            return -ENOMEM;--------------------------------------检查申请栈size是否超过栈空间限制。
    
        /* mlock limit tests */
        if (vma->vm_flags & VM_LOCKED) {
            unsigned long locked;
            unsigned long limit;
            locked = mm->locked_vm + grow;
            limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
            limit >>= PAGE_SHIFT;
            if (locked > limit && !capable(CAP_IPC_LOCK))
                return -ENOMEM;
        }
    
        /* Check to ensure the stack will not grow into a hugetlb-only region */
        new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
                vma->vm_end - size;
        if (is_hugepage_only_range(vma->vm_mm, new_start, size))
            return -EFAULT;
    
        /*
         * Overcommit..  This must be the final test, as it will
         * update security statistics.
         */
        if (security_vm_enough_memory_mm(mm, grow))
            return -ENOMEM;
    
        return 0;
    }

    3.5 RLIMIT_CORE 4 /* max core file size */

    RLIMIT_CORE限制了coredump产生文件尺寸的最大值,如果为0说明不允许创建core文件。

    void do_coredump(const siginfo_t *siginfo)
    {
    ...
        struct coredump_params cprm = {
            .siginfo = siginfo,
            .regs = signal_pt_regs(),
            .limit = rlimit(RLIMIT_CORE),--------------------------------cprm中包含了对coredump文件大小的限制,在具体格式进行coredump过程中会检查coredump文件是否超过此值。
            .mm_flags = mm->flags,
        };
    ...
    }

    3.6 RLIMIT_RSS 5 /* max resident set size */

    RLIMIT_RSS限制了进程最大实际内存使用量,未起作用。

    3.7 RLIMIT_NPROC 6 /* max number of processes */

    RLIMIT_NPROC规定了每个real user id可拥有的进程数量的最大值。

    do_execveat_common()执行新程序的时候检查current_user()->processes,如果超过RLIMIT_NPROC则返回EAGAIN,表示资源不够使用。

    copy_process()创建新进程/线程的时候同样会进行检查。

    static int do_execveat_common(int fd, struct filename *filename,
                      struct user_arg_ptr argv,
                      struct user_arg_ptr envp,
                      int flags)
    {
    ...
        if ((current->flags & PF_NPROC_EXCEEDED) &&
            atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
            retval = -EAGAIN;
            goto out_ret;
        }
    ...
    }
    
    static __latent_entropy struct task_struct *copy_process(
                        unsigned long clone_flags,
                        unsigned long stack_start,
                        unsigned long stack_size,
                        int __user *child_tidptr,
                        struct pid *pid,
                        int trace,
                        unsigned long tls,
                        int node)
    {
    ...
        if (atomic_read(&p->real_cred->user->processes) >=
                task_rlimit(p, RLIMIT_NPROC)) {
            if (p->real_cred->user != INIT_USER &&
                !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
    goto bad_fork_free;
            }
        }
    ...
    }

    max_threads如何计算?

    max_threads的大小是由set_max_threads()计算出来的。

    在内核启动的时候由fork_init()设置max_threads,之后也可以通过sysctl_max_threads()进行设置。

    可以通过/proc/sys/kernel/threads-max获取当前系统的max_threads。

    void __init fork_init(void)
    {
    ...
    set_max_threads(MAX_THREADS); ... } int sysctl_max_threads(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; int threads = max_threads; int min = MIN_THREADS; int max = MAX_THREADS; t = *table; t.data = &threads; t.extra1 = &min; t.extra2 = &max; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (ret || !write) return ret; set_max_threads(threads); return 0; } static void set_max_threads(unsigned int max_threads_suggested) { u64 threads; /* * The number of threads shall be limited such that the thread * structures may only consume a small part of the available memory. */ if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) threads = MAX_THREADS; else threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, (u64) THREAD_SIZE * 8UL); if (threads > max_threads_suggested) threads = max_threads_suggested; max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); }

    THREAD_SIZE为2个页面:

    #define THREAD_SIZE    (PAGE_SIZE * 2)

      #define MIN_THREADS 20

      #define FUTEX_TID_MASK 0x3fffffff

      #define MAX_THREADS FUTEX_TID_MASK

    所以max_threads数量为max_threads=totalram_pages*PAGE_SIZE/(THREAD_SIZE*8)。

    在totalram_pages为100556的情况下,max_threads=100556/16=6284(整数除法,6284.75向下取整),实际的RLIMIT_NPROC=max_threads/2,即为3142。

    可以通过ulimit -p验证。

    3.8 RLIMIT_NOFILE 7 /* max number of open files */

    RLIMIT_NOFILE限制进程打开文件数量最大值。

    比如alloc_fd()申请文件句柄号,end对应的就是RLIMIT_NOFILE。

    static int alloc_fd(unsigned start, unsigned flags)
    {
        return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);-----current->files是当前进程的打开文件列表。
    }
    
    /*
     * Allocate a file descriptor in the current task's file table.
     * The search starts at 0 and is bounded above by the task's
     * RLIMIT_NOFILE soft limit; the result of __alloc_fd() is returned as is.
     */
    int get_unused_fd_flags(unsigned flags)
    {
        return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
    }
    
    int __alloc_fd(struct files_struct *files,
               unsigned start, unsigned end, unsigned flags)
    {
    ...
        error = -EMFILE;
        if (fd >= end)---------------------------------------------------------------如果找到的fd超过RLIMIT_NOFILE则返回错误。
            goto out;
    ...
    }

    3.9 RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */

    RLIMIT_MEMLOCK用于限制使用mlock()锁定的locked_vm内存最大使用量。

    static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
    {
        unsigned long locked;
        unsigned long lock_limit;
        int error = -ENOMEM;
    
        if (!can_do_mlock())
            return -EPERM;
    
        lru_add_drain_all();    /* flush pagevec */
    
        len = PAGE_ALIGN(len + (offset_in_page(start)));
        start &= PAGE_MASK;
    
        lock_limit = rlimit(RLIMIT_MEMLOCK);-----------------------------------------系统对RLIMIT_MEMLOCK的阈值。
        lock_limit >>= PAGE_SHIFT;
        locked = len >> PAGE_SHIFT;--------------------------------------------------本次mlock内存大小。
    
        if (down_write_killable(&current->mm->mmap_sem))
            return -EINTR;
    
        locked += current->mm->locked_vm;--------------------------------------------进程中已经mlock内存大小。
        if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
            locked -= count_mm_mlocked_page_nr(current->mm,
                    start, len);
        }
    
        if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))-------------------------进行mlock内存检查,如有错误返回错误类型。
            error = apply_vma_lock_flags(start, len, flags);
    
        up_write(&current->mm->mmap_sem);
        if (error)
            return error;
    
        error = __mm_populate(start, len, 0);
        if (error)
            return __mlock_posix_error_return(error);
        return 0;
    }

    3.10 RLIMIT_AS 9 /* address space limit */

     RLIMIT_AS表示进程可使用的最大虚拟内存大小,超过后则不允许继续申请内存。

    bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
    {
        if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)----------------------total_vm加上将要新增内存,如果超过RLIMIT_AS则返回错误。
            return false;
    ...
        return true;
    }

    3.11 RLIMIT_LOCKS 10 /* maximum file locks held */

     RLIMIT_LOCKS表示进程可建立的文件锁数量最大值,未使用。

    3.12 RLIMIT_SIGPENDING 11 /* max number of pending signals */

     RLIMIT_SIGPENDING表示进程信号等待队列最大大小,一般等于RLIMIT_NPROC。

    void __init fork_init(void)
    {
    ...
        set_max_threads(MAX_THREADS);
    
        init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
        init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
        init_task.signal->rlim[RLIMIT_SIGPENDING] =
            init_task.signal->rlim[RLIMIT_NPROC];
    ...
    }

    __sigqueue_alloc()中,只有设置了override_rlimit,或者当前用户的pending信号数未超过RLIMIT_SIGPENDING,才会为pending信号分配sigqueue内存,否则信号被丢弃。

    static struct sigqueue *
    __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
    {
    ...
        if (override_rlimit ||
            atomic_read(&user->sigpending) <=
                task_rlimit(t, RLIMIT_SIGPENDING)) {-----------------------------------设置了override_rlimit,或者当前用户sigpending不超过RLIMIT_SIGPENDING时,才可以申请sigqueue。
            q = kmem_cache_alloc(sigqueue_cachep, flags);
        } else {
            print_dropped_signal(sig);-------------------------------------------------否则信号将被丢弃。
        }
    
        if (unlikely(q == NULL)) {
            atomic_dec(&user->sigpending);
            free_uid(user);
        } else {
            INIT_LIST_HEAD(&q->list);
            q->flags = 0;
            q->user = user;
        }
    
        return q;
    }

    3.13 RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */

     RLIMIT_MSGQUEUE限制了进程可为POSIX消息队列分配的最大字节数,超过限制后返回EMFILE错误。

    static struct inode *mqueue_get_inode(struct super_block *sb,
            struct ipc_namespace *ipc_ns, umode_t mode,
            struct mq_attr *attr)
    {
    ...
        if (S_ISREG(mode)) {
    ...
            if (u->mq_bytes + mq_bytes < u->mq_bytes ||
                u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
                spin_unlock(&mq_lock);
                /* mqueue_evict_inode() releases info->messages */
                ret = -EMFILE;
                goto out_inode;
            }
    ...
        } else if (S_ISDIR(mode)) {
    ...
        }
    
        return inode;
    out_inode:
        iput(inode);
    err:
        return ERR_PTR(ret);
    }

    3.14 RLIMIT_NICE 13 /* max nice prio allowed to raise to 0-39 for nice level 19 .. -20 */

     RLIMIT_NICE限制了进程可通过setpriority()或者nice()调用设置的最大nice值。

    /*
     * Apply @nice to the current task.
     *
     * If can_nice() refuses the requested value, fall back to the nice value
     * derived from the task's RLIMIT_NICE soft limit and log the
     * substitution. A user error is reported when that derived value is
     * above MAX_NICE.
     */
    static void binder_set_nice(long nice)
    {
        long min_nice;
    
        if (can_nice(current, nice)) {
            set_user_nice(current, nice);
            return;
        }
        /* Requested value refused: derive the fallback from the soft limit. */
        min_nice = rlimit_to_nice(current->signal->rlim[RLIMIT_NICE].rlim_cur);
        binder_debug(BINDER_DEBUG_PRIORITY_CAP,
                 "%d: nice value %ld not allowed use %ld instead\n",
                  current->pid, nice, min_nice);
        set_user_nice(current, min_nice);
        if (min_nice <= MAX_NICE)
            return;
        binder_user_error("%d RLIMIT_NICE not set\n", current->pid);
    }
    
    int can_nice(const struct task_struct *p, const int nice)
    {
        /* convert nice value [19,-20] to rlimit style value [1,40] */
        int nice_rlim = nice_to_rlimit(nice);
    
        return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
            capable(CAP_SYS_NICE));----------------------------------------------申请的nice值对应的rlimit值不超过RLIMIT_NICE,或者进程具备CAP_SYS_NICE,二者满足其一即允许修改nice值。
    }

    3.15 RLIMIT_RTPRIO 14 /* maximum realtime priority */

     RLIMIT_RTPRIO限制进程可通过sched_setscheduler()和sched_setparam()可设置的最大实时优先级。

    对于RT线程,超过RLIMIT_RTPRIO则返回EPERM错误。

    static int __sched_setscheduler(struct task_struct *p,
                    const struct sched_attr *attr,
                    bool user, bool pi)
    {
    ...
        if (user && !capable(CAP_SYS_NICE)) {
            if (fair_policy(policy)) {
                if (attr->sched_nice < task_nice(p) &&
                    !can_nice(p, attr->sched_nice))
                    return -EPERM;
            }
    
            if (rt_policy(policy)) {
                unsigned long rlim_rtprio =
                        task_rlimit(p, RLIMIT_RTPRIO);
    
                /* can't set/change the rt policy */
                if (policy != p->policy && !rlim_rtprio)
                    return -EPERM;
    
                /* can't increase priority */
                if (attr->sched_priority > p->rt_priority &&
                    attr->sched_priority > rlim_rtprio)
                    return -EPERM;
            }
    ...
        }
    ...
    }

    3.16 RLIMIT_RTTIME 15 /* timeout for RT tasks in us */

    RLIMIT_RTTIME限制了实时进程timer最大超时时间。

    check_thread_timers()中会对定时器超时值进行检查;watchdog()则负责累加rt.timeout(每个jiffy最多一次),超过min(soft, hard)对应的tick数后更新cputime_expires.sched_exp。

    static void check_thread_timers(struct task_struct *tsk,
                    struct list_head *firing)
    {
    ...
        soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
        if (soft != RLIM_INFINITY) {
            unsigned long hard =
                READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
    
            if (hard != RLIM_INFINITY &&
                tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
                /*
                 * At the hard limit, we just die.
                 * No need to calculate anything else now.
                 */
                __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);--------------------------如果实时线程的超时值,超过RLIMIT_RTTIME的rlim_max之后发送SIGKILL信号。
                return;
            }
            if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
                /*
                 * At the soft limit, send a SIGXCPU every second.
                 */
                if (soft < hard) {
                    soft += USEC_PER_SEC;
                    sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
                }
                printk(KERN_INFO
                    "RT Watchdog Timeout: %s[%d]\n",
                    tsk->comm, task_pid_nr(tsk));
                __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);-------------------------如果实时线程的超时值,超过RLIMIT_RTTIME的rlim_cur之后发送SIGXCPU信号。
            }
        }
        if (task_cputime_zero(tsk_expires))
            tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
    }
    
    /*
     * Account RLIMIT_RTTIME usage for task @p.
     *
     * When a soft limit is set, rt.timeout is advanced at most once per
     * jiffy. Once it exceeds the smaller of the soft and hard limits
     * (converted to ticks, rounding up), cputime_expires.sched_exp is set to
     * the task's current sum_exec_runtime.
     * NOTE(review): presumably that makes the timer expiry check run for
     * this task - confirm against the code that consumes sched_exp.
     */
    static void watchdog(struct rq *rq, struct task_struct *p)
    {
        unsigned long soft, hard;
    
        /* max may change after cur was read, this will be fixed next tick */
        soft = task_rlimit(p, RLIMIT_RTTIME);
        hard = task_rlimit_max(p, RLIMIT_RTTIME);
    
        if (soft != RLIM_INFINITY) {
            unsigned long next;
    
            /* Count each jiffy only once, however often we are called in it. */
            if (p->rt.watchdog_stamp != jiffies) {
                p->rt.timeout++;
                p->rt.watchdog_stamp = jiffies;
            }
    
            /* USEC_PER_SEC/HZ is the length of one tick in microseconds. */
            next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
            if (p->rt.timeout > next)
                p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
        }
    }
  • 相关阅读:
    hdu_2224_The shortest path(dp)
    hdu_4824_Disk Schedule(dp)
    hdu_5680_zxa and set(想法题)
    hdu_5683_zxa and xor(非正解的暴力)
    hdu_1429_胜利大逃亡(续)(BFS状压)
    hdu_1254_推箱子(双BFS)
    hdu_1969_pie(二分)
    hdu_2446_Shell Pyramid(数学,二分)
    hdu_2141_Can you find it?(二分)
    5.2 nc + JMX查看分布式程序数据
  • 原文地址:https://www.cnblogs.com/arnoldlu/p/12162776.html
Copyright © 2011-2022 走看看