Merge branch 'akpm' (patches from Andrew)
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 22 Mar 2020 17:46:50 +0000 (10:46 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 22 Mar 2020 17:46:50 +0000 (10:46 -0700)
Merge misc fixes from Andrew Morton:
 "10 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  x86/mm: split vmalloc_sync_all()
  mm, slub: prevent kmalloc_node crashes and memory leaks
  mm/mmu_notifier: silence PROVE_RCU_LIST warnings
  epoll: fix possible lost wakeup on epoll_ctl() path
  mm: do not allow MADV_PAGEOUT for CoW pages
  mm, memcg: throttle allocators based on ancestral memory.high
  mm, memcg: fix corruption on 64-bit divisor in memory.high throttling
  page-flags: fix a crash at SetPageError(THP_SWAP)
  mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case
  memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event

drivers/nvme/host/rdma.c
drivers/nvme/target/tcp.c
fs/file.c
fs/io_uring.c
include/linux/file.h
include/linux/socket.h
net/socket.c
tools/power/x86/turbostat/Makefile
tools/power/x86/turbostat/turbostat.c

index 3e85c5c..0fe08c4 100644 (file)
@@ -850,9 +850,11 @@ out_free_tagset:
        if (new)
                blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
 out_free_async_qe:
-       nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
-               sizeof(struct nvme_command), DMA_TO_DEVICE);
-       ctrl->async_event_sqe.data = NULL;
+       if (ctrl->async_event_sqe.data) {
+               nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+                       sizeof(struct nvme_command), DMA_TO_DEVICE);
+               ctrl->async_event_sqe.data = NULL;
+       }
 out_free_queue:
        nvme_rdma_free_queue(&ctrl->queues[0]);
        return error;
index af674fc..5bb5342 100644 (file)
@@ -515,7 +515,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
        return 1;
 }
 
-static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
+static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
 {
        struct nvmet_tcp_queue *queue = cmd->queue;
        int ret;
@@ -523,9 +523,15 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
        while (cmd->cur_sg) {
                struct page *page = sg_page(cmd->cur_sg);
                u32 left = cmd->cur_sg->length - cmd->offset;
+               int flags = MSG_DONTWAIT;
+
+               if ((!last_in_batch && cmd->queue->send_list_len) ||
+                   cmd->wbytes_done + left < cmd->req.transfer_len ||
+                   queue->data_digest || !queue->nvme_sq.sqhd_disabled)
+                       flags |= MSG_MORE;
 
                ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
-                                       left, MSG_DONTWAIT | MSG_MORE);
+                                       left, flags);
                if (ret <= 0)
                        return ret;
 
@@ -660,7 +666,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
        }
 
        if (cmd->state == NVMET_TCP_SEND_DATA) {
-               ret = nvmet_try_send_data(cmd);
+               ret = nvmet_try_send_data(cmd, last_in_batch);
                if (ret <= 0)
                        goto done_send;
        }
index a364e1a..c8a4e4c 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -540,9 +540,14 @@ static int alloc_fd(unsigned start, unsigned flags)
        return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
 }
 
+int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
+{
+       return __alloc_fd(current->files, 0, nofile, flags);
+}
+
 int get_unused_fd_flags(unsigned flags)
 {
-       return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+       return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
 }
 EXPORT_SYMBOL(get_unused_fd_flags);
 
index 1b25172..3affd96 100644 (file)
@@ -343,6 +343,7 @@ struct io_accept {
        struct sockaddr __user          *addr;
        int __user                      *addr_len;
        int                             flags;
+       unsigned long                   nofile;
 };
 
 struct io_sync {
@@ -397,6 +398,7 @@ struct io_open {
        struct filename                 *filename;
        struct statx __user             *buffer;
        struct open_how                 how;
+       unsigned long                   nofile;
 };
 
 struct io_files_update {
@@ -2577,6 +2579,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return ret;
        }
 
+       req->open.nofile = rlimit(RLIMIT_NOFILE);
        req->flags |= REQ_F_NEED_CLEANUP;
        return 0;
 }
@@ -2618,6 +2621,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return ret;
        }
 
+       req->open.nofile = rlimit(RLIMIT_NOFILE);
        req->flags |= REQ_F_NEED_CLEANUP;
        return 0;
 }
@@ -2636,7 +2640,7 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
        if (ret)
                goto err;
 
-       ret = get_unused_fd_flags(req->open.how.flags);
+       ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
        if (ret < 0)
                goto err;
 
@@ -3321,6 +3325,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
        accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
        accept->flags = READ_ONCE(sqe->accept_flags);
+       accept->nofile = rlimit(RLIMIT_NOFILE);
        return 0;
 #else
        return -EOPNOTSUPP;
@@ -3337,7 +3342,8 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
 
        file_flags = force_nonblock ? O_NONBLOCK : 0;
        ret = __sys_accept4_file(req->file, file_flags, accept->addr,
-                                       accept->addr_len, accept->flags);
+                                       accept->addr_len, accept->flags,
+                                       accept->nofile);
        if (ret == -EAGAIN && force_nonblock)
                return -EAGAIN;
        if (ret == -ERESTARTSYS)
@@ -4131,6 +4137,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
 {
        ssize_t ret = 0;
 
+       if (!sqe)
+               return 0;
+
        if (io_op_defs[req->opcode].file_table) {
                ret = io_grab_files(req);
                if (unlikely(ret))
@@ -4907,6 +4916,11 @@ err_req:
                if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
                        req->flags |= REQ_F_LINK;
                        INIT_LIST_HEAD(&req->link_list);
+
+                       if (io_alloc_async_ctx(req)) {
+                               ret = -EAGAIN;
+                               goto err_req;
+                       }
                        ret = io_req_defer_prep(req, sqe);
                        if (ret)
                                req->flags |= REQ_F_FAIL_LINK;
index c6c7b24..142d102 100644 (file)
@@ -85,6 +85,7 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
 extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
 extern void set_close_on_exec(unsigned int fd, int flag);
 extern bool get_close_on_exec(unsigned int fd);
+extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile);
 extern int get_unused_fd_flags(unsigned flags);
 extern void put_unused_fd(unsigned int fd);
 
index 2d23134..15f3412 100644 (file)
@@ -401,7 +401,8 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len,
                        int addr_len);
 extern int __sys_accept4_file(struct file *file, unsigned file_flags,
                        struct sockaddr __user *upeer_sockaddr,
-                        int __user *upeer_addrlen, int flags);
+                        int __user *upeer_addrlen, int flags,
+                        unsigned long nofile);
 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
                         int __user *upeer_addrlen, int flags);
 extern int __sys_socket(int family, int type, int protocol);
index b79a05d..2eecf15 100644 (file)
@@ -1707,7 +1707,8 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
 
 int __sys_accept4_file(struct file *file, unsigned file_flags,
                       struct sockaddr __user *upeer_sockaddr,
-                      int __user *upeer_addrlen, int flags)
+                      int __user *upeer_addrlen, int flags,
+                      unsigned long nofile)
 {
        struct socket *sock, *newsock;
        struct file *newfile;
@@ -1738,7 +1739,7 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
         */
        __module_get(newsock->ops->owner);
 
-       newfd = get_unused_fd_flags(flags);
+       newfd = __get_unused_fd_flags(flags, nofile);
        if (unlikely(newfd < 0)) {
                err = newfd;
                sock_release(newsock);
@@ -1807,7 +1808,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
        f = fdget(fd);
        if (f.file) {
                ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
-                                               upeer_addrlen, flags);
+                                               upeer_addrlen, flags,
+                                               rlimit(RLIMIT_NOFILE));
                if (f.flags)
                        fput(f.file);
        }
index 13f1e8b..2b65512 100644 (file)
@@ -16,7 +16,7 @@ override CFLAGS +=    -D_FORTIFY_SOURCE=2
 
 %: %.c
        @mkdir -p $(BUILD_OUTPUT)
-       $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS)
+       $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
 
 .PHONY : clean
 clean :
index 31c1ca0..33b3708 100644 (file)
@@ -30,7 +30,7 @@
 #include <sched.h>
 #include <time.h>
 #include <cpuid.h>
-#include <linux/capability.h>
+#include <sys/capability.h>
 #include <errno.h>
 #include <math.h>
 
@@ -304,6 +304,10 @@ int *irqs_per_cpu;         /* indexed by cpu_num */
 
 void setup_all_buffers(void);
 
+char *sys_lpi_file;
+char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
+char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
+
 int cpu_is_not_present(int cpu)
 {
        return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
@@ -2916,8 +2920,6 @@ int snapshot_gfx_mhz(void)
  *
  * record snapshot of
  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
- *
- * return 1 if config change requires a restart, else return 0
  */
 int snapshot_cpu_lpi_us(void)
 {
@@ -2941,17 +2943,14 @@ int snapshot_cpu_lpi_us(void)
 /*
  * snapshot_sys_lpi()
  *
- * record snapshot of
- * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
- *
- * return 1 if config change requires a restart, else return 0
+ * record snapshot of sys_lpi_file
  */
 int snapshot_sys_lpi_us(void)
 {
        FILE *fp;
        int retval;
 
-       fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
+       fp = fopen_or_die(sys_lpi_file, "r");
 
        retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
        if (retval != 1) {
@@ -3151,28 +3150,42 @@ void check_dev_msr()
                        err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
 }
 
-void check_permissions()
+/*
+ * check for CAP_SYS_RAWIO
+ * return 0 on success
+ * return 1 on fail
+ */
+int check_for_cap_sys_rawio(void)
 {
-       struct __user_cap_header_struct cap_header_data;
-       cap_user_header_t cap_header = &cap_header_data;
-       struct __user_cap_data_struct cap_data_data;
-       cap_user_data_t cap_data = &cap_data_data;
-       extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
-       int do_exit = 0;
-       char pathname[32];
+       cap_t caps;
+       cap_flag_value_t cap_flag_value;
 
-       /* check for CAP_SYS_RAWIO */
-       cap_header->pid = getpid();
-       cap_header->version = _LINUX_CAPABILITY_VERSION;
-       if (capget(cap_header, cap_data) < 0)
-               err(-6, "capget(2) failed");
+       caps = cap_get_proc();
+       if (caps == NULL)
+               err(-6, "cap_get_proc\n");
 
-       if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
-               do_exit++;
+       if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
+               err(-6, "cap_get\n");
+
+       if (cap_flag_value != CAP_SET) {
                warnx("capget(CAP_SYS_RAWIO) failed,"
                        " try \"# setcap cap_sys_rawio=ep %s\"", progname);
+               return 1;
        }
 
+       if (cap_free(caps) == -1)
+               err(-6, "cap_free\n");
+
+       return 0;
+}
+void check_permissions(void)
+{
+       int do_exit = 0;
+       char pathname[32];
+
+       /* check for CAP_SYS_RAWIO */
+       do_exit += check_for_cap_sys_rawio();
+
        /* test file permissions */
        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
        if (euidaccess(pathname, R_OK)) {
@@ -3265,6 +3278,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
        case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
+       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
                pkg_cstate_limits = glm_pkg_cstate_limits;
                break;
        default:
@@ -3336,6 +3350,17 @@ int is_skx(unsigned int family, unsigned int model)
        }
        return 0;
 }
+int is_ehl(unsigned int family, unsigned int model)
+{
+       if (!genuine_intel)
+               return 0;
+
+       switch (model) {
+       case INTEL_FAM6_ATOM_TREMONT:
+               return 1;
+       }
+       return 0;
+}
 
 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
 {
@@ -3478,6 +3503,23 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
        dump_nhm_cst_cfg();
 }
 
+static void dump_sysfs_file(char *path)
+{
+       FILE *input;
+       char cpuidle_buf[64];
+
+       input = fopen(path, "r");
+       if (input == NULL) {
+               if (debug)
+                       fprintf(outf, "NSFOD %s\n", path);
+               return;
+       }
+       if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
+               err(1, "%s: failed to read file", path);
+       fclose(input);
+
+       fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
+}
 static void
 dump_sysfs_cstate_config(void)
 {
@@ -3491,6 +3533,15 @@ dump_sysfs_cstate_config(void)
        if (!DO_BIC(BIC_sysfs))
                return;
 
+       if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
+               fprintf(outf, "cpuidle not loaded\n");
+               return;
+       }
+
+       dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
+       dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
+       dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
+
        for (state = 0; state < 10; ++state) {
 
                sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
@@ -3894,6 +3945,20 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
                else
                        BIC_PRESENT(BIC_PkgWatt);
                break;
+       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
+               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+               if (rapl_joules) {
+                       BIC_PRESENT(BIC_Pkg_J);
+                       BIC_PRESENT(BIC_Cor_J);
+                       BIC_PRESENT(BIC_RAM_J);
+                       BIC_PRESENT(BIC_GFX_J);
+               } else {
+                       BIC_PRESENT(BIC_PkgWatt);
+                       BIC_PRESENT(BIC_CorWatt);
+                       BIC_PRESENT(BIC_RAMWatt);
+                       BIC_PRESENT(BIC_GFXWatt);
+               }
+               break;
        case INTEL_FAM6_SKYLAKE_L:      /* SKL */
        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
@@ -4295,6 +4360,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
        case INTEL_FAM6_ATOM_GOLDMONT:          /* BXT */
        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
        case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
+       case INTEL_FAM6_ATOM_TREMONT:           /* EHL */
                return 1;
        }
        return 0;
@@ -4324,6 +4390,7 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
        case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
        case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
        case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
                return 1;
        }
        return 0;
@@ -4610,14 +4677,24 @@ unsigned int intel_model_duplicates(unsigned int model)
        case INTEL_FAM6_SKYLAKE:
        case INTEL_FAM6_KABYLAKE_L:
        case INTEL_FAM6_KABYLAKE:
+       case INTEL_FAM6_COMETLAKE_L:
+       case INTEL_FAM6_COMETLAKE:
                return INTEL_FAM6_SKYLAKE_L;
 
        case INTEL_FAM6_ICELAKE_L:
        case INTEL_FAM6_ICELAKE_NNPI:
+       case INTEL_FAM6_TIGERLAKE_L:
+       case INTEL_FAM6_TIGERLAKE:
                return INTEL_FAM6_CANNONLAKE_L;
 
        case INTEL_FAM6_ATOM_TREMONT_D:
                return INTEL_FAM6_ATOM_GOLDMONT_D;
+
+       case INTEL_FAM6_ATOM_TREMONT_L:
+               return INTEL_FAM6_ATOM_TREMONT;
+
+       case INTEL_FAM6_ICELAKE_X:
+               return INTEL_FAM6_SKYLAKE_X;
        }
        return model;
 }
@@ -4872,7 +4949,8 @@ void process_cpuid()
        do_slm_cstates = is_slm(family, model);
        do_knl_cstates  = is_knl(family, model);
 
-       if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
+       if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
+           is_ehl(family, model))
                BIC_NOT_PRESENT(BIC_CPU_c3);
 
        if (!quiet)
@@ -4907,10 +4985,16 @@ void process_cpuid()
        else
                BIC_NOT_PRESENT(BIC_CPU_LPI);
 
-       if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
+       if (!access(sys_lpi_file_sysfs, R_OK)) {
+               sys_lpi_file = sys_lpi_file_sysfs;
                BIC_PRESENT(BIC_SYS_LPI);
-       else
+       } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+               sys_lpi_file = sys_lpi_file_debugfs;
+               BIC_PRESENT(BIC_SYS_LPI);
+       } else {
+               sys_lpi_file_sysfs = NULL;
                BIC_NOT_PRESENT(BIC_SYS_LPI);
+       }
 
        if (!quiet)
                decode_misc_feature_control();
@@ -5306,7 +5390,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(outf, "turbostat version 19.08.31"
+       fprintf(outf, "turbostat version 20.03.20"
                " - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -5323,9 +5407,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
        }
 
        msrp->msr_num = msr_num;
-       strncpy(msrp->name, name, NAME_BYTES);
+       strncpy(msrp->name, name, NAME_BYTES - 1);
        if (path)
-               strncpy(msrp->path, path, PATH_BYTES);
+               strncpy(msrp->path, path, PATH_BYTES - 1);
        msrp->width = width;
        msrp->type = type;
        msrp->format = format;