Merge tag 'riscv-for-linus-4.20-mw0' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Oct 2018 01:01:29 +0000 (18:01 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Oct 2018 01:01:29 +0000 (18:01 -0700)
Pull RISC-V updates from Palmer Dabbelt:
 "This patch set contains a lot (at least, for me) of improvements to
  the RISC-V kernel port:

   - The removal of some cacheinfo values that were bogus.

   - On systems with F but without D the kernel will not show the F
     extension to userspace, as it isn't actually supported.

   - Support for futexes.

   - Removal of some unused code.

   - Cleanup of some menuconfig entries.

   - Support for systems without a floating-point unit, and for building
     kernels that will never use the floating-point unit.

   - More fixes to the RV32I port, which regressed again. It's really
     time to get this into a regression test somewhere so I stop
     breaking it. Thanks to Zong for resurrecting it again!

   - Various fixes that resulted from a year-old review of our original
     patch set that I finally got around to.

   - Various improvements to SMP support, largely based around having
     switched to logical hart numbering, as well as some interrupt
     improvements. This one is in the same patch set as above, thanks to
     Atish for shepherding everything through as my patch set was a bit of
     a mess.

  I'm pretty sure this is our largest patch set since the original
  kernel contribution, and it's certainly the one with the most
  contributors. While I don't have anything else I know I'm going to
  submit for the merge window, I would be somewhat surprised if I didn't
  screw anything up.

  Thanks for the help, everyone!"

* tag 'riscv-for-linus-4.20-mw0' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/riscv-linux: (31 commits)
  RISC-V: Cosmetic menuconfig changes
  riscv: move GCC version check for ARCH_SUPPORTS_INT128 to Kconfig
  RISC-V: remove the unused return_to_handler export
  RISC-V: Add futex support.
  RISC-V: Add FP register ptrace support for gdb.
  RISC-V: Mask out the F extension on systems without D
  RISC-V: Don't set cacheinfo.{physical_line_partition,attributes}
  RISC-V: Show IPI stats
  RISC-V: Show CPU ID and Hart ID separately in /proc/cpuinfo
  RISC-V: Use Linux logical CPU number instead of hartid
  RISC-V: Add logical CPU indexing for RISC-V
  RISC-V: Use WRITE_ONCE instead of direct access
  RISC-V: Use mmgrab()
  RISC-V: Rename im_okay_therefore_i_am to found_boot_cpu
  RISC-V: Rename riscv_of_processor_hart to riscv_of_processor_hartid
  RISC-V: Provide a cleaner raw_smp_processor_id()
  RISC-V: Disable preemption before enabling interrupts
  RISC-V: Comment on the TLB flush in smp_callin()
  RISC-V: Filter ISA and MMU values in cpuinfo
  RISC-V: Don't set cacheinfo.{physical_line_partition,attributes}
  ...

33 files changed:
arch/riscv/Kconfig
arch/riscv/Kconfig.debug
arch/riscv/Makefile
arch/riscv/include/asm/Kbuild
arch/riscv/include/asm/futex.h [new file with mode: 0644]
arch/riscv/include/asm/processor.h
arch/riscv/include/asm/smp.h
arch/riscv/include/asm/switch_to.h
arch/riscv/include/asm/tlbflush.h
arch/riscv/include/uapi/asm/elf.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/cacheinfo.c
arch/riscv/kernel/cpu.c
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/fpu.S [new file with mode: 0644]
arch/riscv/kernel/head.S
arch/riscv/kernel/irq.c
arch/riscv/kernel/mcount.S
arch/riscv/kernel/process.c
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/signal.c
arch/riscv/kernel/smp.c
arch/riscv/kernel/smpboot.c
arch/riscv/lib/Makefile
arch/riscv/mm/ioremap.c
drivers/clocksource/riscv_timer.c
drivers/irqchip/irq-sifive-plic.c
lib/Kconfig
lib/Makefile
lib/udivmoddi4.c [new file with mode: 0644]
lib/umoddi3.c [new file with mode: 0644]

index a344980287a519eec038d553c8ae4d0f82add8e3..fe451348ae571371f25887a1cfaa9616f43eef7c 100644 (file)
@@ -31,6 +31,7 @@ config RISCV
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_DMA_CONTIGUOUS
+       select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_GENERIC_DMA_COHERENT
        select HAVE_PERF_EVENTS
        select IRQ_DOMAIN
@@ -108,10 +109,12 @@ config ARCH_RV32I
        select GENERIC_LIB_ASHRDI3
        select GENERIC_LIB_LSHRDI3
        select GENERIC_LIB_UCMPDI2
+       select GENERIC_LIB_UMODDI3
 
 config ARCH_RV64I
        bool "RV64I"
        select 64BIT
+       select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FTRACE_MCOUNT_RECORD
@@ -208,14 +211,61 @@ config RISCV_BASE_PMU
 
 endmenu
 
+config FPU
+       bool "FPU support"
+       default y
+       help
+         Say N here if you want to disable all floating-point related procedure
+         in the kernel.
+
+         If you don't know what to do here, say Y.
+
 endmenu
 
-menu "Kernel type"
+menu "Kernel features"
 
 source "kernel/Kconfig.hz"
 
 endmenu
 
+menu "Boot options"
+
+config CMDLINE_BOOL
+       bool "Built-in kernel command line"
+       help
+         For most platforms, it is firmware or second stage bootloader
+         that by default specifies the kernel command line options.
+         However, it might be necessary or advantageous to either override
+         the default kernel command line or add a few extra options to it.
+         For such cases, this option allows hardcoding command line options
+         directly into the kernel.
+
+         For that, choose 'Y' here and fill in the extra boot parameters
+         in CONFIG_CMDLINE.
+
+         The built-in options will be concatenated to the default command
+         line if CMDLINE_FORCE is set to 'N'. Otherwise, the default
+         command line will be ignored and replaced by the built-in string.
+
+config CMDLINE
+       string "Built-in kernel command string"
+       depends on CMDLINE_BOOL
+       default ""
+       help
+         Supply command-line options at build time by entering them here.
+
+config CMDLINE_FORCE
+       bool "Built-in command line overrides bootloader arguments"
+       depends on CMDLINE_BOOL
+       help
+         Set this option to 'Y' to have the kernel ignore the bootloader
+         or firmware command line.  Instead, the built-in command line
+         will be used exclusively.
+
+         If you don't know what to do here, say N.
+
+endmenu
+
 menu "Bus support"
 
 config PCI
index 3224ff6ecf6e381dc8c993ea63b04a1fe878f7ce..c5a72f17c46925268052597cb15ed8571107794b 100644 (file)
@@ -1,37 +1,2 @@
-
-config CMDLINE_BOOL
-       bool "Built-in kernel command line"
-       help
-         For most platforms, it is firmware or second stage bootloader
-         that by default specifies the kernel command line options.
-         However, it might be necessary or advantageous to either override
-         the default kernel command line or add a few extra options to it.
-         For such cases, this option allows hardcoding command line options
-         directly into the kernel.
-
-         For that, choose 'Y' here and fill in the extra boot parameters
-         in CONFIG_CMDLINE.
-
-         The built-in options will be concatenated to the default command
-         line if CMDLINE_FORCE is set to 'N'. Otherwise, the default
-         command line will be ignored and replaced by the built-in string.
-
-config CMDLINE
-       string "Built-in kernel command string"
-       depends on CMDLINE_BOOL
-       default ""
-       help
-         Supply command-line options at build time by entering them here.
-
-config CMDLINE_FORCE
-       bool "Built-in command line overrides bootloader arguments"
-       depends on CMDLINE_BOOL
-       help
-         Set this option to 'Y' to have the kernel ignore the bootloader
-         or firmware command line.  Instead, the built-in command line
-         will be used exclusively.
-
-         If you don't know what to do here, say N.
-
 config EARLY_PRINTK
        def_bool y
index 61ec42405ec9630082beb941cb58a963127c13ad..d10146197533affd63c3e7392ccd73a4d7ba2e27 100644 (file)
@@ -25,10 +25,7 @@ ifeq ($(CONFIG_ARCH_RV64I),y)
 
        KBUILD_CFLAGS += -mabi=lp64
        KBUILD_AFLAGS += -mabi=lp64
-       
-       KBUILD_CFLAGS   += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
 
-       KBUILD_MARCH = rv64im
        KBUILD_LDFLAGS += -melf64lriscv
 else
        BITS := 32
@@ -36,22 +33,20 @@ else
 
        KBUILD_CFLAGS += -mabi=ilp32
        KBUILD_AFLAGS += -mabi=ilp32
-       KBUILD_MARCH = rv32im
        KBUILD_LDFLAGS += -melf32lriscv
 endif
 
 KBUILD_CFLAGS += -Wall
 
-ifeq ($(CONFIG_RISCV_ISA_A),y)
-       KBUILD_ARCH_A = a
-endif
-ifeq ($(CONFIG_RISCV_ISA_C),y)
-       KBUILD_ARCH_C = c
-endif
-
-KBUILD_AFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)fd$(KBUILD_ARCH_C)
+# ISA string setting
+riscv-march-$(CONFIG_ARCH_RV32I)       := rv32im
+riscv-march-$(CONFIG_ARCH_RV64I)       := rv64im
+riscv-march-$(CONFIG_RISCV_ISA_A)      := $(riscv-march-y)a
+riscv-march-$(CONFIG_FPU)              := $(riscv-march-y)fd
+riscv-march-$(CONFIG_RISCV_ISA_C)      := $(riscv-march-y)c
+KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
+KBUILD_AFLAGS += -march=$(riscv-march-y)
 
-KBUILD_CFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)$(KBUILD_ARCH_C)
 KBUILD_CFLAGS += -mno-save-restore
 KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
 
index efdbe311e9363f03fbb6394b698e898a8687787d..6a646d9ea780a9f6c33f8796cbe6c09dbb69530a 100644 (file)
@@ -13,7 +13,6 @@ generic-y += errno.h
 generic-y += exec.h
 generic-y += fb.h
 generic-y += fcntl.h
-generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hash.h
 generic-y += hw_irq.h
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
new file mode 100644 (file)
index 0000000..3b19eba
--- /dev/null
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2006  Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (c) 2018  Jim Wilson (jimw@sifive.com)
+ */
+
+#ifndef _ASM_FUTEX_H
+#define _ASM_FUTEX_H
+
+#ifndef CONFIG_RISCV_ISA_A
+/*
+ * Use the generic interrupt disabling versions if the A extension
+ * is not supported.
+ */
+#ifdef CONFIG_SMP
+#error "Can't support generic futex calls without A extension on SMP"
+#endif
+#include <asm-generic/futex.h>
+
+#else /* CONFIG_RISCV_ISA_A */
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+#include <asm/asm.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)     \
+{                                                              \
+       uintptr_t tmp;                                          \
+       __enable_user_access();                                 \
+       __asm__ __volatile__ (                                  \
+       "1:     " insn "                                \n"     \
+       "2:                                             \n"     \
+       "       .section .fixup,\"ax\"                  \n"     \
+       "       .balign 4                               \n"     \
+       "3:     li %[r],%[e]                            \n"     \
+       "       jump 2b,%[t]                            \n"     \
+       "       .previous                               \n"     \
+       "       .section __ex_table,\"a\"               \n"     \
+       "       .balign " RISCV_SZPTR "                 \n"     \
+       "       " RISCV_PTR " 1b, 3b                    \n"     \
+       "       .previous                               \n"     \
+       : [r] "+r" (ret), [ov] "=&r" (oldval),                  \
+         [u] "+m" (*uaddr), [t] "=&r" (tmp)                    \
+       : [op] "Jr" (oparg), [e] "i" (-EFAULT)                  \
+       : "memory");                                            \
+       __disable_user_access();                                \
+}
+
+static inline int
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
+{
+       int oldval = 0, ret = 0;
+
+       pagefault_disable();
+
+       switch (op) {
+       case FUTEX_OP_SET:
+               __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_ADD:
+               __futex_atomic_op("amoadd.w.aqrl %[ov],%z[op],%[u]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_OR:
+               __futex_atomic_op("amoor.w.aqrl %[ov],%z[op],%[u]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_ANDN:
+               __futex_atomic_op("amoand.w.aqrl %[ov],%z[op],%[u]",
+                                 ret, oldval, uaddr, ~oparg);
+               break;
+       case FUTEX_OP_XOR:
+               __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       pagefault_enable();
+
+       if (!ret)
+               *oval = oldval;
+
+       return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+                             u32 oldval, u32 newval)
+{
+       int ret = 0;
+       u32 val;
+       uintptr_t tmp;
+
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       __enable_user_access();
+       __asm__ __volatile__ (
+       "1:     lr.w.aqrl %[v],%[u]                     \n"
+       "       bne %[v],%z[ov],3f                      \n"
+       "2:     sc.w.aqrl %[t],%z[nv],%[u]              \n"
+       "       bnez %[t],1b                            \n"
+       "3:                                             \n"
+       "       .section .fixup,\"ax\"                  \n"
+       "       .balign 4                               \n"
+       "4:     li %[r],%[e]                            \n"
+       "       jump 3b,%[t]                            \n"
+       "       .previous                               \n"
+       "       .section __ex_table,\"a\"               \n"
+       "       .balign " RISCV_SZPTR "                 \n"
+       "       " RISCV_PTR " 1b, 4b                    \n"
+       "       " RISCV_PTR " 2b, 4b                    \n"
+       "       .previous                               \n"
+       : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp)
+       : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT)
+       : "memory");
+       __disable_user_access();
+
+       *uval = val;
+       return ret;
+}
+
+#endif /* CONFIG_RISCV_ISA_A */
+#endif /* _ASM_FUTEX_H */
index 3fe4af8147d2d6bc692f9ad32f2015e77c01a4bd..50de774d827ae69ca88495747592c44fa08dd486 100644 (file)
@@ -88,7 +88,7 @@ static inline void wait_for_interrupt(void)
 }
 
 struct device_node;
-extern int riscv_of_processor_hart(struct device_node *node);
+int riscv_of_processor_hartid(struct device_node *node);
 
 extern void riscv_fill_hwcap(void);
 
index 36016845461dc8f3788891f32455736b56ffd7f3..41aa73b476f44c70c85f469fc49b6c1bd8f9b76a 100644 (file)
 #ifndef _ASM_RISCV_SMP_H
 #define _ASM_RISCV_SMP_H
 
-/* This both needs asm-offsets.h and is used when generating it. */
-#ifndef GENERATING_ASM_OFFSETS
-#include <asm/asm-offsets.h>
-#endif
-
 #include <linux/cpumask.h>
 #include <linux/irqreturn.h>
+#include <linux/thread_info.h>
+
+#define INVALID_HARTID ULONG_MAX
+/*
+ * Mapping between linux logical cpu index and hartid.
+ */
+extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
+#define cpuid_to_hartid_map(cpu)    __cpuid_to_hartid_map[cpu]
+
+struct seq_file;
 
 #ifdef CONFIG_SMP
 
+/* print IPI stats */
+void show_ipi_stats(struct seq_file *p, int prec);
+
 /* SMP initialization hook for setup_arch */
 void __init setup_smp(void);
 
@@ -33,14 +41,31 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
 /* Hook for the generic smp_call_function_single() routine. */
 void arch_send_call_function_single_ipi(int cpu);
 
+int riscv_hartid_to_cpuid(int hartid);
+void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
+
 /*
- * This is particularly ugly: it appears we can't actually get the definition
- * of task_struct here, but we need access to the CPU this task is running on.
- * Instead of using C we're using asm-offsets.h to get the current processor
- * ID.
+ * Obtains the hart ID of the currently executing task.  This relies on
+ * THREAD_INFO_IN_TASK, but we define that unconditionally.
  */
-#define raw_smp_processor_id() (*((int*)((char*)get_current() + TASK_TI_CPU)))
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
-#endif /* CONFIG_SMP */
+#else
+
+static inline void show_ipi_stats(struct seq_file *p, int prec)
+{
+}
 
+static inline int riscv_hartid_to_cpuid(int hartid)
+{
+       return 0;
+}
+
+static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
+                                             struct cpumask *out)
+{
+       cpumask_set_cpu(cpuid_to_hartid_map(0), out);
+}
+
+#endif /* CONFIG_SMP */
 #endif /* _ASM_RISCV_SMP_H */
index dd6b05bff75b6fd41b3fd12239ace6edcf022bad..733559083f24bf89fd67e5a7ff64061140eda021 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/ptrace.h>
 #include <asm/csr.h>
 
+#ifdef CONFIG_FPU
 extern void __fstate_save(struct task_struct *save_to);
 extern void __fstate_restore(struct task_struct *restore_from);
 
@@ -55,6 +56,14 @@ static inline void __switch_to_aux(struct task_struct *prev,
        fstate_restore(next, task_pt_regs(next));
 }
 
+extern bool has_fpu;
+#else
+#define has_fpu false
+#define fstate_save(task, regs) do { } while (0)
+#define fstate_restore(task, regs) do { } while (0)
+#define __switch_to_aux(__prev, __next) do { } while (0)
+#endif
+
 extern struct task_struct *__switch_to(struct task_struct *,
                                       struct task_struct *);
 
@@ -62,7 +71,8 @@ extern struct task_struct *__switch_to(struct task_struct *,
 do {                                                   \
        struct task_struct *__prev = (prev);            \
        struct task_struct *__next = (next);            \
-       __switch_to_aux(__prev, __next);                \
+       if (has_fpu)                                    \
+               __switch_to_aux(__prev, __next);        \
        ((last) = __switch_to(__prev, __next));         \
 } while (0)
 
index 85c2d8bae9571ab2c3557b34b04b59d3355733a6..54fee0cadb1ebf34cc2b91d3d78f127c0b18d177 100644 (file)
@@ -16,6 +16,7 @@
 #define _ASM_RISCV_TLBFLUSH_H
 
 #include <linux/mm_types.h>
+#include <asm/smp.h>
 
 /*
  * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
@@ -49,13 +50,22 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 #include <asm/sbi.h>
 
+static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
+                                    unsigned long size)
+{
+       struct cpumask hmask;
+
+       cpumask_clear(&hmask);
+       riscv_cpuid_to_hartid_mask(cmask, &hmask);
+       sbi_remote_sfence_vma(hmask.bits, start, size);
+}
+
 #define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
 #define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
 #define flush_tlb_range(vma, start, end) \
-       sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \
-                             start, (end) - (start))
+       remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
 #define flush_tlb_mm(mm) \
-       sbi_remote_sfence_vma(mm_cpumask(mm)->bits, 0, -1)
+       remote_sfence_vma(mm_cpumask(mm), 0, -1)
 
 #endif /* CONFIG_SMP */
 
index 1e0dfc36aab9e597aaf0d0fb3c99b2ac3dcae750..644a00ce6e2ec166fa324e09261750ddbc188112 100644 (file)
@@ -19,7 +19,10 @@ typedef unsigned long elf_greg_t;
 typedef struct user_regs_struct elf_gregset_t;
 #define ELF_NGREG (sizeof(elf_gregset_t) / sizeof(elf_greg_t))
 
+/* We don't support f without d, or q.  */
+typedef __u64 elf_fpreg_t;
 typedef union __riscv_fp_state elf_fpregset_t;
+#define ELF_NFPREG (sizeof(struct __riscv_d_ext_state) / sizeof(elf_fpreg_t))
 
 #if __riscv_xlen == 64
 #define ELF_RISCV_R_SYM(r_info)                ELF64_R_SYM(r_info)
index e1274fc03af42de6b0a98fe0911e98d14db4dec8..f13f7f276639d504679034a36c53edc15f25dfe1 100644 (file)
@@ -31,6 +31,7 @@ obj-y += vdso/
 
 CFLAGS_setup.o := -mcmodel=medany
 
+obj-$(CONFIG_FPU)              += fpu.o
 obj-$(CONFIG_SMP)              += smpboot.o
 obj-$(CONFIG_SMP)              += smp.o
 obj-$(CONFIG_MODULES)          += module.o
index 0bc86e5f8f3fbbb0832a838a98af3a694bbf66b5..cb35ffd8ec6be590ef965a946d1b89f78b09eda8 100644 (file)
@@ -22,13 +22,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
 {
        this_leaf->level = level;
        this_leaf->type = type;
-       /* not a sector cache */
-       this_leaf->physical_line_partition = 1;
-       /* TODO: Add to DTS */
-       this_leaf->attributes =
-               CACHE_WRITE_BACK
-               | CACHE_READ_ALLOCATE
-               | CACHE_WRITE_ALLOCATE;
 }
 
 static int __init_cache_level(unsigned int cpu)
index ca6c81e54e3797141ec265676edce8f2871691fc..3a5a2ee31547b2ca1f3ec0f0cf613ff3448e85e3 100644 (file)
 #include <linux/init.h>
 #include <linux/seq_file.h>
 #include <linux/of.h>
+#include <asm/smp.h>
 
-/* Return -1 if not a valid hart */
-int riscv_of_processor_hart(struct device_node *node)
+/*
+ * Returns the hart ID of the given device tree node, or -1 if the device tree
+ * node isn't a RISC-V hart.
+ */
+int riscv_of_processor_hartid(struct device_node *node)
 {
        const char *isa, *status;
        u32 hart;
@@ -58,6 +62,64 @@ int riscv_of_processor_hart(struct device_node *node)
 
 #ifdef CONFIG_PROC_FS
 
+static void print_isa(struct seq_file *f, const char *orig_isa)
+{
+       static const char *ext = "mafdc";
+       const char *isa = orig_isa;
+       const char *e;
+
+       /*
+        * Linux doesn't support rv32e or rv128i, and we only support booting
+        * kernels on harts with the same ISA that the kernel is compiled for.
+        */
+#if defined(CONFIG_32BIT)
+       if (strncmp(isa, "rv32i", 5) != 0)
+               return;
+#elif defined(CONFIG_64BIT)
+       if (strncmp(isa, "rv64i", 5) != 0)
+               return;
+#endif
+
+       /* Print the base ISA, as we already know it's legal. */
+       seq_puts(f, "isa\t\t: ");
+       seq_write(f, isa, 5);
+       isa += 5;
+
+       /*
+        * Check the rest of the ISA string for valid extensions, printing those
+        * we find.  RISC-V ISA strings define an order, so we only print the
+        * extension bits when they're in order.
+        */
+       for (e = ext; *e != '\0'; ++e) {
+               if (isa[0] == e[0]) {
+                       seq_write(f, isa, 1);
+                       isa++;
+               }
+       }
+       seq_puts(f, "\n");
+
+       /*
+        * If we were given an unsupported ISA in the device tree then print
+        * a bit of info describing what went wrong.
+        */
+       if (isa[0] != '\0')
+               pr_info("unsupported ISA \"%s\" in device tree", orig_isa);
+}
+
+static void print_mmu(struct seq_file *f, const char *mmu_type)
+{
+#if defined(CONFIG_32BIT)
+       if (strcmp(mmu_type, "riscv,sv32") != 0)
+               return;
+#elif defined(CONFIG_64BIT)
+       if (strcmp(mmu_type, "riscv,sv39") != 0 &&
+           strcmp(mmu_type, "riscv,sv48") != 0)
+               return;
+#endif
+
+       seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+}
+
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
        *pos = cpumask_next(*pos - 1, cpu_online_mask);
@@ -78,21 +140,20 @@ static void c_stop(struct seq_file *m, void *v)
 
 static int c_show(struct seq_file *m, void *v)
 {
-       unsigned long hart_id = (unsigned long)v - 1;
-       struct device_node *node = of_get_cpu_node(hart_id, NULL);
+       unsigned long cpu_id = (unsigned long)v - 1;
+       struct device_node *node = of_get_cpu_node(cpuid_to_hartid_map(cpu_id),
+                                                  NULL);
        const char *compat, *isa, *mmu;
 
-       seq_printf(m, "hart\t: %lu\n", hart_id);
-       if (!of_property_read_string(node, "riscv,isa", &isa)
-           && isa[0] == 'r'
-           && isa[1] == 'v')
-               seq_printf(m, "isa\t: %s\n", isa);
-       if (!of_property_read_string(node, "mmu-type", &mmu)
-           && !strncmp(mmu, "riscv,", 6))
-               seq_printf(m, "mmu\t: %s\n", mmu+6);
+       seq_printf(m, "processor\t: %lu\n", cpu_id);
+       seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
+       if (!of_property_read_string(node, "riscv,isa", &isa))
+               print_isa(m, isa);
+       if (!of_property_read_string(node, "mmu-type", &mmu))
+               print_mmu(m, mmu);
        if (!of_property_read_string(node, "compatible", &compat)
            && strcmp(compat, "riscv"))
-               seq_printf(m, "uarch\t: %s\n", compat);
+               seq_printf(m, "uarch\t\t: %s\n", compat);
        seq_puts(m, "\n");
 
        return 0;
index 17011a870044c0beeee89d05491af5534f4562ff..5493f3228704740e913e0d5883d1cca99ba7f645 100644 (file)
@@ -22,6 +22,9 @@
 #include <asm/hwcap.h>
 
 unsigned long elf_hwcap __read_mostly;
+#ifdef CONFIG_FPU
+bool has_fpu __read_mostly;
+#endif
 
 void riscv_fill_hwcap(void)
 {
@@ -57,5 +60,17 @@ void riscv_fill_hwcap(void)
        for (i = 0; i < strlen(isa); ++i)
                elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
 
+       /* We don't support systems with F but without D, so mask those out
+        * here. */
+       if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) {
+               pr_info("This kernel does not support systems with F but not D");
+               elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
+       }
+
        pr_info("elf_hwcap is 0x%lx", elf_hwcap);
+
+#ifdef CONFIG_FPU
+       if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
+               has_fpu = true;
+#endif
 }
index fa2c08e3c05e6ee74ea0258b62cdf6580f98a385..13d4826ab2a1d3610504bbf30ff6625507244ad2 100644 (file)
@@ -168,7 +168,6 @@ ENTRY(handle_exception)
 
        /* Handle interrupts */
        move a0, sp /* pt_regs */
-       move a1, s4 /* scause */
        tail do_IRQ
 1:
        /* Exceptions run with interrupts enabled */
@@ -357,93 +356,6 @@ ENTRY(__switch_to)
        ret
 ENDPROC(__switch_to)
 
-ENTRY(__fstate_save)
-       li  a2,  TASK_THREAD_F0
-       add a0, a0, a2
-       li t1, SR_FS
-       csrs sstatus, t1
-       frcsr t0
-       fsd f0,  TASK_THREAD_F0_F0(a0)
-       fsd f1,  TASK_THREAD_F1_F0(a0)
-       fsd f2,  TASK_THREAD_F2_F0(a0)
-       fsd f3,  TASK_THREAD_F3_F0(a0)
-       fsd f4,  TASK_THREAD_F4_F0(a0)
-       fsd f5,  TASK_THREAD_F5_F0(a0)
-       fsd f6,  TASK_THREAD_F6_F0(a0)
-       fsd f7,  TASK_THREAD_F7_F0(a0)
-       fsd f8,  TASK_THREAD_F8_F0(a0)
-       fsd f9,  TASK_THREAD_F9_F0(a0)
-       fsd f10, TASK_THREAD_F10_F0(a0)
-       fsd f11, TASK_THREAD_F11_F0(a0)
-       fsd f12, TASK_THREAD_F12_F0(a0)
-       fsd f13, TASK_THREAD_F13_F0(a0)
-       fsd f14, TASK_THREAD_F14_F0(a0)
-       fsd f15, TASK_THREAD_F15_F0(a0)
-       fsd f16, TASK_THREAD_F16_F0(a0)
-       fsd f17, TASK_THREAD_F17_F0(a0)
-       fsd f18, TASK_THREAD_F18_F0(a0)
-       fsd f19, TASK_THREAD_F19_F0(a0)
-       fsd f20, TASK_THREAD_F20_F0(a0)
-       fsd f21, TASK_THREAD_F21_F0(a0)
-       fsd f22, TASK_THREAD_F22_F0(a0)
-       fsd f23, TASK_THREAD_F23_F0(a0)
-       fsd f24, TASK_THREAD_F24_F0(a0)
-       fsd f25, TASK_THREAD_F25_F0(a0)
-       fsd f26, TASK_THREAD_F26_F0(a0)
-       fsd f27, TASK_THREAD_F27_F0(a0)
-       fsd f28, TASK_THREAD_F28_F0(a0)
-       fsd f29, TASK_THREAD_F29_F0(a0)
-       fsd f30, TASK_THREAD_F30_F0(a0)
-       fsd f31, TASK_THREAD_F31_F0(a0)
-       sw t0, TASK_THREAD_FCSR_F0(a0)
-       csrc sstatus, t1
-       ret
-ENDPROC(__fstate_save)
-
-ENTRY(__fstate_restore)
-       li  a2,  TASK_THREAD_F0
-       add a0, a0, a2
-       li t1, SR_FS
-       lw t0, TASK_THREAD_FCSR_F0(a0)
-       csrs sstatus, t1
-       fld f0,  TASK_THREAD_F0_F0(a0)
-       fld f1,  TASK_THREAD_F1_F0(a0)
-       fld f2,  TASK_THREAD_F2_F0(a0)
-       fld f3,  TASK_THREAD_F3_F0(a0)
-       fld f4,  TASK_THREAD_F4_F0(a0)
-       fld f5,  TASK_THREAD_F5_F0(a0)
-       fld f6,  TASK_THREAD_F6_F0(a0)
-       fld f7,  TASK_THREAD_F7_F0(a0)
-       fld f8,  TASK_THREAD_F8_F0(a0)
-       fld f9,  TASK_THREAD_F9_F0(a0)
-       fld f10, TASK_THREAD_F10_F0(a0)
-       fld f11, TASK_THREAD_F11_F0(a0)
-       fld f12, TASK_THREAD_F12_F0(a0)
-       fld f13, TASK_THREAD_F13_F0(a0)
-       fld f14, TASK_THREAD_F14_F0(a0)
-       fld f15, TASK_THREAD_F15_F0(a0)
-       fld f16, TASK_THREAD_F16_F0(a0)
-       fld f17, TASK_THREAD_F17_F0(a0)
-       fld f18, TASK_THREAD_F18_F0(a0)
-       fld f19, TASK_THREAD_F19_F0(a0)
-       fld f20, TASK_THREAD_F20_F0(a0)
-       fld f21, TASK_THREAD_F21_F0(a0)
-       fld f22, TASK_THREAD_F22_F0(a0)
-       fld f23, TASK_THREAD_F23_F0(a0)
-       fld f24, TASK_THREAD_F24_F0(a0)
-       fld f25, TASK_THREAD_F25_F0(a0)
-       fld f26, TASK_THREAD_F26_F0(a0)
-       fld f27, TASK_THREAD_F27_F0(a0)
-       fld f28, TASK_THREAD_F28_F0(a0)
-       fld f29, TASK_THREAD_F29_F0(a0)
-       fld f30, TASK_THREAD_F30_F0(a0)
-       fld f31, TASK_THREAD_F31_F0(a0)
-       fscsr t0
-       csrc sstatus, t1
-       ret
-ENDPROC(__fstate_restore)
-
-
        .section ".rodata"
        /* Exception vector table */
 ENTRY(excp_vect_table)
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
new file mode 100644 (file)
index 0000000..1defb06
--- /dev/null
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/csr.h>
+#include <asm/asm-offsets.h>
+
+ENTRY(__fstate_save)
+       li  a2,  TASK_THREAD_F0
+       add a0, a0, a2
+       li t1, SR_FS
+       csrs sstatus, t1
+       frcsr t0
+       fsd f0,  TASK_THREAD_F0_F0(a0)
+       fsd f1,  TASK_THREAD_F1_F0(a0)
+       fsd f2,  TASK_THREAD_F2_F0(a0)
+       fsd f3,  TASK_THREAD_F3_F0(a0)
+       fsd f4,  TASK_THREAD_F4_F0(a0)
+       fsd f5,  TASK_THREAD_F5_F0(a0)
+       fsd f6,  TASK_THREAD_F6_F0(a0)
+       fsd f7,  TASK_THREAD_F7_F0(a0)
+       fsd f8,  TASK_THREAD_F8_F0(a0)
+       fsd f9,  TASK_THREAD_F9_F0(a0)
+       fsd f10, TASK_THREAD_F10_F0(a0)
+       fsd f11, TASK_THREAD_F11_F0(a0)
+       fsd f12, TASK_THREAD_F12_F0(a0)
+       fsd f13, TASK_THREAD_F13_F0(a0)
+       fsd f14, TASK_THREAD_F14_F0(a0)
+       fsd f15, TASK_THREAD_F15_F0(a0)
+       fsd f16, TASK_THREAD_F16_F0(a0)
+       fsd f17, TASK_THREAD_F17_F0(a0)
+       fsd f18, TASK_THREAD_F18_F0(a0)
+       fsd f19, TASK_THREAD_F19_F0(a0)
+       fsd f20, TASK_THREAD_F20_F0(a0)
+       fsd f21, TASK_THREAD_F21_F0(a0)
+       fsd f22, TASK_THREAD_F22_F0(a0)
+       fsd f23, TASK_THREAD_F23_F0(a0)
+       fsd f24, TASK_THREAD_F24_F0(a0)
+       fsd f25, TASK_THREAD_F25_F0(a0)
+       fsd f26, TASK_THREAD_F26_F0(a0)
+       fsd f27, TASK_THREAD_F27_F0(a0)
+       fsd f28, TASK_THREAD_F28_F0(a0)
+       fsd f29, TASK_THREAD_F29_F0(a0)
+       fsd f30, TASK_THREAD_F30_F0(a0)
+       fsd f31, TASK_THREAD_F31_F0(a0)
+       sw t0, TASK_THREAD_FCSR_F0(a0)
+       csrc sstatus, t1
+       ret
+ENDPROC(__fstate_save)
+
+ENTRY(__fstate_restore)
+       li  a2,  TASK_THREAD_F0
+       add a0, a0, a2
+       li t1, SR_FS
+       lw t0, TASK_THREAD_FCSR_F0(a0)
+       csrs sstatus, t1
+       fld f0,  TASK_THREAD_F0_F0(a0)
+       fld f1,  TASK_THREAD_F1_F0(a0)
+       fld f2,  TASK_THREAD_F2_F0(a0)
+       fld f3,  TASK_THREAD_F3_F0(a0)
+       fld f4,  TASK_THREAD_F4_F0(a0)
+       fld f5,  TASK_THREAD_F5_F0(a0)
+       fld f6,  TASK_THREAD_F6_F0(a0)
+       fld f7,  TASK_THREAD_F7_F0(a0)
+       fld f8,  TASK_THREAD_F8_F0(a0)
+       fld f9,  TASK_THREAD_F9_F0(a0)
+       fld f10, TASK_THREAD_F10_F0(a0)
+       fld f11, TASK_THREAD_F11_F0(a0)
+       fld f12, TASK_THREAD_F12_F0(a0)
+       fld f13, TASK_THREAD_F13_F0(a0)
+       fld f14, TASK_THREAD_F14_F0(a0)
+       fld f15, TASK_THREAD_F15_F0(a0)
+       fld f16, TASK_THREAD_F16_F0(a0)
+       fld f17, TASK_THREAD_F17_F0(a0)
+       fld f18, TASK_THREAD_F18_F0(a0)
+       fld f19, TASK_THREAD_F19_F0(a0)
+       fld f20, TASK_THREAD_F20_F0(a0)
+       fld f21, TASK_THREAD_F21_F0(a0)
+       fld f22, TASK_THREAD_F22_F0(a0)
+       fld f23, TASK_THREAD_F23_F0(a0)
+       fld f24, TASK_THREAD_F24_F0(a0)
+       fld f25, TASK_THREAD_F25_F0(a0)
+       fld f26, TASK_THREAD_F26_F0(a0)
+       fld f27, TASK_THREAD_F27_F0(a0)
+       fld f28, TASK_THREAD_F28_F0(a0)
+       fld f29, TASK_THREAD_F29_F0(a0)
+       fld f30, TASK_THREAD_F30_F0(a0)
+       fld f31, TASK_THREAD_F31_F0(a0)
+       fscsr t0
+       csrc sstatus, t1
+       ret
+ENDPROC(__fstate_restore)
index c4d2c63f9a2927396f2d47c23e35a9fdedcd21f9..711190d473d41f47dd52f9fc4720df720ebf57be 100644 (file)
@@ -47,6 +47,8 @@ ENTRY(_start)
        /* Save hart ID and DTB physical address */
        mv s0, a0
        mv s1, a1
+       la a2, boot_cpu_hartid
+       REG_S a0, (a2)
 
        /* Initialize page tables and relocate to virtual addresses */
        la sp, init_thread_union + THREAD_SIZE
@@ -55,7 +57,7 @@ ENTRY(_start)
 
        /* Restore C environment */
        la tp, init_task
-       sw s0, TASK_TI_CPU(tp)
+       sw zero, TASK_TI_CPU(tp)
 
        la sp, init_thread_union
        li a0, ASM_THREAD_SIZE
index 0cfac48a1272d98f95cd0fcc6ffaa536d862b408..48e6b7db83a1d17db763e362a15500602b1711e7 100644 (file)
@@ -8,6 +8,8 @@
 #include <linux/interrupt.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
+#include <linux/seq_file.h>
+#include <asm/smp.h>
 
 /*
  * Possible interrupt causes:
  */
 #define INTERRUPT_CAUSE_FLAG   (1UL << (__riscv_xlen - 1))
 
-asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long cause)
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+       show_ipi_stats(p, prec);
+       return 0;
+}
+
+asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
 
        irq_enter();
-       switch (cause & ~INTERRUPT_CAUSE_FLAG) {
+       switch (regs->scause & ~INTERRUPT_CAUSE_FLAG) {
        case INTERRUPT_CAUSE_TIMER:
                riscv_timer_interrupt();
                break;
index 5721624886a1cc8f0af054da3bec133b53de0381..8a5593ff9ff3da5c3ca80e14e8e9f87dc2e1211d 100644 (file)
@@ -75,7 +75,6 @@ ENTRY(return_to_handler)
        RESTORE_RET_ABI_STATE
        jalr    a1
 ENDPROC(return_to_handler)
-EXPORT_SYMBOL(return_to_handler)
 #endif
 
 #ifndef CONFIG_DYNAMIC_FTRACE
index d7c6ca7c95ae622bf5980e7f272b7efdfdb32e3f..bef19993ea92c4711eb0f93152e6260c70fe0fd9 100644 (file)
@@ -76,7 +76,9 @@ void show_regs(struct pt_regs *regs)
 void start_thread(struct pt_regs *regs, unsigned long pc,
        unsigned long sp)
 {
-       regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
+       regs->sstatus = SR_SPIE;
+       if (has_fpu)
+               regs->sstatus |= SR_FS_INITIAL;
        regs->sepc = pc;
        regs->sp = sp;
        set_fs(USER_DS);
@@ -84,12 +86,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
 
 void flush_thread(void)
 {
+#ifdef CONFIG_FPU
        /*
         * Reset FPU context
         *      frm: round to nearest, ties to even (IEEE default)
         *      fflags: accrued exceptions cleared
         */
        memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
+#endif
 }
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
index 9f82a7e34c648a370ec42f2e0bad711058e9baf2..60f1e02eed360780c93f25b70a2a4d484f983d9b 100644 (file)
@@ -28,6 +28,9 @@
 
 enum riscv_regset {
        REGSET_X,
+#ifdef CONFIG_FPU
+       REGSET_F,
+#endif
 };
 
 static int riscv_gpr_get(struct task_struct *target,
@@ -54,6 +57,45 @@ static int riscv_gpr_set(struct task_struct *target,
        return ret;
 }
 
+#ifdef CONFIG_FPU
+static int riscv_fpr_get(struct task_struct *target,
+                        const struct user_regset *regset,
+                        unsigned int pos, unsigned int count,
+                        void *kbuf, void __user *ubuf)
+{
+       int ret;
+       struct __riscv_d_ext_state *fstate = &target->thread.fstate;
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
+                                 offsetof(struct __riscv_d_ext_state, fcsr));
+       if (!ret) {
+               ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
+                                         offsetof(struct __riscv_d_ext_state, fcsr) +
+                                         sizeof(fstate->fcsr));
+       }
+
+       return ret;
+}
+
+static int riscv_fpr_set(struct task_struct *target,
+                        const struct user_regset *regset,
+                        unsigned int pos, unsigned int count,
+                        const void *kbuf, const void __user *ubuf)
+{
+       int ret;
+       struct __riscv_d_ext_state *fstate = &target->thread.fstate;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0,
+                                offsetof(struct __riscv_d_ext_state, fcsr));
+       if (!ret) {
+               ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0,
+                                        offsetof(struct __riscv_d_ext_state, fcsr) +
+                                        sizeof(fstate->fcsr));
+       }
+
+       return ret;
+}
+#endif
 
 static const struct user_regset riscv_user_regset[] = {
        [REGSET_X] = {
@@ -64,6 +106,16 @@ static const struct user_regset riscv_user_regset[] = {
                .get = &riscv_gpr_get,
                .set = &riscv_gpr_set,
        },
+#ifdef CONFIG_FPU
+       [REGSET_F] = {
+               .core_note_type = NT_PRFPREG,
+               .n = ELF_NFPREG,
+               .size = sizeof(elf_fpreg_t),
+               .align = sizeof(elf_fpreg_t),
+               .get = &riscv_fpr_get,
+               .set = &riscv_fpr_set,
+       },
+#endif
 };
 
 static const struct user_regset_view riscv_user_native_view = {
index b2d26d9d8489c8e8b6bba01adee0c573fd6564f8..2c290e6aaa6e414a965d7acc97604a26f75e7548 100644 (file)
@@ -81,6 +81,16 @@ EXPORT_SYMBOL(empty_zero_page);
 
 /* The lucky hart to first increment this variable will boot the other cores */
 atomic_t hart_lottery;
+unsigned long boot_cpu_hartid;
+
+unsigned long __cpuid_to_hartid_map[NR_CPUS] = {
+       [0 ... NR_CPUS-1] = INVALID_HARTID
+};
+
+void __init smp_setup_processor_id(void)
+{
+       cpuid_to_hartid_map(0) = boot_cpu_hartid;
+}
 
 #ifdef CONFIG_BLK_DEV_INITRD
 static void __init setup_initrd(void)
@@ -227,7 +237,10 @@ void __init setup_arch(char **cmdline_p)
        setup_bootmem();
        paging_init();
        unflatten_device_tree();
+
+#ifdef CONFIG_SWIOTLB
        swiotlb_init(1);
+#endif
 
 #ifdef CONFIG_SMP
        setup_smp();
index 718d0c984ef094dc5b3c02f8e0291910159ebc3e..f9b5e7e352ef7c489a582edf4982cfddeb1e457a 100644 (file)
@@ -37,45 +37,69 @@ struct rt_sigframe {
        struct ucontext uc;
 };
 
-static long restore_d_state(struct pt_regs *regs,
-       struct __riscv_d_ext_state __user *state)
+#ifdef CONFIG_FPU
+static long restore_fp_state(struct pt_regs *regs,
+                            union __riscv_fp_state *sc_fpregs)
 {
        long err;
+       struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+       size_t i;
+
        err = __copy_from_user(&current->thread.fstate, state, sizeof(*state));
-       if (likely(!err))
-               fstate_restore(current, regs);
+       if (unlikely(err))
+               return err;
+
+       fstate_restore(current, regs);
+
+       /* We support no other extension state at this time. */
+       for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+               u32 value;
+
+               err = __get_user(value, &sc_fpregs->q.reserved[i]);
+               if (unlikely(err))
+                       break;
+               if (value != 0)
+                       return -EINVAL;
+       }
+
        return err;
 }
 
-static long save_d_state(struct pt_regs *regs,
-       struct __riscv_d_ext_state __user *state)
+static long save_fp_state(struct pt_regs *regs,
+                         union __riscv_fp_state *sc_fpregs)
 {
+       long err;
+       struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+       size_t i;
+
        fstate_save(current, regs);
-       return __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+       err = __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+       if (unlikely(err))
+               return err;
+
+       /* We support no other extension state at this time. */
+       for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+               err = __put_user(0, &sc_fpregs->q.reserved[i]);
+               if (unlikely(err))
+                       break;
+       }
+
+       return err;
 }
+#else
+#define save_fp_state(regs, sc_fpregs) (0)
+#define restore_fp_state(regs, sc_fpregs) (0)
+#endif
 
 static long restore_sigcontext(struct pt_regs *regs,
        struct sigcontext __user *sc)
 {
        long err;
-       size_t i;
        /* sc_regs is structured the same as the start of pt_regs */
        err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
-       if (unlikely(err))
-               return err;
        /* Restore the floating-point state. */
-       err = restore_d_state(regs, &sc->sc_fpregs.d);
-       if (unlikely(err))
-               return err;
-       /* We support no other extension state at this time. */
-       for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) {
-               u32 value;
-               err = __get_user(value, &sc->sc_fpregs.q.reserved[i]);
-               if (unlikely(err))
-                       break;
-               if (value != 0)
-                       return -EINVAL;
-       }
+       if (has_fpu)
+               err |= restore_fp_state(regs, &sc->sc_fpregs);
        return err;
 }
 
@@ -124,14 +148,11 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
 {
        struct sigcontext __user *sc = &frame->uc.uc_mcontext;
        long err;
-       size_t i;
        /* sc_regs is structured the same as the start of pt_regs */
        err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
        /* Save the floating-point state. */
-       err |= save_d_state(regs, &sc->sc_fpregs.d);
-       /* We support no other extension state at this time. */
-       for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++)
-               err |= __put_user(0, &sc->sc_fpregs.q.reserved[i]);
+       if (has_fpu)
+               err |= save_fp_state(regs, &sc->sc_fpregs);
        return err;
 }
 
index 906fe21ea21bf3f10db2e460cfe2b4fbd21ac790..57b1383e5ef7480142ce857e600a6f0e73a0944e 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
+#include <linux/seq_file.h>
 
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 
-/* A collection of single bit ipi messages.  */
-static struct {
-       unsigned long bits ____cacheline_aligned;
-} ipi_data[NR_CPUS] __cacheline_aligned;
-
 enum ipi_message_type {
        IPI_RESCHEDULE,
        IPI_CALL_FUNC,
        IPI_MAX
 };
 
+/* A collection of single bit ipi messages.  */
+static struct {
+       unsigned long stats[IPI_MAX] ____cacheline_aligned;
+       unsigned long bits ____cacheline_aligned;
+} ipi_data[NR_CPUS] __cacheline_aligned;
+
+int riscv_hartid_to_cpuid(int hartid)
+{
+       int i = -1;
+
+       for (i = 0; i < NR_CPUS; i++)
+               if (cpuid_to_hartid_map(i) == hartid)
+                       return i;
 
+       pr_err("Couldn't find cpu id for hartid [%d]\n", hartid);
+       BUG();
+       return i;
+}
+
+void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
+{
+       int cpu;
+
+       for_each_cpu(cpu, in)
+               cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
+}
 /* Unsupported */
 int setup_profiling_timer(unsigned int multiplier)
 {
@@ -48,6 +69,7 @@ int setup_profiling_timer(unsigned int multiplier)
 void riscv_software_interrupt(void)
 {
        unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
+       unsigned long *stats = ipi_data[smp_processor_id()].stats;
 
        /* Clear pending IPI */
        csr_clear(sip, SIE_SSIE);
@@ -62,11 +84,15 @@ void riscv_software_interrupt(void)
                if (ops == 0)
                        return;
 
-               if (ops & (1 << IPI_RESCHEDULE))
+               if (ops & (1 << IPI_RESCHEDULE)) {
+                       stats[IPI_RESCHEDULE]++;
                        scheduler_ipi();
+               }
 
-               if (ops & (1 << IPI_CALL_FUNC))
+               if (ops & (1 << IPI_CALL_FUNC)) {
+                       stats[IPI_CALL_FUNC]++;
                        generic_smp_call_function_interrupt();
+               }
 
                BUG_ON((ops >> IPI_MAX) != 0);
 
@@ -78,14 +104,36 @@ void riscv_software_interrupt(void)
 static void
 send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
 {
-       int i;
+       int cpuid, hartid;
+       struct cpumask hartid_mask;
 
+       cpumask_clear(&hartid_mask);
        mb();
-       for_each_cpu(i, to_whom)
-               set_bit(operation, &ipi_data[i].bits);
-
+       for_each_cpu(cpuid, to_whom) {
+               set_bit(operation, &ipi_data[cpuid].bits);
+               hartid = cpuid_to_hartid_map(cpuid);
+               cpumask_set_cpu(hartid, &hartid_mask);
+       }
        mb();
-       sbi_send_ipi(cpumask_bits(to_whom));
+       sbi_send_ipi(cpumask_bits(&hartid_mask));
+}
+
+static const char * const ipi_names[] = {
+       [IPI_RESCHEDULE]        = "Rescheduling interrupts",
+       [IPI_CALL_FUNC]         = "Function call interrupts",
+};
+
+void show_ipi_stats(struct seq_file *p, int prec)
+{
+       unsigned int cpu, i;
+
+       for (i = 0; i < IPI_MAX; i++) {
+               seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
+                          prec >= 4 ? " " : "");
+               for_each_online_cpu(cpu)
+                       seq_printf(p, "%10lu ", ipi_data[cpu].stats[i]);
+               seq_printf(p, " %s\n", ipi_names[i]);
+       }
 }
 
 void arch_send_call_function_ipi_mask(struct cpumask *mask)
@@ -127,7 +175,7 @@ void smp_send_reschedule(int cpu)
 void flush_icache_mm(struct mm_struct *mm, bool local)
 {
        unsigned int cpu;
-       cpumask_t others, *mask;
+       cpumask_t others, hmask, *mask;
 
        preempt_disable();
 
@@ -145,9 +193,11 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
         */
        cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
        local |= cpumask_empty(&others);
-       if (mm != current->active_mm || !local)
-               sbi_remote_fence_i(others.bits);
-       else {
+       if (mm != current->active_mm || !local) {
+               cpumask_clear(&hmask);
+               riscv_cpuid_to_hartid_mask(&others, &hmask);
+               sbi_remote_fence_i(hmask.bits);
+       } else {
                /*
                 * It's assumed that at least one strongly ordered operation is
                 * performed on this hart between setting a hart's cpumask bit
index 56abab6a9812457072f4948a353fd11b40f3cd2a..18cda0e8cf9414310e0ec594ee30360f7938808c 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/irq.h>
 #include <linux/of.h>
 #include <linux/sched/task_stack.h>
+#include <linux/sched/mm.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -50,25 +51,33 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 void __init setup_smp(void)
 {
        struct device_node *dn = NULL;
-       int hart, im_okay_therefore_i_am = 0;
+       int hart;
+       bool found_boot_cpu = false;
+       int cpuid = 1;
 
        while ((dn = of_find_node_by_type(dn, "cpu"))) {
-               hart = riscv_of_processor_hart(dn);
-               if (hart >= 0) {
-                       set_cpu_possible(hart, true);
-                       set_cpu_present(hart, true);
-                       if (hart == smp_processor_id()) {
-                               BUG_ON(im_okay_therefore_i_am);
-                               im_okay_therefore_i_am = 1;
-                       }
+               hart = riscv_of_processor_hartid(dn);
+               if (hart < 0)
+                       continue;
+
+               if (hart == cpuid_to_hartid_map(0)) {
+                       BUG_ON(found_boot_cpu);
+                       found_boot_cpu = 1;
+                       continue;
                }
+
+               cpuid_to_hartid_map(cpuid) = hart;
+               set_cpu_possible(cpuid, true);
+               set_cpu_present(cpuid, true);
+               cpuid++;
        }
 
-       BUG_ON(!im_okay_therefore_i_am);
+       BUG_ON(!found_boot_cpu);
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
+       int hartid = cpuid_to_hartid_map(cpu);
        tidle->thread_info.cpu = cpu;
 
        /*
@@ -79,8 +88,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
         * the spinning harts that they can continue the boot process.
         */
        smp_mb();
-       __cpu_up_stack_pointer[cpu] = task_stack_page(tidle) + THREAD_SIZE;
-       __cpu_up_task_pointer[cpu] = tidle;
+       WRITE_ONCE(__cpu_up_stack_pointer[hartid],
+                 task_stack_page(tidle) + THREAD_SIZE);
+       WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
 
        while (!cpu_online(cpu))
                cpu_relax();
@@ -100,14 +110,22 @@ asmlinkage void __init smp_callin(void)
        struct mm_struct *mm = &init_mm;
 
        /* All kernel threads share the same mm context.  */
-       atomic_inc(&mm->mm_count);
+       mmgrab(mm);
        current->active_mm = mm;
 
        trap_init();
        notify_cpu_starting(smp_processor_id());
        set_cpu_online(smp_processor_id(), 1);
+       /*
+        * Remote TLB flushes are ignored while the CPU is offline, so emit
+        * a local TLB flush right now just in case.
+        */
        local_flush_tlb_all();
-       local_irq_enable();
+       /*
+        * Disable preemption before enabling interrupts, so we don't try to
+        * schedule a CPU that hasn't actually started yet.
+        */
        preempt_disable();
+       local_irq_enable();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
index 445ec84f9a4796e90ca80d882300d24ff770b435..5739bd05d289e5034b5d9faee2baebb1be1251ed 100644 (file)
@@ -2,6 +2,7 @@ lib-y   += delay.o
 lib-y  += memcpy.o
 lib-y  += memset.o
 lib-y  += uaccess.o
-lib-y  += tishift.o
+
+lib-$(CONFIG_64BIT) += tishift.o
 
 lib-$(CONFIG_32BIT) += udivdi3.o
index 70ef2724cdf61e5b2001f0ec6243b7f5e9c6bfaa..bd2f2db557cc54f182794284bb0d9d317cb2b36a 100644 (file)
@@ -42,7 +42,7 @@ static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size,
 
        /* Page-align mappings */
        offset = addr & (~PAGE_MASK);
-       addr &= PAGE_MASK;
+       addr -= offset;
        size = PAGE_ALIGN(size + offset);
 
        area = get_vm_area_caller(size, VM_IOREMAP, caller);
index 4e8b347e43e2ef385683a2bf33184c2cfa5ade1b..084e97dc10ed9fd8c89004ee16894e93db573ff4 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
+#include <asm/smp.h>
 #include <asm/sbi.h>
 
 /*
@@ -84,13 +85,16 @@ void riscv_timer_interrupt(void)
 
 static int __init riscv_timer_init_dt(struct device_node *n)
 {
-       int cpu_id = riscv_of_processor_hart(n), error;
+       int cpuid, hartid, error;
        struct clocksource *cs;
 
-       if (cpu_id != smp_processor_id())
+       hartid = riscv_of_processor_hartid(n);
+       cpuid = riscv_hartid_to_cpuid(hartid);
+
+       if (cpuid != smp_processor_id())
                return 0;
 
-       cs = per_cpu_ptr(&riscv_clocksource, cpu_id);
+       cs = per_cpu_ptr(&riscv_clocksource, cpuid);
        clocksource_register_hz(cs, riscv_timebase);
 
        error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING,
@@ -98,7 +102,7 @@ static int __init riscv_timer_init_dt(struct device_node *n)
                         riscv_timer_starting_cpu, riscv_timer_dying_cpu);
        if (error)
                pr_err("RISCV timer register failed [%d] for cpu = [%d]\n",
-                      error, cpu_id);
+                      error, cpuid);
        return error;
 }
 
index 532e9d68c70428bc65cd5285d9cecb4c1c535831..357e9daf94ae061dad439df9404f9898354d7930 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
+#include <asm/smp.h>
 
 /*
  * This driver implements a version of the RISC-V PLIC with the actual layout
@@ -176,7 +177,7 @@ static int plic_find_hart_id(struct device_node *node)
 {
        for (; node; node = node->parent) {
                if (of_device_is_compatible(node, "riscv"))
-                       return riscv_of_processor_hart(node);
+                       return riscv_of_processor_hartid(node);
        }
 
        return -1;
@@ -218,7 +219,7 @@ static int __init plic_init(struct device_node *node,
                struct of_phandle_args parent;
                struct plic_handler *handler;
                irq_hw_number_t hwirq;
-               int cpu;
+               int cpu, hartid;
 
                if (of_irq_parse_one(node, i, &parent)) {
                        pr_err("failed to parse parent for context %d.\n", i);
@@ -229,12 +230,13 @@ static int __init plic_init(struct device_node *node,
                if (parent.args[0] == -1)
                        continue;
 
-               cpu = plic_find_hart_id(parent.np);
-               if (cpu < 0) {
+               hartid = plic_find_hart_id(parent.np);
+               if (hartid < 0) {
                        pr_warn("failed to parse hart ID for context %d.\n", i);
                        continue;
                }
 
+               cpu = riscv_hartid_to_cpuid(hartid);
                handler = per_cpu_ptr(&plic_handlers, cpu);
                handler->present = true;
                handler->ctxid = i;
index a3928d4438b5008c1fa01470de11245d1557bc33..d82f20609939552b6708b15d8969819d65cb13f7 100644 (file)
@@ -621,3 +621,6 @@ config GENERIC_LIB_CMPDI2
 
 config GENERIC_LIB_UCMPDI2
        bool
+
+config GENERIC_LIB_UMODDI3
+       bool
index 423876446810942b93c9ebba6ffb9bf086b69c8d..56a8d9c23ef3c89534e03b90aa0573fc7020925b 100644 (file)
@@ -270,3 +270,4 @@ obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o
 obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
 obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
 obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_UMODDI3) += umoddi3.o udivmoddi4.o
diff --git a/lib/udivmoddi4.c b/lib/udivmoddi4.c
new file mode 100644 (file)
index 0000000..c08bc8a
--- /dev/null
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#include <linux/libgcc.h>
+
+#define count_leading_zeros(COUNT, X)   ((COUNT) = __builtin_clz(X))
+
+#define W_TYPE_SIZE 32
+
+#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))
+
+/* If we still don't have umul_ppmm, define it using plain C. */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)                                                \
+       do {                                                            \
+               unsigned long __x0, __x1, __x2, __x3;                   \
+               unsigned short __ul, __vl, __uh, __vh;                  \
+                                                                       \
+               __ul = __ll_lowpart(u);                                 \
+               __uh = __ll_highpart(u);                                \
+               __vl = __ll_lowpart(v);                                 \
+               __vh = __ll_highpart(v);                                \
+                                                                       \
+               __x0 = (unsigned long) __ul * __vl;                     \
+               __x1 = (unsigned long) __ul * __vh;                     \
+               __x2 = (unsigned long) __uh * __vl;                     \
+               __x3 = (unsigned long) __uh * __vh;                     \
+                                                                       \
+               __x1 += __ll_highpart(__x0);                            \
+               __x1 += __x2;                                           \
+               if (__x1 < __x2)                                        \
+                       __x3 += __ll_B;                                 \
+                                                                       \
+               (w1) = __x3 + __ll_highpart(__x1);                      \
+               (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+       } while (0)
+#endif
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)                             \
+       do {                                                            \
+               unsigned long __x;                                      \
+               __x = (al) - (bl);                                      \
+               (sh) = (ah) - (bh) - (__x > (al));                      \
+               (sl) = __x;                                             \
+       } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d)                                        \
+       do {                                                            \
+               unsigned long __d1, __d0, __q1, __q0;                   \
+               unsigned long __r1, __r0, __m;                          \
+               __d1 = __ll_highpart(d);                                \
+               __d0 = __ll_lowpart(d);                         \
+                                                                       \
+               __r1 = (n1) % __d1;                                     \
+               __q1 = (n1) / __d1;                                     \
+               __m = (unsigned long) __q1 * __d0;                      \
+               __r1 = __r1 * __ll_B | __ll_highpart(n0);               \
+               if (__r1 < __m) {                                       \
+                       __q1--, __r1 += (d);                            \
+                       if (__r1 >= (d))                                \
+                               if (__r1 < __m)                         \
+                                       __q1--, __r1 += (d);            \
+               }                                                       \
+               __r1 -= __m;                                            \
+                                                                       \
+               __r0 = __r1 % __d1;                                     \
+               __q0 = __r1 / __d1;                                     \
+               __m = (unsigned long) __q0 * __d0;                      \
+               __r0 = __r0 * __ll_B | __ll_lowpart(n0);                \
+               if (__r0 < __m) {                                       \
+                       __q0--, __r0 += (d);                            \
+                       if (__r0 >= (d))                                \
+                               if (__r0 < __m)                         \
+                                       __q0--, __r0 += (d);            \
+               }                                                       \
+               __r0 -= __m;                                            \
+                                                                       \
+               (q) = (unsigned long) __q1 * __ll_B | __q0;             \
+               (r) = __r0;                                             \
+       } while (0)
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+unsigned long long __udivmoddi4(unsigned long long u, unsigned long long v,
+                               unsigned long long *rp)
+{
+       const DWunion nn = {.ll = u };
+       const DWunion dd = {.ll = v };
+       DWunion rr, ww;
+       unsigned long d0, d1, n0, n1, n2;
+       unsigned long q0 = 0, q1 = 0;
+       unsigned long b, bm;
+
+       d0 = dd.s.low;
+       d1 = dd.s.high;
+       n0 = nn.s.low;
+       n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+
+       if (d1 == 0) {
+               if (d0 > n1) {
+                       /* 0q = nn / 0D */
+
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+                       q1 = 0;
+
+                       /* Remainder in n0. */
+               } else {
+                       /* qq = NN / 0d */
+
+                       if (d0 == 0)
+                               /* Divide intentionally by zero. */
+                               d0 = 1 / d0;
+
+                       udiv_qrnnd(q1, n1, 0, n1, d0);
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+
+                       /* Remainder in n0. */
+               }
+
+               if (rp != 0) {
+                       rr.s.low = n0;
+                       rr.s.high = 0;
+                       *rp = rr.ll;
+               }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+       if (d1 == 0) {
+               if (d0 > n1) {
+                       /* 0q = nn / 0D */
+
+                       count_leading_zeros(bm, d0);
+
+                       if (bm != 0) {
+                               /*
+                                * Normalize, i.e. make the most significant bit
+                                * of the denominator set.
+                                */
+
+                               d0 = d0 << bm;
+                               n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+                               n0 = n0 << bm;
+                       }
+
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+                       q1 = 0;
+
+                       /* Remainder in n0 >> bm. */
+               } else {
+                       /* qq = NN / 0d */
+
+                       if (d0 == 0)
+                               /* Divide intentionally by zero. */
+                               d0 = 1 / d0;
+
+                       count_leading_zeros(bm, d0);
+
+                       if (bm == 0) {
+                               /*
+                                * From (n1 >= d0) /\ (the most significant bit
+                                * of d0 is set), conclude (the most significant
+                                * bit of n1 is set) /\ (the leading quotient
+                                * digit q1 = 1).
+                                *
+                                * This special case is necessary, not an
+                                * optimization. (Shifts counts of W_TYPE_SIZE
+                                * are undefined.)
+                                */
+
+                               n1 -= d0;
+                               q1 = 1;
+                       } else {
+                               /* Normalize. */
+
+                               b = W_TYPE_SIZE - bm;
+
+                               d0 = d0 << bm;
+                               n2 = n1 >> b;
+                               n1 = (n1 << bm) | (n0 >> b);
+                               n0 = n0 << bm;
+
+                               udiv_qrnnd(q1, n1, n2, n1, d0);
+                       }
+
+                       /* n1 != d0... */
+
+                       udiv_qrnnd(q0, n0, n1, n0, d0);
+
+                       /* Remainder in n0 >> bm. */
+               }
+
+               if (rp != 0) {
+                       rr.s.low = n0 >> bm;
+                       rr.s.high = 0;
+                       *rp = rr.ll;
+               }
+
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+       } else {
+               if (d1 > n1) {
+                       /* 00 = nn / DD */
+
+                       q0 = 0;
+                       q1 = 0;
+
+                       /* Remainder in n1n0. */
+                       if (rp != 0) {
+                               rr.s.low = n0;
+                               rr.s.high = n1;
+                               *rp = rr.ll;
+                       }
+               } else {
+                       /* 0q = NN / dd */
+
+                       count_leading_zeros(bm, d1);
+                       if (bm == 0) {
+                               /*
+                                * From (n1 >= d1) /\ (the most significant bit
+                                * of d1 is set), conclude (the most significant
+                                * bit of n1 is set) /\ (the quotient digit q0 =
+                                * 0 or 1).
+                                *
+                                * This special case is necessary, not an
+                                * optimization.
+                                */
+
+                               /*
+                                * The condition on the next line takes
+                                * advantage of that n1 >= d1 (true due to
+                                * program flow).
+                                */
+                               if (n1 > d1 || n0 >= d0) {
+                                       q0 = 1;
+                                       sub_ddmmss(n1, n0, n1, n0, d1, d0);
+                               } else {
+                                       q0 = 0;
+                               }
+
+                               q1 = 0;
+
+                               if (rp != 0) {
+                                       rr.s.low = n0;
+                                       rr.s.high = n1;
+                                       *rp = rr.ll;
+                               }
+                       } else {
+                               unsigned long m1, m0;
+                               /* Normalize. */
+
+                               b = W_TYPE_SIZE - bm;
+
+                               d1 = (d1 << bm) | (d0 >> b);
+                               d0 = d0 << bm;
+                               n2 = n1 >> b;
+                               n1 = (n1 << bm) | (n0 >> b);
+                               n0 = n0 << bm;
+
+                               udiv_qrnnd(q0, n1, n2, n1, d1);
+                               umul_ppmm(m1, m0, q0, d0);
+
+                               if (m1 > n1 || (m1 == n1 && m0 > n0)) {
+                                       q0--;
+                                       sub_ddmmss(m1, m0, m1, m0, d1, d0);
+                               }
+
+                               q1 = 0;
+
+                               /* Remainder in (n1n0 - m1m0) >> bm. */
+                               if (rp != 0) {
+                                       sub_ddmmss(n1, n0, n1, n0, m1, m0);
+                                       rr.s.low = (n1 << b) | (n0 >> bm);
+                                       rr.s.high = n1 >> bm;
+                                       *rp = rr.ll;
+                               }
+                       }
+               }
+       }
+
+       ww.s.low = q0;
+       ww.s.high = q1;
+
+       return ww.ll;
+}
diff --git a/lib/umoddi3.c b/lib/umoddi3.c
new file mode 100644 (file)
index 0000000..d7bbf0f
--- /dev/null
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/libgcc.h>
+
+extern unsigned long long __udivmoddi4(unsigned long long u,
+                                      unsigned long long v,
+                                      unsigned long long *rp);
+
+unsigned long long __umoddi3(unsigned long long u, unsigned long long v)
+{
+       unsigned long long w;
+       (void)__udivmoddi4(u, v, &w);
+       return w;
+}
+EXPORT_SYMBOL(__umoddi3);