Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 9 Aug 2018 17:00:15 +0000 (10:00 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 9 Aug 2018 17:00:15 +0000 (10:00 -0700)
Pull crypto fix from Herbert Xu:
 "This fixes a performance regression in arm64 NEON crypto as well as a
  crash in x86 aegis/morus on unsupported CPUs"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: x86/aegis,morus - Fix and simplify CPUID checks
  crypto: arm64 - revert NEON yield for fast AEAD implementations

arch/arm64/crypto/aes-ce-ccm-core.S
arch/arm64/crypto/ghash-ce-core.S
arch/x86/crypto/aegis128-aesni-glue.c
arch/x86/crypto/aegis128l-aesni-glue.c
arch/x86/crypto/aegis256-aesni-glue.c
arch/x86/crypto/morus1280-avx2-glue.c
arch/x86/crypto/morus1280-sse2-glue.c
arch/x86/crypto/morus640-sse2-glue.c

index 88f5aef7934c77a5213fdc85a7f8fe435fc76e70..e3a375c4cb83c383242ac6b9cc8b3247939e0947 100644 (file)
         *                           u32 *macp, u8 const rk[], u32 rounds);
         */
 ENTRY(ce_aes_ccm_auth_data)
-       frame_push      7
-
-       mov     x19, x0
-       mov     x20, x1
-       mov     x21, x2
-       mov     x22, x3
-       mov     x23, x4
-       mov     x24, x5
-
-       ldr     w25, [x22]                      /* leftover from prev round? */
+       ldr     w8, [x3]                        /* leftover from prev round? */
        ld1     {v0.16b}, [x0]                  /* load mac */
-       cbz     w25, 1f
-       sub     w25, w25, #16
+       cbz     w8, 1f
+       sub     w8, w8, #16
        eor     v1.16b, v1.16b, v1.16b
-0:     ldrb    w7, [x20], #1                   /* get 1 byte of input */
-       subs    w21, w21, #1
-       add     w25, w25, #1
+0:     ldrb    w7, [x1], #1                    /* get 1 byte of input */
+       subs    w2, w2, #1
+       add     w8, w8, #1
        ins     v1.b[0], w7
        ext     v1.16b, v1.16b, v1.16b, #1      /* rotate in the input bytes */
        beq     8f                              /* out of input? */
-       cbnz    w25, 0b
+       cbnz    w8, 0b
        eor     v0.16b, v0.16b, v1.16b
-1:     ld1     {v3.4s}, [x23]                  /* load first round key */
-       prfm    pldl1strm, [x20]
-       cmp     w24, #12                        /* which key size? */
-       add     x6, x23, #16
-       sub     w7, w24, #2                     /* modified # of rounds */
+1:     ld1     {v3.4s}, [x4]                   /* load first round key */
+       prfm    pldl1strm, [x1]
+       cmp     w5, #12                         /* which key size? */
+       add     x6, x4, #16
+       sub     w7, w5, #2                      /* modified # of rounds */
        bmi     2f
        bne     5f
        mov     v5.16b, v3.16b
@@ -64,43 +55,33 @@ ENTRY(ce_aes_ccm_auth_data)
        ld1     {v5.4s}, [x6], #16              /* load next round key */
        bpl     3b
        aese    v0.16b, v4.16b
-       subs    w21, w21, #16                   /* last data? */
+       subs    w2, w2, #16                     /* last data? */
        eor     v0.16b, v0.16b, v5.16b          /* final round */
        bmi     6f
-       ld1     {v1.16b}, [x20], #16            /* load next input block */
+       ld1     {v1.16b}, [x1], #16             /* load next input block */
        eor     v0.16b, v0.16b, v1.16b          /* xor with mac */
-       beq     6f
-
-       if_will_cond_yield_neon
-       st1     {v0.16b}, [x19]                 /* store mac */
-       do_cond_yield_neon
-       ld1     {v0.16b}, [x19]                 /* reload mac */
-       endif_yield_neon
-
-       b       1b
-6:     st1     {v0.16b}, [x19]                 /* store mac */
+       bne     1b
+6:     st1     {v0.16b}, [x0]                  /* store mac */
        beq     10f
-       adds    w21, w21, #16
+       adds    w2, w2, #16
        beq     10f
-       mov     w25, w21
-7:     ldrb    w7, [x20], #1
+       mov     w8, w2
+7:     ldrb    w7, [x1], #1
        umov    w6, v0.b[0]
        eor     w6, w6, w7
-       strb    w6, [x19], #1
-       subs    w21, w21, #1
+       strb    w6, [x0], #1
+       subs    w2, w2, #1
        beq     10f
        ext     v0.16b, v0.16b, v0.16b, #1      /* rotate out the mac bytes */
        b       7b
-8:     mov     w7, w25
-       add     w25, w25, #16
+8:     mov     w7, w8
+       add     w8, w8, #16
 9:     ext     v1.16b, v1.16b, v1.16b, #1
        adds    w7, w7, #1
        bne     9b
        eor     v0.16b, v0.16b, v1.16b
-       st1     {v0.16b}, [x19]
-10:    str     w25, [x22]
-
-       frame_pop
+       st1     {v0.16b}, [x0]
+10:    str     w8, [x3]
        ret
 ENDPROC(ce_aes_ccm_auth_data)
 
@@ -145,29 +126,19 @@ ENTRY(ce_aes_ccm_final)
 ENDPROC(ce_aes_ccm_final)
 
        .macro  aes_ccm_do_crypt,enc
-       frame_push      8
-
-       mov     x19, x0
-       mov     x20, x1
-       mov     x21, x2
-       mov     x22, x3
-       mov     x23, x4
-       mov     x24, x5
-       mov     x25, x6
-
-       ldr     x26, [x25, #8]                  /* load lower ctr */
-       ld1     {v0.16b}, [x24]                 /* load mac */
-CPU_LE(        rev     x26, x26                )       /* keep swabbed ctr in reg */
+       ldr     x8, [x6, #8]                    /* load lower ctr */
+       ld1     {v0.16b}, [x5]                  /* load mac */
+CPU_LE(        rev     x8, x8                  )       /* keep swabbed ctr in reg */
 0:     /* outer loop */
-       ld1     {v1.8b}, [x25]                  /* load upper ctr */
-       prfm    pldl1strm, [x20]
-       add     x26, x26, #1
-       rev     x9, x26
-       cmp     w23, #12                        /* which key size? */
-       sub     w7, w23, #2                     /* get modified # of rounds */
+       ld1     {v1.8b}, [x6]                   /* load upper ctr */
+       prfm    pldl1strm, [x1]
+       add     x8, x8, #1
+       rev     x9, x8
+       cmp     w4, #12                         /* which key size? */
+       sub     w7, w4, #2                      /* get modified # of rounds */
        ins     v1.d[1], x9                     /* no carry in lower ctr */
-       ld1     {v3.4s}, [x22]                  /* load first round key */
-       add     x10, x22, #16
+       ld1     {v3.4s}, [x3]                   /* load first round key */
+       add     x10, x3, #16
        bmi     1f
        bne     4f
        mov     v5.16b, v3.16b
@@ -194,9 +165,9 @@ CPU_LE(     rev     x26, x26                )       /* keep swabbed ctr in reg */
        bpl     2b
        aese    v0.16b, v4.16b
        aese    v1.16b, v4.16b
-       subs    w21, w21, #16
-       bmi     7f                              /* partial block? */
-       ld1     {v2.16b}, [x20], #16            /* load next input block */
+       subs    w2, w2, #16
+       bmi     6f                              /* partial block? */
+       ld1     {v2.16b}, [x1], #16             /* load next input block */
        .if     \enc == 1
        eor     v2.16b, v2.16b, v5.16b          /* final round enc+mac */
        eor     v1.16b, v1.16b, v2.16b          /* xor with crypted ctr */
@@ -205,29 +176,18 @@ CPU_LE(   rev     x26, x26                )       /* keep swabbed ctr in reg */
        eor     v1.16b, v2.16b, v5.16b          /* final round enc */
        .endif
        eor     v0.16b, v0.16b, v2.16b          /* xor mac with pt ^ rk[last] */
-       st1     {v1.16b}, [x19], #16            /* write output block */
-       beq     5f
-
-       if_will_cond_yield_neon
-       st1     {v0.16b}, [x24]                 /* store mac */
-       do_cond_yield_neon
-       ld1     {v0.16b}, [x24]                 /* reload mac */
-       endif_yield_neon
-
-       b       0b
-5:
-CPU_LE(        rev     x26, x26                        )
-       st1     {v0.16b}, [x24]                 /* store mac */
-       str     x26, [x25, #8]                  /* store lsb end of ctr (BE) */
-
-6:     frame_pop
-       ret
-
-7:     eor     v0.16b, v0.16b, v5.16b          /* final round mac */
+       st1     {v1.16b}, [x0], #16             /* write output block */
+       bne     0b
+CPU_LE(        rev     x8, x8                  )
+       st1     {v0.16b}, [x5]                  /* store mac */
+       str     x8, [x6, #8]                    /* store lsb end of ctr (BE) */
+5:     ret
+
+6:     eor     v0.16b, v0.16b, v5.16b          /* final round mac */
        eor     v1.16b, v1.16b, v5.16b          /* final round enc */
-       st1     {v0.16b}, [x24]                 /* store mac */
-       add     w21, w21, #16                   /* process partial tail block */
-8:     ldrb    w9, [x20], #1                   /* get 1 byte of input */
+       st1     {v0.16b}, [x5]                  /* store mac */
+       add     w2, w2, #16                     /* process partial tail block */
+7:     ldrb    w9, [x1], #1                    /* get 1 byte of input */
        umov    w6, v1.b[0]                     /* get top crypted ctr byte */
        umov    w7, v0.b[0]                     /* get top mac byte */
        .if     \enc == 1
@@ -237,13 +197,13 @@ CPU_LE(   rev     x26, x26                        )
        eor     w9, w9, w6
        eor     w7, w7, w9
        .endif
-       strb    w9, [x19], #1                   /* store out byte */
-       strb    w7, [x24], #1                   /* store mac byte */
-       subs    w21, w21, #1
-       beq     6b
+       strb    w9, [x0], #1                    /* store out byte */
+       strb    w7, [x5], #1                    /* store mac byte */
+       subs    w2, w2, #1
+       beq     5b
        ext     v0.16b, v0.16b, v0.16b, #1      /* shift out mac byte */
        ext     v1.16b, v1.16b, v1.16b, #1      /* shift out ctr byte */
-       b       8b
+       b       7b
        .endm
 
        /*
index dcffb9e77589cd843bb04691a46d117013f12ba1..c723647b37db0387f58d3ea88f899147fdbc2727 100644 (file)
@@ -322,55 +322,41 @@ ENDPROC(pmull_ghash_update_p8)
        .endm
 
        .macro          pmull_gcm_do_crypt, enc
-       frame_push      10
+       ld1             {SHASH.2d}, [x4]
+       ld1             {XL.2d}, [x1]
+       ldr             x8, [x5, #8]                    // load lower counter
 
-       mov             x19, x0
-       mov             x20, x1
-       mov             x21, x2
-       mov             x22, x3
-       mov             x23, x4
-       mov             x24, x5
-       mov             x25, x6
-       mov             x26, x7
-       .if             \enc == 1
-       ldr             x27, [sp, #96]                  // first stacked arg
-       .endif
-
-       ldr             x28, [x24, #8]                  // load lower counter
-CPU_LE(        rev             x28, x28        )
-
-0:     mov             x0, x25
-       load_round_keys w26, x0
-       ld1             {SHASH.2d}, [x23]
-       ld1             {XL.2d}, [x20]
+       load_round_keys w7, x6
 
        movi            MASK.16b, #0xe1
        ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(        rev             x8, x8          )
        shl             MASK.2d, MASK.2d, #57
        eor             SHASH2.16b, SHASH2.16b, SHASH.16b
 
        .if             \enc == 1
-       ld1             {KS.16b}, [x27]
+       ldr             x10, [sp]
+       ld1             {KS.16b}, [x10]
        .endif
 
-1:     ld1             {CTR.8b}, [x24]                 // load upper counter
-       ld1             {INP.16b}, [x22], #16
-       rev             x9, x28
-       add             x28, x28, #1
-       sub             w19, w19, #1
+0:     ld1             {CTR.8b}, [x5]                  // load upper counter
+       ld1             {INP.16b}, [x3], #16
+       rev             x9, x8
+       add             x8, x8, #1
+       sub             w0, w0, #1
        ins             CTR.d[1], x9                    // set lower counter
 
        .if             \enc == 1
        eor             INP.16b, INP.16b, KS.16b        // encrypt input
-       st1             {INP.16b}, [x21], #16
+       st1             {INP.16b}, [x2], #16
        .endif
 
        rev64           T1.16b, INP.16b
 
-       cmp             w26, #12
-       b.ge            4f                              // AES-192/256?
+       cmp             w7, #12
+       b.ge            2f                              // AES-192/256?
 
-2:     enc_round       CTR, v21
+1:     enc_round       CTR, v21
 
        ext             T2.16b, XL.16b, XL.16b, #8
        ext             IN1.16b, T1.16b, T1.16b, #8
@@ -425,39 +411,27 @@ CPU_LE(   rev             x28, x28        )
 
        .if             \enc == 0
        eor             INP.16b, INP.16b, KS.16b
-       st1             {INP.16b}, [x21], #16
+       st1             {INP.16b}, [x2], #16
        .endif
 
-       cbz             w19, 3f
+       cbnz            w0, 0b
 
-       if_will_cond_yield_neon
-       st1             {XL.2d}, [x20]
-       .if             \enc == 1
-       st1             {KS.16b}, [x27]
-       .endif
-       do_cond_yield_neon
-       b               0b
-       endif_yield_neon
+CPU_LE(        rev             x8, x8          )
+       st1             {XL.2d}, [x1]
+       str             x8, [x5, #8]                    // store lower counter
 
-       b               1b
-
-3:     st1             {XL.2d}, [x20]
        .if             \enc == 1
-       st1             {KS.16b}, [x27]
+       st1             {KS.16b}, [x10]
        .endif
 
-CPU_LE(        rev             x28, x28        )
-       str             x28, [x24, #8]                  // store lower counter
-
-       frame_pop
        ret
 
-4:     b.eq            5f                              // AES-192?
+2:     b.eq            3f                              // AES-192?
        enc_round       CTR, v17
        enc_round       CTR, v18
-5:     enc_round       CTR, v19
+3:     enc_round       CTR, v19
        enc_round       CTR, v20
-       b               2b
+       b               1b
        .endm
 
        /*
index 5de7c0d46edfc56459280519e8987e28349baee9..acd11b3bf639e0a50013014e5eaeca6b11083c28 100644 (file)
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128_aesni_alg[] = {
        }
 };
 
-static const struct x86_cpu_id aesni_cpu_id[] = {
-       X86_FEATURE_MATCH(X86_FEATURE_AES),
-       X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
 static int __init crypto_aegis128_aesni_module_init(void)
 {
-       if (!x86_match_cpu(aesni_cpu_id))
+       if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+           !boot_cpu_has(X86_FEATURE_AES) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+           !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
                return -ENODEV;
 
        return crypto_register_aeads(crypto_aegis128_aesni_alg,
index 876e4866e63386ec0dd0231c6be35b9f94bc709c..2071c3d1ae07575143cc4d6262e92eaeef9ba560 100644 (file)
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128l_aesni_alg[] = {
        }
 };
 
-static const struct x86_cpu_id aesni_cpu_id[] = {
-       X86_FEATURE_MATCH(X86_FEATURE_AES),
-       X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
 static int __init crypto_aegis128l_aesni_module_init(void)
 {
-       if (!x86_match_cpu(aesni_cpu_id))
+       if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+           !boot_cpu_has(X86_FEATURE_AES) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+           !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
                return -ENODEV;
 
        return crypto_register_aeads(crypto_aegis128l_aesni_alg,
index 2b5dd3af8f4dc4c20caad2a96825576dc17fe2be..b5f2a8fd5a713ca986e2d3ef24aa1b69d421ced2 100644 (file)
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis256_aesni_alg[] = {
        }
 };
 
-static const struct x86_cpu_id aesni_cpu_id[] = {
-       X86_FEATURE_MATCH(X86_FEATURE_AES),
-       X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
 static int __init crypto_aegis256_aesni_module_init(void)
 {
-       if (!x86_match_cpu(aesni_cpu_id))
+       if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+           !boot_cpu_has(X86_FEATURE_AES) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+           !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
                return -ENODEV;
 
        return crypto_register_aeads(crypto_aegis256_aesni_alg,
index f111f36d26dce558ddb1e4ad5427a048011aded2..6634907d6ccdf17eb3c425337ca96bf746927ff7 100644 (file)
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
 
 MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
 
-static const struct x86_cpu_id avx2_cpu_id[] = {
-    X86_FEATURE_MATCH(X86_FEATURE_AVX2),
-    {}
-};
-MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
-
 static int __init crypto_morus1280_avx2_module_init(void)
 {
-       if (!x86_match_cpu(avx2_cpu_id))
+       if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+           !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
                return -ENODEV;
 
        return crypto_register_aeads(crypto_morus1280_avx2_algs,
index 839270aa713cab55dfebc55f9d02c53aa13c9f73..95cf857d2cbb1943ba8ce356c48416837f1655d9 100644 (file)
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
 
 MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
 
-static const struct x86_cpu_id sse2_cpu_id[] = {
-    X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-    {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
 static int __init crypto_morus1280_sse2_module_init(void)
 {
-       if (!x86_match_cpu(sse2_cpu_id))
+       if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+           !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
                return -ENODEV;
 
        return crypto_register_aeads(crypto_morus1280_sse2_algs,
index 26b47e2db8d2149c64b407ee10ccd4f013fcb542..615fb7bc9a323d949d038a8496125eab0c4bc4ba 100644 (file)
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
 
 MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
 
-static const struct x86_cpu_id sse2_cpu_id[] = {
-    X86_FEATURE_MATCH(X86_FEATURE_XMM2),
-    {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
 static int __init crypto_morus640_sse2_module_init(void)
 {
-       if (!x86_match_cpu(sse2_cpu_id))
+       if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+           !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+           !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
                return -ENODEV;
 
        return crypto_register_aeads(crypto_morus640_sse2_algs,