[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 2/4] crypto: Add RSA support

To: Xen-devel <xen-devel@xxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxxx
From: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
Date: Tue, 6 May 2025 15:32:14 +0100
Cc: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Anthony PERARD <anthony.perard@xxxxxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, Julien Grall <julien@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>
Delivery-date: Tue, 06 May 2025 14:32:54 +0000
List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
In preparation for adding support for livepatch signing, add support for
RSA crypto.

The RSA code is extracted from Nettle at tag nettle_3.2_release_20160128
(https://git.lysator.liu.se/nettle/nettle).

The MPI code is extracted from Linux at commit eef0df6a5953 (lib/mpi/*).

Signed-off-by: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx>
---
 xen/common/Makefile   |    1 +
 xen/common/mpi.c      | 1724 +++++++++++++++++++++++++++++++++++++++++
 xen/crypto/Makefile   |    1 +
 xen/crypto/rsa.c      |  194 +++++
 xen/include/xen/mpi.h |   63 ++
 xen/include/xen/rsa.h |   72 ++
 6 files changed, 2055 insertions(+)
 create mode 100644 xen/common/mpi.c
 create mode 100644 xen/crypto/rsa.c
 create mode 100644 xen/include/xen/mpi.h
 create mode 100644 xen/include/xen/rsa.h

diff --git a/xen/common/Makefile b/xen/common/Makefile
index 98f08730563f..ece6548bb072 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o livepatch_elf.o
 obj-$(CONFIG_LLC_COLORING) += llc-coloring.o
 obj-$(CONFIG_VM_EVENT) += mem_access.o
 obj-y += memory.o
+obj-y += mpi.o
 obj-y += multicall.o
 obj-y += notifier.o
 obj-$(CONFIG_NUMA) += numa.o
diff --git a/xen/common/mpi.c b/xen/common/mpi.c
new file mode 100644
index 000000000000..e3e3ecd42283
--- /dev/null
+++ b/xen/common/mpi.c
@@ -0,0 +1,1724 @@
+/* mpi-pow.c  -  MPI functions
+ *     Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *      Actually it's the same code with only minor changes in the
+ *      way the data is stored; this is to support the abstraction
+ *      of an optional secure memory allocation which may be used
+ *      to avoid revealing of sensitive data due to paging etc.
+ *      The GNU MP Library itself is published under the LGPL;
+ *      however I decided to publish this code under the plain GPL.
+ *
+ * mpi.c code extracted from linux @ eef0df6a5953, lib/mpi
+ */
+
+#include <xen/mpi.h>
+#include <xen/lib.h>
+#include <xen/err.h>
+#include <xen/xmalloc.h>
+#include <xen/string.h>
+#include <xen/bitops.h>
+#include <xen/bug.h>
+
+#define MAX_EXTERN_MPI_BITS 16384
+
+/* Define it to a value which is good on most machines.
+ * tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#define KARATSUBA_THRESHOLD 16
+
+typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
+typedef int mpi_size_t;                /* (must be a signed type) */
+
+/* Copy N limbs from S to D.  */
+#define MPN_COPY(d, s, n) \
+       do {                                    \
+               mpi_size_t _i;                  \
+               for (_i = 0; _i < (n); _i++)    \
+                       (d)[_i] = (s)[_i];      \
+       } while (0)
+
+#define MPN_COPY_DECR(d, s, n) \
+       do {                                    \
+               mpi_size_t _i;                  \
+               for (_i = (n)-1; _i >= 0; _i--) \
+                       (d)[_i] = (s)[_i];      \
+       } while (0)
+
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+       do {                                    \
+               int  _i;                        \
+               for (_i = 0; _i < (n); _i++)    \
+                       (d)[_i] = 0;            \
+       } while (0)
+
+#define MPN_NORMALIZE(d, n)  \
+       do {                                    \
+               while ((n) > 0) {               \
+                       if ((d)[(n)-1])         \
+                               break;          \
+                       (n)--;                  \
+               }                               \
+       } while (0)
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)         \
+       do {                                                    \
+               if ((size) < KARATSUBA_THRESHOLD)               \
+                       mul_n_basecase(prodp, up, vp, size);    \
+               else                                            \
+                       mul_n(prodp, up, vp, size, tspace);     \
+       } while (0);
+
+#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)             \
+       do {                                                    \
+               if ((size) < KARATSUBA_THRESHOLD)               \
+                       mpih_sqr_n_basecase(prodp, up, size);   \
+               else                                            \
+                       mpih_sqr_n(prodp, up, size, tspace);    \
+       } while (0);
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+       mpi_limb_t __x; \
+       __x = (al) + (bl); \
+       (sh) = (ah) + (bh) + (__x < (al)); \
+       (sl) = __x; \
+} while (0)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+       mpi_limb_t __x; \
+       __x = (al) - (bl); \
+       (sh) = (ah) - (bh) - (__x > (al)); \
+       (sl) = __x; \
+} while (0)
+
+#define __ll_B ((mpi_limb_t) 1 << (BITS_PER_MPI_LIMB / 2))
+#define __ll_lowpart(t) ((mpi_limb_t) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((mpi_limb_t) (t) >> (BITS_PER_MPI_LIMB / 2))
+
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+       mpi_limb_t __x0, __x1, __x2, __x3; \
+       unsigned int __ul, __vl, __uh, __vh; \
+       mpi_limb_t __u = (u), __v = (v); \
+       \
+       __ul = __ll_lowpart(__u); \
+       __uh = __ll_highpart(__u); \
+       __vl = __ll_lowpart(__v); \
+       __vh = __ll_highpart(__v); \
+       \
+       __x0 = (mpi_limb_t) __ul * __vl; \
+       __x1 = (mpi_limb_t) __ul * __vh; \
+       __x2 = (mpi_limb_t) __uh * __vl; \
+       __x3 = (mpi_limb_t) __uh * __vh; \
+       \
+       __x1 += __ll_highpart(__x0);/* this can't give carry */ \
+       __x1 += __x2;           /* but this indeed can */ \
+       if (__x1 < __x2)                /* did we get it? */ \
+       __x3 += __ll_B;         /* yes, add it in the proper pos. */ \
+       \
+       (w1) = __x3 + __ll_highpart(__x1); \
+       (w0) = (__ll_lowpart(__x1) << BITS_PER_MPI_LIMB/2) + 
__ll_lowpart(__x0); \
+} while (0)
+
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+       mpi_limb_t __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+       __d1 = __ll_highpart(d); \
+       __d0 = __ll_lowpart(d); \
+       \
+       __r1 = (n1) % __d1; \
+       __q1 = (n1) / __d1; \
+       __m = (mpi_limb_t) __q1 * __d0; \
+       __r1 = __r1 * __ll_B | __ll_highpart(n0); \
+       if (__r1 < __m) { \
+               __q1--, __r1 += (d); \
+               if (__r1 >= (d)) /* i.e. we didn't get carry when adding to 
__r1 */ \
+               if (__r1 < __m) \
+                       __q1--, __r1 += (d); \
+       } \
+       __r1 -= __m; \
+       \
+       __r0 = __r1 % __d1; \
+       __q0 = __r1 / __d1; \
+       __m = (mpi_limb_t) __q0 * __d0; \
+       __r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+       if (__r0 < __m) { \
+               __q0--, __r0 += (d); \
+               if (__r0 >= (d)) \
+                       if (__r0 < __m) \
+                               __q0--, __r0 += (d); \
+       } \
+       __r0 -= __m; \
+       \
+       (q) = (mpi_limb_t) __q1 * __ll_B | __q0; \
+       (r) = __r0; \
+} while (0)
+
+struct karatsuba_ctx {
+       struct karatsuba_ctx *next;
+       mpi_ptr_t tspace;
+       mpi_size_t tspace_size;
+       mpi_ptr_t tp;
+       mpi_size_t tp_size;
+};
+
+static void mpi_normalize(MPI a);
+static mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                           mpi_size_t s1_size, mpi_limb_t s2_limb);
+static mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+                         mpi_ptr_t np, mpi_size_t nsize,
+                         mpi_ptr_t dp, mpi_size_t dsize);
+static mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                         unsigned cnt);
+static void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
+static mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
+static void mpi_free_limb_space(mpi_ptr_t a);
+static mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                           mpi_size_t s1_size, mpi_limb_t s2_limb);
+static int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+               mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
+static mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                         unsigned cnt);
+static int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
+static void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+               mpi_ptr_t tspace);
+static mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                        mpi_ptr_t s2_ptr, mpi_size_t size);
+static mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                        mpi_ptr_t s2_ptr, mpi_size_t size);
+static mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                        mpi_size_t s1_size, mpi_limb_t s2_limb);
+static void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);
+static void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t 
size);
+static int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+                              mpi_ptr_t up, mpi_size_t usize,
+                              mpi_ptr_t vp, mpi_size_t vsize,
+                              struct karatsuba_ctx *ctx);
+static int mpi_resize(MPI a, unsigned nlimbs);
+
+/**
+ * count_leading_zeros - Count the number of zeros from the MSB back
+ * @x: The value
+ *
+ * Count the number of leading zeros from the MSB going towards the LSB in @x.
+ *
+ * If the MSB of @x is set, the result is 0.
+ * If only the LSB of @x is set, then the result is BITS_PER_LONG-1.
+ * If @x is 0 then the result is BITS_PER_LONG.
+ */
+static inline int count_leading_zeros(unsigned long x)
+{
+       if (sizeof(x) == 4)
+               return BITS_PER_LONG - fls(x);
+       else
+               return BITS_PER_LONG - fls64(x);
+}
+
+static mpi_limb_t
+mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+             mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+       mpi_limb_t x;
+
+       x = *s1_ptr++;
+       s2_limb += x;
+       *res_ptr++ = s2_limb;
+       if (s2_limb < x) {      /* sum is less than the left operand: handle 
carry */
+               while (--s1_size) {
+                       x = *s1_ptr++ + 1;      /* add carry */
+                       *res_ptr++ = x; /* and store */
+                       if (x)  /* not 0 (no overflow): we can stop */
+                               goto leave;
+               }
+               return 1;       /* return carry (size of s1 to small) */
+       }
+
+leave:
+       if (res_ptr != s1_ptr) {        /* not the same variable */
+               mpi_size_t i;   /* copy the rest */
+               for (i = 0; i < s1_size - 1; i++)
+                       res_ptr[i] = s1_ptr[i];
+       }
+       return 0;               /* no carry */
+}
+
+static mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+             mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+       mpi_limb_t x;
+
+       x = *s1_ptr++;
+       s2_limb = x - s2_limb;
+       *res_ptr++ = s2_limb;
+       if (s2_limb > x) {
+               while (--s1_size) {
+                       x = *s1_ptr++;
+                       *res_ptr++ = x - 1;
+                       if (x)
+                               goto leave;
+               }
+               return 1;
+       }
+
+leave:
+       if (res_ptr != s1_ptr) {
+               mpi_size_t i;
+               for (i = 0; i < s1_size - 1; i++)
+                       res_ptr[i] = s1_ptr[i];
+       }
+       return 0;
+}
+
+static mpi_limb_t
+mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+           mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+       mpi_limb_t cy = 0;
+
+       if (s2_size)
+               cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+       if (s1_size - s2_size)
+               cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+                                  s1_size - s2_size, cy);
+       return cy;
+}
+
+static mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs)
+{
+       size_t len = nlimbs * sizeof(mpi_limb_t);
+
+       if (!len)
+               return NULL;
+
+       return xmalloc_bytes(len);
+}
+
+static void mpi_free_limb_space(mpi_ptr_t a)
+{
+       if (!a)
+               return;
+
+       xfree(a);
+}
+
+/****************
+ * Resize the array of A to NLIMBS. the additional space is cleared
+ * (set to 0) [done by m_realloc()]
+ */
+static int mpi_resize(MPI a, unsigned nlimbs)
+{
+       void *p;
+
+       if (nlimbs <= a->alloced)
+               return 0;       /* no need to do it */
+
+       if (a->d) {
+               p = xmalloc_array(mpi_limb_t, nlimbs);
+               if (!p)
+                       return -ENOMEM;
+               memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
+               xfree(a->d);
+               a->d = p;
+       } else {
+               a->d = xzalloc_array(mpi_limb_t, nlimbs);
+               if (!a->d)
+                       return -ENOMEM;
+       }
+       a->alloced = nlimbs;
+       return 0;
+}
+
+/****************
+ * RES = BASE ^ EXP mod MOD
+ */
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
+{
+       mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
+       mpi_ptr_t xp_marker = NULL;
+       mpi_ptr_t tspace = NULL;
+       mpi_ptr_t rp, ep, mp, bp;
+       mpi_size_t esize, msize, bsize, rsize;
+       int msign, bsign, rsign;
+       mpi_size_t size;
+       int mod_shift_cnt;
+       int negative_result;
+       int assign_rp = 0;
+       mpi_size_t tsize = 0;   /* to avoid compiler warning */
+       /* fixme: we should check that the warning is void */
+       int rc = -ENOMEM;
+
+       esize = exp->nlimbs;
+       msize = mod->nlimbs;
+       size = 2 * msize;
+       msign = mod->sign;
+
+       rp = res->d;
+       ep = exp->d;
+
+       if (!msize)
+               return -EINVAL;
+
+       if (!esize) {
+               /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+                * depending on if MOD equals 1.  */
+               rp[0] = 1;
+               res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+               res->sign = 0;
+               goto leave;
+       }
+
+       /* Normalize MOD (i.e. make its most significant bit set) as required by
+        * mpn_divrem.  This will make the intermediate values in the 
calculation
+        * slightly larger, but the correct result is obtained after a final
+        * reduction using the original MOD value.  */
+       mp = mp_marker = mpi_alloc_limb_space(msize);
+       if (!mp)
+               goto enomem;
+       mod_shift_cnt = count_leading_zeros(mod->d[msize - 1]);
+       if (mod_shift_cnt)
+               mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt);
+       else
+               MPN_COPY(mp, mod->d, msize);
+
+       bsize = base->nlimbs;
+       bsign = base->sign;
+       if (bsize > msize) {    /* The base is larger than the module. Reduce 
it. */
+               /* Allocate (BSIZE + 1) with space for remainder and quotient.
+                * (The quotient is (bsize - msize + 1) limbs.)  */
+               bp = bp_marker = mpi_alloc_limb_space(bsize + 1);
+               if (!bp)
+                       goto enomem;
+               MPN_COPY(bp, base->d, bsize);
+               /* We don't care about the quotient, store it above the 
remainder,
+                * at BP + MSIZE.  */
+               mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize);
+               bsize = msize;
+               /* Canonicalize the base, since we are going to multiply with it
+                * quite a few times.  */
+               MPN_NORMALIZE(bp, bsize);
+       } else
+               bp = base->d;
+
+       if (!bsize) {
+               res->nlimbs = 0;
+               res->sign = 0;
+               goto leave;
+       }
+
+       if (res->alloced < size) {
+               /* We have to allocate more space for RES.  If any of the input
+                * parameters are identical to RES, defer deallocation of the 
old
+                * space.  */
+               if (rp == ep || rp == mp || rp == bp) {
+                       rp = mpi_alloc_limb_space(size);
+                       if (!rp)
+                               goto enomem;
+                       assign_rp = 1;
+               } else {
+                       if (mpi_resize(res, size) < 0)
+                               goto enomem;
+                       rp = res->d;
+               }
+       } else {                /* Make BASE, EXP and MOD not overlap with RES. 
 */
+               if (rp == bp) {
+                       /* RES and BASE are identical.  Allocate temp. space 
for BASE.  */
+                       BUG_ON(bp_marker);
+                       bp = bp_marker = mpi_alloc_limb_space(bsize);
+                       if (!bp)
+                               goto enomem;
+                       MPN_COPY(bp, rp, bsize);
+               }
+               if (rp == ep) {
+                       /* RES and EXP are identical.  Allocate temp. space for 
EXP.  */
+                       ep = ep_marker = mpi_alloc_limb_space(esize);
+                       if (!ep)
+                               goto enomem;
+                       MPN_COPY(ep, rp, esize);
+               }
+               if (rp == mp) {
+                       /* RES and MOD are identical.  Allocate temporary space 
for MOD. */
+                       BUG_ON(mp_marker);
+                       mp = mp_marker = mpi_alloc_limb_space(msize);
+                       if (!mp)
+                               goto enomem;
+                       MPN_COPY(mp, rp, msize);
+               }
+       }
+
+       MPN_COPY(rp, bp, bsize);
+       rsize = bsize;
+       rsign = bsign;
+
+       {
+               mpi_size_t i;
+               mpi_ptr_t xp;
+               int c;
+               mpi_limb_t e;
+               mpi_limb_t carry_limb;
+               struct karatsuba_ctx karactx;
+
+               xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
+               if (!xp)
+                       goto enomem;
+
+               memset(&karactx, 0, sizeof karactx);
+               negative_result = (ep[0] & 1) && base->sign;
+
+               i = esize - 1;
+               e = ep[i];
+               c = count_leading_zeros(e);
+               e = (e << c) << 1;      /* shift the exp bits to the left, lose 
msb */
+               c = BITS_PER_MPI_LIMB - 1 - c;
+
+               /* Main loop.
+                *
+                * Make the result be pointed to alternately by XP and RP.  This
+                * helps us avoid block copying, which would otherwise be 
necessary
+                * with the overlap restrictions of mpihelp_divmod. With 50% 
probability
+                * the result after this loop will be in the area originally 
pointed
+                * by RP (==RES->d), and with 50% probability in the area 
originally
+                * pointed to by XP.
+                */
+
+               for (;;) {
+                       while (c) {
+                               mpi_ptr_t tp;
+                               mpi_size_t xsize;
+
+                               /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) 
goto enomem */
+                               if (rsize < KARATSUBA_THRESHOLD)
+                                       mpih_sqr_n_basecase(xp, rp, rsize);
+                               else {
+                                       if (!tspace) {
+                                               tsize = 2 * rsize;
+                                               tspace =
+                                                   mpi_alloc_limb_space(tsize);
+                                               if (!tspace)
+                                                       goto enomem;
+                                       } else if (tsize < (2 * rsize)) {
+                                               mpi_free_limb_space(tspace);
+                                               tsize = 2 * rsize;
+                                               tspace =
+                                                   mpi_alloc_limb_space(tsize);
+                                               if (!tspace)
+                                                       goto enomem;
+                                       }
+                                       mpih_sqr_n(xp, rp, rsize, tspace);
+                               }
+
+                               xsize = 2 * rsize;
+                               if (xsize > msize) {
+                                       mpihelp_divrem(xp + msize, 0, xp, xsize,
+                                                      mp, msize);
+                                       xsize = msize;
+                               }
+
+                               tp = rp;
+                               rp = xp;
+                               xp = tp;
+                               rsize = xsize;
+
+                               if ((mpi_limb_signed_t) e < 0) {
+                                       /*mpihelp_mul( xp, rp, rsize, bp, bsize 
); */
+                                       if (bsize < KARATSUBA_THRESHOLD) {
+                                               mpi_limb_t tmp;
+                                               if (mpihelp_mul
+                                                   (xp, rp, rsize, bp, bsize,
+                                                    &tmp) < 0)
+                                                       goto enomem;
+                                       } else {
+                                               if (mpihelp_mul_karatsuba_case
+                                                   (xp, rp, rsize, bp, bsize,
+                                                    &karactx) < 0)
+                                                       goto enomem;
+                                       }
+
+                                       xsize = rsize + bsize;
+                                       if (xsize > msize) {
+                                               mpihelp_divrem(xp + msize, 0,
+                                                              xp, xsize, mp,
+                                                              msize);
+                                               xsize = msize;
+                                       }
+
+                                       tp = rp;
+                                       rp = xp;
+                                       xp = tp;
+                                       rsize = xsize;
+                               }
+                               e <<= 1;
+                               c--;
+                       }
+
+                       i--;
+                       if (i < 0)
+                               break;
+                       e = ep[i];
+                       c = BITS_PER_MPI_LIMB;
+               }
+
+               /* We shifted MOD, the modulo reduction argument, left 
MOD_SHIFT_CNT
+                * steps.  Adjust the result by reducing it with the original 
MOD.
+                *
+                * Also make sure the result is put in RES->d (where it already
+                * might be, see above).
+                */
+               if (mod_shift_cnt) {
+                       carry_limb =
+                           mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt);
+                       rp = res->d;
+                       if (carry_limb) {
+                               rp[rsize] = carry_limb;
+                               rsize++;
+                       }
+               } else {
+                       MPN_COPY(res->d, rp, rsize);
+                       rp = res->d;
+               }
+
+               if (rsize >= msize) {
+                       mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
+                       rsize = msize;
+               }
+
+               /* Remove any leading zero words from the result.  */
+               if (mod_shift_cnt)
+                       mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
+               MPN_NORMALIZE(rp, rsize);
+
+               mpihelp_release_karatsuba_ctx(&karactx);
+       }
+
+       if (negative_result && rsize) {
+               if (mod_shift_cnt)
+                       mpihelp_rshift(mp, mp, msize, mod_shift_cnt);
+               mpihelp_sub(rp, mp, msize, rp, rsize);
+               rsize = msize;
+               rsign = msign;
+               MPN_NORMALIZE(rp, rsize);
+       }
+       res->nlimbs = rsize;
+       res->sign = rsign;
+
+leave:
+       rc = 0;
+enomem:
+       if (assign_rp)
+               mpi_assign_limb_space(res, rp, size);
+       if (mp_marker)
+               mpi_free_limb_space(mp_marker);
+       if (bp_marker)
+               mpi_free_limb_space(bp_marker);
+       if (ep_marker)
+               mpi_free_limb_space(ep_marker);
+       if (xp_marker)
+               mpi_free_limb_space(xp_marker);
+       if (tspace)
+               mpi_free_limb_space(tspace);
+       return rc;
+}
+
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+ * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
+ * always stored.  Return the most significant limb.
+ *
+ * Argument constraints:
+ * 1. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ *
+ *
+ * Handle simple cases with traditional multiplication.
+ *
+ * This is the most critical code of multiplication.  All multiplies rely
+ * on this, both small and huge.  Small ones arrive here immediately.  Huge
+ * ones arrive here as this is the base case for Karatsuba's recursive
+ * algorithm below.
+ */
+
+static mpi_limb_t
+mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+       mpi_size_t i;
+       mpi_limb_t cy;
+       mpi_limb_t v_limb;
+
+       /* Multiply by the first limb in V separately, as the result can be
+        * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+       v_limb = vp[0];
+       if (v_limb <= 1) {
+               if (v_limb == 1)
+                       MPN_COPY(prodp, up, size);
+               else
+                       MPN_ZERO(prodp, size);
+               cy = 0;
+       } else
+               cy = mpihelp_mul_1(prodp, up, size, v_limb);
+
+       prodp[size] = cy;
+       prodp++;
+
+       /* For each iteration in the outer loop, multiply one limb from
+        * U with one limb from V, and add it to PROD.  */
+       for (i = 1; i < size; i++) {
+               v_limb = vp[i];
+               if (v_limb <= 1) {
+                       cy = 0;
+                       if (v_limb == 1)
+                               cy = mpihelp_add_n(prodp, prodp, up, size);
+               } else
+                       cy = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+               prodp[size] = cy;
+               prodp++;
+       }
+
+       return cy;
+}
+
+static void
+mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+               mpi_size_t size, mpi_ptr_t tspace)
+{
+       if (size & 1) {
+               /* The size is odd, and the code below doesn't handle that.
+                * Multiply the least significant (size - 1) limbs with a 
recursive
+                * call, and handle the most significant limb of S1 and S2
+                * separately.
+                * A slightly faster way to do this would be to make the 
Karatsuba
+                * code below behave as if the size were even, and let it check 
for
+                * odd size in the end.  I.e., in essence move this code to the 
end.
+                * Doing so would save us a recursive call, and potentially 
make the
+                * stack grow a lot less.
+                */
+               mpi_size_t esize = size - 1;    /* even size */
+               mpi_limb_t cy_limb;
+
+               MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
+               cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
+               prodp[esize + esize] = cy_limb;
+               cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
+               prodp[esize + size] = cy_limb;
+       } else {
+               /* Anatolij Alekseevich Karatsuba's divide-and-conquer 
algorithm.
+                *
+                * Split U in two pieces, U1 and U0, such that
+                * U = U0 + U1*(B**n),
+                * and V in V1 and V0, such that
+                * V = V0 + V1*(B**n).
+                *
+                * UV is then computed recursively using the identity
+                *
+                *        2n   n          n                     n
+                * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
+                *                1 1        1  0   0  1              0 0
+                *
+                * Where B = 2**BITS_PER_MP_LIMB.
+                */
+               mpi_size_t hsize = size >> 1;
+               mpi_limb_t cy;
+               int negflg;
+
+               /* Product H.      ________________  ________________
+                *                |_____U1 x V1____||____U0 x V0_____|
+                * Put result in upper part of PROD and pass low part of TSPACE
+                * as new TSPACE.
+                */
+               MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
+                                 tspace);
+
+               /* Product M.      ________________
+                *                |_(U1-U0)(V0-V1)_|
+                */
+               if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
+                       mpihelp_sub_n(prodp, up + hsize, up, hsize);
+                       negflg = 0;
+               } else {
+                       mpihelp_sub_n(prodp, up, up + hsize, hsize);
+                       negflg = 1;
+               }
+               if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
+                       mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
+                       negflg ^= 1;
+               } else {
+                       mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
+                       /* No change of NEGFLG.  */
+               }
+               /* Read temporary operands from low part of PROD.
+                * Put result in low part of TSPACE using upper part of TSPACE
+                * as new TSPACE.
+                */
+               MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
+                                 tspace + size);
+
+               /* Add/copy product H. */
+               MPN_COPY(prodp + hsize, prodp + size, hsize);
+               cy = mpihelp_add_n(prodp + size, prodp + size,
+                                  prodp + size + hsize, hsize);
+
+               /* Add product M (if NEGFLG M is a negative number) */
+               if (negflg)
+                       cy -=
+                           mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
+                                         size);
+               else
+                       cy +=
+                           mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
+                                         size);
+
+               /* Product L.      ________________  ________________
+                *                |________________||____U0 x V0_____|
+                * Read temporary operands from low part of PROD.
+                * Put result in low part of TSPACE using upper part of TSPACE
+                * as new TSPACE.
+                */
+               MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
+
+               /* Add/copy Product L (twice) */
+
+               cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+               if (cy)
+                       mpihelp_add_1(prodp + hsize + size,
+                                     prodp + hsize + size, hsize, cy);
+
+               MPN_COPY(prodp, tspace, hsize);
+               cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+                                  hsize);
+               if (cy)
+                       mpihelp_add_1(prodp + size, prodp + size, size, 1);
+       }
+}
+
+static void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
+{
+       mpi_size_t i;
+       mpi_limb_t cy_limb;
+       mpi_limb_t v_limb;
+
+       /* Multiply by the first limb in V separately, as the result can be
+        * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+       v_limb = up[0];
+       if (v_limb <= 1) {
+               if (v_limb == 1)
+                       MPN_COPY(prodp, up, size);
+               else
+                       MPN_ZERO(prodp, size);
+               cy_limb = 0;
+       } else
+               cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
+
+       prodp[size] = cy_limb;
+       prodp++;
+
+       /* For each iteration in the outer loop, multiply one limb from
+        * U with one limb from V, and add it to PROD.  */
+       for (i = 1; i < size; i++) {
+               v_limb = up[i];
+               if (v_limb <= 1) {
+                       cy_limb = 0;
+                       if (v_limb == 1)
+                               cy_limb = mpihelp_add_n(prodp, prodp, up, size);
+               } else
+                       cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+               prodp[size] = cy_limb;
+               prodp++;
+       }
+}
+
+static void
+mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+{
+       if (size & 1) {
+               /* The size is odd, and the code below doesn't handle that.
+                * Multiply the least significant (size - 1) limbs with a 
recursive
+                * call, and handle the most significant limb of S1 and S2
+                * separately.
+                * A slightly faster way to do this would be to make the 
Karatsuba
+                * code below behave as if the size were even, and let it check 
for
+                * odd size in the end.  I.e., in essence move this code to the 
end.
+                * Doing so would save us a recursive call, and potentially 
make the
+                * stack grow a lot less.
+                */
+               mpi_size_t esize = size - 1;    /* even size */
+               mpi_limb_t cy_limb;
+
+               MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
+               cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
+               prodp[esize + esize] = cy_limb;
+               cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
+
+               prodp[esize + size] = cy_limb;
+       } else {
+               mpi_size_t hsize = size >> 1;
+               mpi_limb_t cy;
+
+               /* Product H.      ________________  ________________
+                *                |_____U1 x U1____||____U0 x U0_____|
+                * Put result in upper part of PROD and pass low part of TSPACE
+                * as new TSPACE.
+                */
+               MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
+
+               /* Product M.      ________________
+                *                |_(U1-U0)(U0-U1)_|
+                */
+               if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
+                       mpihelp_sub_n(prodp, up + hsize, up, hsize);
+               else
+                       mpihelp_sub_n(prodp, up, up + hsize, hsize);
+
+               /* Read temporary operands from low part of PROD.
+                * Put result in low part of TSPACE using upper part of TSPACE
+                * as new TSPACE.  */
+               MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
+
+               /* Add/copy product H  */
+               MPN_COPY(prodp + hsize, prodp + size, hsize);
+               cy = mpihelp_add_n(prodp + size, prodp + size,
+                                  prodp + size + hsize, hsize);
+
+               /* Add product M (if NEGFLG M is a negative number).  */
+               cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
+
+               /* Product L.      ________________  ________________
+                *                |________________||____U0 x U0_____|
+                * Read temporary operands from low part of PROD.
+                * Put result in low part of TSPACE using upper part of TSPACE
+                * as new TSPACE.  */
+               MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
+
+               /* Add/copy Product L (twice).  */
+               cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+               if (cy)
+                       mpihelp_add_1(prodp + hsize + size,
+                                     prodp + hsize + size, hsize, cy);
+
+               MPN_COPY(prodp, tspace, hsize);
+               cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+                                  hsize);
+               if (cy)
+                       mpihelp_add_1(prodp + size, prodp + size, size, 1);
+       }
+}
+
+static int
+mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+                          mpi_ptr_t up, mpi_size_t usize,
+                          mpi_ptr_t vp, mpi_size_t vsize,
+                          struct karatsuba_ctx *ctx)
+{
+       mpi_limb_t cy;
+
+       if (!ctx->tspace || ctx->tspace_size < vsize) {
+               if (ctx->tspace)
+                       mpi_free_limb_space(ctx->tspace);
+               ctx->tspace = mpi_alloc_limb_space(2 * vsize);
+               if (!ctx->tspace)
+                       return -ENOMEM;
+               ctx->tspace_size = vsize;
+       }
+
+       MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
+
+       prodp += vsize;
+       up += vsize;
+       usize -= vsize;
+       if (usize >= vsize) {
+               if (!ctx->tp || ctx->tp_size < vsize) {
+                       if (ctx->tp)
+                               mpi_free_limb_space(ctx->tp);
+                       ctx->tp = mpi_alloc_limb_space(2 * vsize);
+                       if (!ctx->tp) {
+                               if (ctx->tspace)
+                                       mpi_free_limb_space(ctx->tspace);
+                               ctx->tspace = NULL;
+                               return -ENOMEM;
+                       }
+                       ctx->tp_size = vsize;
+               }
+
+               do {
+                       MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
+                       cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
+                       mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
+                                     cy);
+                       prodp += vsize;
+                       up += vsize;
+                       usize -= vsize;
+               } while (usize >= vsize);
+       }
+
+       if (usize) {
+               if (usize < KARATSUBA_THRESHOLD) {
+                       mpi_limb_t tmp;
+                       if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
+                           < 0)
+                               return -ENOMEM;
+               } else {
+                       if (!ctx->next) {
+                               ctx->next = xzalloc(struct karatsuba_ctx);
+                               if (!ctx->next)
+                                       return -ENOMEM;
+                       }
+                       if (mpihelp_mul_karatsuba_case(ctx->tspace,
+                                                      vp, vsize,
+                                                      up, usize,
+                                                      ctx->next) < 0)
+                               return -ENOMEM;
+               }
+
+               cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
+               mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
+       }
+
+       return 0;
+}
+
+static void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
+{
+       struct karatsuba_ctx *ctx2;
+
+       if (ctx->tp)
+               mpi_free_limb_space(ctx->tp);
+       if (ctx->tspace)
+               mpi_free_limb_space(ctx->tspace);
+       for (ctx = ctx->next; ctx; ctx = ctx2) {
+               ctx2 = ctx->next;
+               if (ctx->tp)
+                       mpi_free_limb_space(ctx->tp);
+               if (ctx->tspace)
+                       mpi_free_limb_space(ctx->tspace);
+               xfree(ctx);
+       }
+}
+
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+ * and v (pointed to by VP, with VSIZE limbs), and store the result at
+ * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
+ * operands are normalized.  Return the most significant limb of the
+ * result.
+ *
+ * NOTE: The space pointed to by PRODP is overwritten before finished
+ * with U and V, so overlap is an error.
+ *
+ * Argument constraints:
+ * 1. USIZE >= VSIZE.
+ * 2. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ */
+
+static int
+mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+           mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
+{
+       mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
+       mpi_limb_t cy;
+       struct karatsuba_ctx ctx;
+
+       if (vsize < KARATSUBA_THRESHOLD) {
+               mpi_size_t i;
+               mpi_limb_t v_limb;
+
+               if (!vsize) {
+                       *_result = 0;
+                       return 0;
+               }
+
+               /* Multiply by the first limb in V separately, as the result 
can be
+                * stored (not added) to PROD.  We also avoid a loop for 
zeroing.  */
+               v_limb = vp[0];
+               if (v_limb <= 1) {
+                       if (v_limb == 1)
+                               MPN_COPY(prodp, up, usize);
+                       else
+                               MPN_ZERO(prodp, usize);
+                       cy = 0;
+               } else
+                       cy = mpihelp_mul_1(prodp, up, usize, v_limb);
+
+               prodp[usize] = cy;
+               prodp++;
+
+               /* For each iteration in the outer loop, multiply one limb from
+                * U with one limb from V, and add it to PROD.  */
+               for (i = 1; i < vsize; i++) {
+                       v_limb = vp[i];
+                       if (v_limb <= 1) {
+                               cy = 0;
+                               if (v_limb == 1)
+                                       cy = mpihelp_add_n(prodp, prodp, up,
+                                                          usize);
+                       } else
+                               cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
+
+                       prodp[usize] = cy;
+                       prodp++;
+               }
+
+               *_result = cy;
+               return 0;
+       }
+
+       memset(&ctx, 0, sizeof ctx);
+       if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
+               return -ENOMEM;
+       mpihelp_release_karatsuba_ctx(&ctx);
+       *_result = *prod_endp;
+       return 0;
+}
+
+static mpi_limb_t
+mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+             mpi_limb_t s2_limb)
+{
+       mpi_limb_t cy_limb;
+       mpi_size_t j;
+       mpi_limb_t prod_high, prod_low;
+
+       /* The loop counter and index J goes from -S1_SIZE to -1.  This way
+        * the loop becomes faster.  */
+       j = -s1_size;
+
+       /* Offset the base pointers to compensate for the negative indices.  */
+       s1_ptr -= j;
+       res_ptr -= j;
+
+       cy_limb = 0;
+       do {
+               umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+               prod_low += cy_limb;
+               cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+               res_ptr[j] = prod_low;
+       } while (++j);
+
+       return cy_limb;
+}
+
+static mpi_limb_t
+mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+             mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+       mpi_limb_t x, y, cy;
+       mpi_size_t j;
+
+       /* The loop counter and index J goes from -SIZE to -1.  This way
+          the loop becomes faster.  */
+       j = -size;
+
+       /* Offset the base pointers to compensate for the negative indices. */
+       s1_ptr -= j;
+       s2_ptr -= j;
+       res_ptr -= j;
+
+       cy = 0;
+       do {
+               y = s2_ptr[j];
+               x = s1_ptr[j];
+               y += cy;        /* add previous carry to one addend */
+               cy = y < cy;    /* get out carry from that addition */
+               y += x;         /* add other addend */
+               cy += y < x;    /* get out carry from that add, combine */
+               res_ptr[j] = y;
+       } while (++j);
+
+       return cy;
+}
+
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ * and store the USIZE least significant digits of the result at WP.
+ * Return the bits shifted out from the most significant digit.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be >= UP.
+ */
+
+static mpi_limb_t
+mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt)
+{
+       mpi_limb_t high_limb, low_limb;
+       unsigned sh_1, sh_2;
+       mpi_size_t i;
+       mpi_limb_t retval;
+
+       sh_1 = cnt;
+       wp += 1;
+       sh_2 = BITS_PER_MPI_LIMB - sh_1;
+       i = usize - 1;
+       low_limb = up[i];
+       retval = low_limb >> sh_2;
+       high_limb = low_limb;
+       while (--i >= 0) {
+               low_limb = up[i];
+               wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+               high_limb = low_limb;
+       }
+       wp[i] = high_limb << sh_1;
+
+       return retval;
+}
+
+static mpi_limb_t
+mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+       mpi_limb_t cy_limb;
+       mpi_size_t j;
+       mpi_limb_t prod_high, prod_low;
+       mpi_limb_t x;
+
+       /* The loop counter and index J goes from -SIZE to -1.  This way
+        * the loop becomes faster.  */
+       j = -s1_size;
+       res_ptr -= j;
+       s1_ptr -= j;
+
+       cy_limb = 0;
+       do {
+               umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+
+               prod_low += cy_limb;
+               cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+
+               x = res_ptr[j];
+               prod_low = x + prod_low;
+               cy_limb += prod_low < x ? 1 : 0;
+               res_ptr[j] = prod_low;
+       } while (++j);
+       return cy_limb;
+}
+
+static mpi_limb_t
+mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+             mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+       mpi_limb_t x, y, cy;
+       mpi_size_t j;
+
+       /* The loop counter and index J goes from -SIZE to -1.  This way
+          the loop becomes faster.  */
+       j = -size;
+
+       /* Offset the base pointers to compensate for the negative indices.  */
+       s1_ptr -= j;
+       s2_ptr -= j;
+       res_ptr -= j;
+
+       cy = 0;
+       do {
+               y = s2_ptr[j];
+               x = s1_ptr[j];
+               y += cy;        /* add previous carry to subtrahend */
+               cy = y < cy;    /* get out carry from that addition */
+               y = x - y;      /* main subtract */
+               cy += y > x;    /* get out carry from the subtract, combine */
+               res_ptr[j] = y;
+       } while (++j);
+
+       return cy;
+}
+
+/****************
+ * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ * There are no restrictions on the relative sizes of
+ * the two arguments.
+ * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
+ */
+static int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size)
+{
+       mpi_size_t i;
+       mpi_limb_t op1_word, op2_word;
+
+       for (i = size - 1; i >= 0; i--) {
+               op1_word = op1_ptr[i];
+               op2_word = op2_ptr[i];
+               if (op1_word != op2_word)
+                       goto diff;
+       }
+       return 0;
+
+diff:
+       /* This can *not* be simplified to
+        *   op2_word - op2_word
+        * since that expression might give signed overflow.  */
+       return (op1_word > op2_word) ? 1 : -1;
+}
+
+static void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs)
+{
+       mpi_free_limb_space(a->d);
+       a->d = ap;
+       a->alloced = nlimbs;
+}
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ * and store the USIZE least significant limbs of the result at WP.
+ * The bits shifted out to the right are returned.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be <= UP.
+ */
+
+static mpi_limb_t
+mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
+{
+       mpi_limb_t high_limb, low_limb;
+       unsigned sh_1, sh_2;
+       mpi_size_t i;
+       mpi_limb_t retval;
+
+       sh_1 = cnt;
+       wp -= 1;
+       sh_2 = BITS_PER_MPI_LIMB - sh_1;
+       high_limb = up[0];
+       retval = high_limb << sh_2;
+       low_limb = high_limb;
+       for (i = 1; i < usize; i++) {
+               high_limb = up[i];
+               wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+               low_limb = high_limb;
+       }
+       wp[i] = low_limb >> sh_1;
+
+       return retval;
+}
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ * the NSIZE-DSIZE least significant quotient limbs at QP
+ * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
+ * non-zero, generate that many fraction bits and append them after the
+ * other quotient limbs.
+ * Return the most significant limb of the quotient, this is always 0 or 1.
+ *
+ * Preconditions:
+ * 0. NSIZE >= DSIZE.
+ * 1. The most significant bit of the divisor must be set.
+ * 2. QP must either not overlap with the input operands at all, or
+ *    QP + DSIZE >= NP must hold true. (This means that it's
+ *    possible to put the quotient in the high part of NUM, right after the
+ *    remainder in NUM.
+ * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ */
+
+static mpi_limb_t
+mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+              mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
+{
+       mpi_limb_t most_significant_q_limb = 0;
+
+       switch (dsize) {
+       case 0:
+               /* We are asked to divide by zero, so go ahead and do it!  (To 
make
+                  the compiler not remove this statement, return the value.)  
*/
+               /*
+                * existing clients of this function have been modified
+                * not to call it with dsize == 0, so this should not happen
+                */
+               return 1 / dsize;
+
+       case 1:
+               {
+                       mpi_size_t i;
+                       mpi_limb_t n1;
+                       mpi_limb_t d;
+
+                       d = dp[0];
+                       n1 = np[nsize - 1];
+
+                       if (n1 >= d) {
+                               n1 -= d;
+                               most_significant_q_limb = 1;
+                       }
+
+                       qp += qextra_limbs;
+                       for (i = nsize - 2; i >= 0; i--)
+                               udiv_qrnnd(qp[i], n1, n1, np[i], d);
+                       qp -= qextra_limbs;
+
+                       for (i = qextra_limbs - 1; i >= 0; i--)
+                               udiv_qrnnd(qp[i], n1, n1, 0, d);
+
+                       np[0] = n1;
+               }
+               break;
+
+       case 2:
+               {
+                       mpi_size_t i;
+                       mpi_limb_t n1, n0, n2;
+                       mpi_limb_t d1, d0;
+
+                       np += nsize - 2;
+                       d1 = dp[1];
+                       d0 = dp[0];
+                       n1 = np[1];
+                       n0 = np[0];
+
+                       if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
+                               sub_ddmmss(n1, n0, n1, n0, d1, d0);
+                               most_significant_q_limb = 1;
+                       }
+
+                       for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
+                               mpi_limb_t q;
+                               mpi_limb_t r;
+
+                               if (i >= qextra_limbs)
+                                       np--;
+                               else
+                                       np[0] = 0;
+
+                               if (n1 == d1) {
+                                       /* Q should be either 111..111 or 
111..110.  Need special
+                                        * treatment of this rare case as 
normal division would
+                                        * give overflow.  */
+                                       q = ~(mpi_limb_t) 0;
+
+                                       r = n0 + d1;
+                                       if (r < d1) {   /* Carry in the 
addition? */
+                                               add_ssaaaa(n1, n0, r - d0,
+                                                          np[0], 0, d0);
+                                               qp[i] = q;
+                                               continue;
+                                       }
+                                       n1 = d0 - (d0 != 0 ? 1 : 0);
+                                       n0 = -d0;
+                               } else {
+                                       udiv_qrnnd(q, r, n1, n0, d1);
+                                       umul_ppmm(n1, n0, d0, q);
+                               }
+
+                               n2 = np[0];
+q_test:
+                               if (n1 > r || (n1 == r && n0 > n2)) {
+                                       /* The estimated Q was too large.  */
+                                       q--;
+                                       sub_ddmmss(n1, n0, n1, n0, 0, d0);
+                                       r += d1;
+                                       if (r >= d1)    /* If not carry, test Q 
again.  */
+                                               goto q_test;
+                               }
+
+                               qp[i] = q;
+                               sub_ddmmss(n1, n0, r, n2, n1, n0);
+                       }
+                       np[1] = n1;
+                       np[0] = n0;
+               }
+               break;
+
+       default:
+               {
+                       mpi_size_t i;
+                       mpi_limb_t dX, d1, n0;
+
+                       np += nsize - dsize;
+                       dX = dp[dsize - 1];
+                       d1 = dp[dsize - 2];
+                       n0 = np[dsize - 1];
+
+                       if (n0 >= dX) {
+                               if (n0 > dX
+                                   || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
+                                       mpihelp_sub_n(np, np, dp, dsize);
+                                       n0 = np[dsize - 1];
+                                       most_significant_q_limb = 1;
+                               }
+                       }
+
+                       for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) 
{
+                               mpi_limb_t q;
+                               mpi_limb_t n1, n2;
+                               mpi_limb_t cy_limb;
+
+                               if (i >= qextra_limbs) {
+                                       np--;
+                                       n2 = np[dsize];
+                               } else {
+                                       n2 = np[dsize - 1];
+                                       MPN_COPY_DECR(np + 1, np, dsize - 1);
+                                       np[0] = 0;
+                               }
+
+                               if (n0 == dX) {
+                                       /* This might over-estimate q, but it's 
probably not worth
+                                        * the extra code here to find out.  */
+                                       q = ~(mpi_limb_t) 0;
+                               } else {
+                                       mpi_limb_t r;
+
+                                       udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
+                                       umul_ppmm(n1, n0, d1, q);
+
+                                       while (n1 > r
+                                              || (n1 == r
+                                                  && n0 > np[dsize - 2])) {
+                                               q--;
+                                               r += dX;
+                                               if (r < dX)     /* I.e. "carry 
in previous addition?" */
+                                                       break;
+                                               n1 -= n0 < d1;
+                                               n0 -= d1;
+                                       }
+                               }
+
+                               /* Possible optimization: We already have (q * 
n0) and (1 * n1)
+                                * after the calculation of q.  Taking 
advantage of that, we
+                                * could make this loop make two iterations 
less.  */
+                               cy_limb = mpihelp_submul_1(np, dp, dsize, q);
+
+                               if (n2 != cy_limb) {
+                                       mpihelp_add_n(np, np, dp, dsize);
+                                       q--;
+                               }
+
+                               qp[i] = q;
+                               n0 = np[dsize - 1];
+                       }
+               }
+       }
+
+       return most_significant_q_limb;
+}
+
+static mpi_limb_t
+mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+       mpi_limb_t cy_limb;
+       mpi_size_t j;
+       mpi_limb_t prod_high, prod_low;
+       mpi_limb_t x;
+
+       /* The loop counter and index J goes from -SIZE to -1.  This way
+        * the loop becomes faster.  */
+       j = -s1_size;
+       res_ptr -= j;
+       s1_ptr -= j;
+
+       cy_limb = 0;
+       do {
+               umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+
+               prod_low += cy_limb;
+               cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+
+               x = res_ptr[j];
+               prod_low = x - prod_low;
+               cy_limb += prod_low > x ? 1 : 0;
+               res_ptr[j] = prod_low;
+       } while (++j);
+
+       return cy_limb;
+}
+
+/**
+ * mpi_read_raw_data - Read a raw byte stream as a positive integer
+ * @xbuffer: The data to read
+ * @nbytes: The amount of data to read
+ */
+MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes)
+{
+       const uint8_t *buffer = xbuffer;
+       int i, j;
+       unsigned nbits, nlimbs;
+       mpi_limb_t a;
+       MPI val = NULL;
+
+       while (nbytes > 0 && buffer[0] == 0) {
+               buffer++;
+               nbytes--;
+       }
+
+       nbits = nbytes * 8;
+       if (nbits > MAX_EXTERN_MPI_BITS) {
+               printk("MPI: mpi too large (%u bits)\n", nbits);
+               return NULL;
+       }
+       if (nbytes > 0)
+               nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8);
+
+       nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
+       val = mpi_alloc(nlimbs);
+       if (!val)
+               return NULL;
+       val->nbits = nbits;
+       val->sign = 0;
+       val->nlimbs = nlimbs;
+
+       if (nbytes > 0) {
+               i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+               i %= BYTES_PER_MPI_LIMB;
+               for (j = nlimbs; j > 0; j--) {
+                       a = 0;
+                       for (; i < BYTES_PER_MPI_LIMB; i++) {
+                               a <<= 8;
+                               a |= *buffer++;
+                       }
+                       i = 0;
+                       val->d[j - 1] = a;
+               }
+       }
+       return val;
+}
+
+/****************
+ * Note:  It was a bad idea to use the number of limbs to allocate
+ *       because on a alpha the limbs are large but we normally need
+ *       integers of n bits - So we should chnage this to bits (or bytes).
+ *
+ *       But mpi_alloc is used in a lot of places :-)
+ */
+MPI mpi_alloc(unsigned nlimbs)
+{
+       MPI a;
+
+       a = xmalloc(struct mpi);
+       if (!a)
+               return a;
+
+       if (nlimbs) {
+               a->d = mpi_alloc_limb_space(nlimbs);
+               if (!a->d) {
+                       xfree(a);
+                       return NULL;
+               }
+       } else {
+               a->d = NULL;
+       }
+
+       a->alloced = nlimbs;
+       a->nlimbs = 0;
+       a->sign = 0;
+       a->flags = 0;
+       a->nbits = 0;
+       return a;
+}
+
+void mpi_free(MPI a)
+{
+       if (!a)
+               return;
+
+       if (a->flags & 4)
+               xfree(a->d);
+       else
+               mpi_free_limb_space(a->d);
+
+       if (a->flags & ~7)
+               printk("invalid flag value in mpi\n");
+       xfree(a);
+}
+
+int mpi_cmp_ui(MPI u, unsigned long v)
+{
+       mpi_limb_t limb = v;
+
+       mpi_normalize(u);
+       if (!u->nlimbs && !limb)
+               return 0;
+       if (u->sign)
+               return -1;
+       if (u->nlimbs > 1)
+               return 1;
+
+       if (u->d[0] == limb)
+               return 0;
+       else if (u->d[0] > limb)
+               return 1;
+       else
+               return -1;
+}
+
+int mpi_cmp(MPI u, MPI v)
+{
+       mpi_size_t usize, vsize;
+       int cmp;
+
+       mpi_normalize(u);
+       mpi_normalize(v);
+       usize = u->nlimbs;
+       vsize = v->nlimbs;
+       if (!u->sign && v->sign)
+               return 1;
+       if (u->sign && !v->sign)
+               return -1;
+       if (usize != vsize && !u->sign && !v->sign)
+               return usize - vsize;
+       if (usize != vsize && u->sign && v->sign)
+               return vsize - usize;
+       if (!usize)
+               return 0;
+       cmp = mpihelp_cmp(u->d, v->d, usize);
+       if (u->sign)
+               return -cmp;
+       return cmp;
+}
+
+/****************
+ * Sometimes we have MSL (most significant limbs) which are 0;
+ * this is for some reasons not good, so this function removes them.
+ */
+static void mpi_normalize(MPI a)
+{
+       for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--)
+               ;
+}
+
+/****************
+ * Return the number of bits in A.
+ */
+unsigned mpi_get_nbits(MPI a)
+{
+       unsigned n;
+
+       mpi_normalize(a);
+
+       if (a->nlimbs) {
+               mpi_limb_t alimb = a->d[a->nlimbs - 1];
+               if (alimb)
+                       n = count_leading_zeros(alimb);
+               else
+                       n = BITS_PER_MPI_LIMB;
+               n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB;
+       } else
+               n = 0;
+       return n;
+}
+
+int mpi_test_bit(MPI a, unsigned int n)
+{
+       unsigned int limbno, bitno;
+       mpi_limb_t limb;
+
+       limbno = n / BITS_PER_MPI_LIMB;
+       bitno  = n % BITS_PER_MPI_LIMB;
+
+       if (limbno >= a->nlimbs)
+               return 0; /* too far left: this is a 0 */
+       limb = a->d[limbno];
+       return (limb & (((mpi_limb_t)1) << bitno))? 1: 0;
+}
diff --git a/xen/crypto/Makefile b/xen/crypto/Makefile
index db29655333a3..d88374ddf221 100644
--- a/xen/crypto/Makefile
+++ b/xen/crypto/Makefile
@@ -1,2 +1,3 @@
 obj-y += rijndael.o
+obj-y += rsa.o
 obj-y += vmac.o
diff --git a/xen/crypto/rsa.c b/xen/crypto/rsa.c
new file mode 100644
index 000000000000..eff32222fe3b
--- /dev/null
+++ b/xen/crypto/rsa.c
@@ -0,0 +1,194 @@
+/* rsa.c
+
+   The RSA publickey algorithm.
+
+   Copyright (C) 2001 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+
+#include <xen/rsa.h>
+#include <xen/lib.h>
+#include <xen/err.h>
+#include <xen/bug.h>
+#include <xen/string.h>
+
+void rsa_public_key_init(struct rsa_public_key *key)
+{
+    key->n = NULL;
+    key->e = NULL;
+    key->size = 0;
+}
+
+/*
+ * Computes the size, in octets, of the modulo. Returns 0 if the
+ * modulo is too small to be useful, or otherwise appears invalid.
+ */
+static size_t rsa_check_size(MPI n)
+{
+    /* Round upwards */
+    size_t size;
+
+    /* Even moduli are invalid */
+    if ( mpi_test_bit(n, 0) == 0 )
+        return 0;
+
+    size = (mpi_get_nbits(n) + 7) / 8;
+
+    if ( size < RSA_MINIMUM_N_OCTETS )
+        return 0;
+
+    return size;
+}
+
+int rsa_public_key_prepare(struct rsa_public_key *key)
+{
+    if ( !key->n || !key->e || key->size )
+        return -EINVAL;
+
+    key->size = rsa_check_size(key->n);
+
+    return key->size > 0 ? 0 : -EINVAL;
+}
+
+/*
+ * Formats the PKCS#1 padding, of the form
+ *
+ *   0x00 0x01 0xff ... 0xff 0x00 id ...digest...
+ *
+ * where the 0xff ... 0xff part consists of at least 8 octets. The
+ * total size equals the octet size of n.
+ */
+static uint8_t *pkcs1_signature_prefix(unsigned int key_size, uint8_t *buffer,
+                                       unsigned int id_size, const uint8_t *id,
+                                       unsigned int digest_size)
+{
+    unsigned int j;
+
+    if ( key_size < 11 + id_size + digest_size )
+        return NULL;
+
+    j = key_size - digest_size - id_size;
+
+    memcpy(buffer + j, id, id_size);
+    buffer[0] = 0;
+    buffer[1] = 1;
+    buffer[j - 1] = 0;
+
+    ASSERT(j >= 11);
+    memset(buffer + 2, 0xff, j - 3);
+
+    return buffer + j + id_size;
+}
+
+/*
+ * From RFC 3447, Public-Key Cryptography Standards (PKCS) #1: RSA
+ * Cryptography Specifications Version 2.1.
+ *
+ *     id-sha256    OBJECT IDENTIFIER ::=
+ *       {joint-iso-itu-t(2) country(16) us(840) organization(1)
+ *         gov(101) csor(3) nistalgorithm(4) hashalgs(2) 1}
+ */
+static const uint8_t
+sha256_prefix[] =
+{
+  /* 19 octets prefix, 32 octets hash, total 51 */
+  0x30,      49, /* SEQUENCE */
+    0x30,    13, /* SEQUENCE */
+      0x06,   9, /* OBJECT IDENTIFIER */
+        0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
+      0x05,   0, /* NULL */
+    0x04,    32  /* OCTET STRING */
+      /* Here comes the raw hash value */
+};
+
+static int pkcs1_rsa_sha256_encode(MPI *m, size_t key_size,
+                                   struct sha2_256_state *hash)
+{
+    uint8_t *ptr;
+    uint8_t *buf;
+
+    buf = xmalloc_bytes(key_size);
+    if ( !buf )
+        return -ENOMEM;
+
+    ptr = pkcs1_signature_prefix(key_size, buf,
+                                 sizeof(sha256_prefix), sha256_prefix,
+                                 SHA2_256_DIGEST_SIZE);
+    if ( !ptr )
+    {
+        xfree(buf);
+        return -EINVAL;
+    }
+
+    sha2_256_final(hash, ptr);
+    *m = mpi_read_raw_data(buf, key_size);
+    xfree(buf);
+    return 0;
+}
+
+static int rsa_verify(const struct rsa_public_key *key, MPI m, MPI s)
+{
+    int ret;
+    MPI m1;
+
+    /* (1) Validate 0 <= s < n */
+    if ( mpi_cmp_ui(s, 0) < 0 || mpi_cmp(s, key->n) >= 0 )
+        return -EINVAL;
+
+    m1 = mpi_alloc(key->size / BYTES_PER_MPI_LIMB);
+    if ( !m1 )
+        return -ENOMEM;
+
+    /* (2) m = s^e mod n */
+    ret = mpi_powm(m1, s, key->e, key->n);
+    if ( ret )
+        goto out;
+
+    ret = mpi_cmp (m, m1) ? -EINVAL : 0;
+
+out:
+    mpi_free(m1);
+    return ret;
+}
+
+int rsa_sha256_verify(const struct rsa_public_key *key,
+                      struct sha2_256_state *hash, MPI s)
+{
+    int ret;
+    MPI m;
+
+    ret = pkcs1_rsa_sha256_encode(&m, key->size, hash);
+    if ( ret )
+        return ret;
+
+    ret = rsa_verify(key, m, s);
+
+    mpi_free(m);
+
+    return ret;
+}
diff --git a/xen/include/xen/mpi.h b/xen/include/xen/mpi.h
new file mode 100644
index 000000000000..987971b848e8
--- /dev/null
+++ b/xen/include/xen/mpi.h
@@ -0,0 +1,63 @@
+/* mpi.h  -  Multi Precision Integers
+ *        Copyright (C) 1994, 1996, 1998, 1999,
+ *                    2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *         Actually it's the same code with only minor changes in the
+ *         way the data is stored; this is to support the abstraction
+ *         of an optional secure memory allocation which may be used
+ *         to avoid revealing of sensitive data due to paging etc.
+ *         The GNU MP Library itself is published under the LGPL;
+ *         however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef MPI_H
+#define MPI_H
+
+#include <xen/types.h>
+
+#define BYTES_PER_MPI_LIMB      (BITS_PER_LONG / 8)
+#define BITS_PER_MPI_LIMB       BITS_PER_LONG
+
+typedef unsigned long int mpi_limb_t;
+typedef signed long int mpi_limb_signed_t;
+
+struct mpi {
+    int alloced;        /* array size (# of allocated limbs) */
+    int nlimbs;         /* number of valid limbs */
+    int nbits;          /* the real number of valid bits (info only) */
+    int sign;           /* indicates a negative number */
+    unsigned flags;     /* bit 0: array must be allocated in secure memory 
space */
+    /* bit 1: not used */
+    /* bit 2: the limb is a pointer to some m_alloced data */
+    mpi_limb_t *d;      /* array with the limbs */
+};
+
+typedef struct mpi *MPI;
+
+MPI mpi_alloc(unsigned nlimbs);
+void mpi_free(MPI a);
+MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes);
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
+int mpi_cmp_ui(MPI u, unsigned long v);
+int mpi_cmp(MPI u, MPI v);
+unsigned mpi_get_nbits(MPI a);
+int mpi_test_bit(MPI a, unsigned int n);
+
+#endif /* MPI_H */
diff --git a/xen/include/xen/rsa.h b/xen/include/xen/rsa.h
new file mode 100644
index 000000000000..13b5ebe9a1e6
--- /dev/null
+++ b/xen/include/xen/rsa.h
@@ -0,0 +1,72 @@
+/* rsa.h
+
+   The RSA publickey algorithm.
+
+   Copyright (C) 2001, 2002 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+ 
+#ifndef RSA_H
+#define RSA_H
+
+#include <xen/mpi.h>
+#include <xen/sha2.h>
+#include <xen/types.h>
+
+/*
+ * This limit is somewhat arbitrary. Technically, the smallest modulo
+ * which makes sense at all is 15 = 3*5, phi(15) = 8, size 4 bits. But
+ * for ridiculously small keys, not all odd e are possible (e.g., for
+ * 5 bits, the only possible modulo is 3*7 = 21, phi(21) = 12, and e =
+ * 3 don't work). The smallest size that makes sense with pkcs#1, and
+ * which allows RSA encryption of one byte messages, is 12 octets, 89
+ * bits.
+ */
+#define RSA_MINIMUM_N_OCTETS 12
+#define RSA_MINIMUM_N_BITS (8 * RSA_MINIMUM_N_OCTETS - 7)
+
+struct rsa_public_key
+{
+    /*
+     * Size of the modulo, in octets. This is also the size of all
+     * signatures that are created or verified with this key.
+     */
+    size_t size;
+    MPI n; /* Modulo */
+    MPI e; /* Public exponent */
+};
+
+void rsa_public_key_init(struct rsa_public_key *key);
+
+int rsa_public_key_prepare(struct rsa_public_key *key);
+
+int rsa_sha256_verify(const struct rsa_public_key *key,
+                      struct sha2_256_state *hash,
+                      MPI signature);
+
+#endif /* RSA_H */
-- 
2.49.0
Follow-Ups:
- Re: [PATCH 2/4] crypto: Add RSA support
  - From: Jan Beulich
References:
- [PATCH 0/4] LivePatch signing support
  - From: Ross Lagerwall
Prev by Date: [PATCH 0/4] LivePatch signing support
Next by Date: [PATCH 3/4] livepatch: Embed public key in Xen
Previous by thread: [PATCH 0/4] LivePatch signing support
Next by thread: Re: [PATCH 2/4] crypto: Add RSA support
Index(es):
- Date
- Thread
Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.