#include #include #include #include #include #include #include #include #include /* * inline asm constraint reference * http://blogs.sun.com/x86be/entry/gcc_style_asm_inlining_support */ #define MAXLOOP 10000000ULL #ifndef M_PIl /* GNU extension */ #define M_PIl 3.1415926535897932384626433832795029L #endif #define FPREGS_NUM 8 #define FPREG_SIZE 10 #define FPSTACK_SIZE (FPREGS_NUM * FPREG_SIZE) /* * http://lxr.linux.no/linux+v2.6.27.5/include/asm-x86/processor.h#L283 */ struct i387_fsave { uint32_t fs_cwd; /* FPU Control Word */ uint32_t fs_swd; /* FPU Status Word */ uint32_t fs_twd; /* FPU Tag Word */ uint32_t fs_fip; /* FPU IP Offset */ uint32_t fs_fcs; /* FPU IP Selector */ uint32_t fs_foo; /* FPU Operand Pointer Offset */ uint32_t fs_fos; /* FPU Operand Pointer Selector */ /* 8*10 bytes for each FP-reg = 80 bytes: */ uint8_t fs_st_space[FPSTACK_SIZE]; /* Software status information [not touched by FSAVE ]: */ uint32_t fs_status; }; void snprintfhex(char *buf, size_t size, const long double val) { size_t i, len; const uint8_t *uval; len = 0; uval = (const uint8_t *)&val; for (i = 0; i < size; i++) len += snprintf(&buf[len], size - len, "%02x", uval[size - 1 - i]); } void printfhex(const char *fmt, const char *str, const long double val, ...) { char buf[100]; char buf2[200]; va_list ap; snprintfhex(buf, sizeof(buf), val); snprintf(buf2, sizeof(buf2), "%s%s%s", fmt, buf, str); va_start(ap, val); vprintf(buf2, ap); va_end(ap); } void print_fsave(struct i387_fsave f, const char *msg) { int i; long double *val; printf("\tCtrl: %08x %08x %08x %08x %08x %08x\n", f.fs_cwd, f.fs_swd, f.fs_twd, f.fs_fip, f.fs_fcs, f.fs_foo, f.fs_fos); for (i = 0; i < FPREGS_NUM; i++) { val = (long double *)&(f.fs_st_space[i * FPREG_SIZE]); printfhex("\t%i %20.20Le ", (i % 2) ? "\n" : "", *val, i, *val); } printf("\n"); } void test_add(const char *msg) { float f; double d; long double ld; uint64_t i; printf("%s: %s\n", __func__, msg); f = 0.0; d = 0.0; ld = 0.0; printfhex("f: %20.20e, ", "\n", f, f); printfhex("d: %20.20le, ", "\n", d, d); printfhex("ld: %20.20Le, ", "\n", ld, ld); for (i = 0; i < MAXLOOP; i++) { f += 0.1; d += 0.1; ld += 0.1; } printfhex("f: %20.20e, ", "\n", f, f); printfhex("d: %20.20le, ", "\n", d, d); printfhex("ld: %20.20Le, ", "\n", ld, ld); } void test_sin(const char *msg) { float f; double d; long double ld; uint64_t i; printf("%s: %s\n", __func__, msg); #if 0 sin(ln(2) radians) = ~ 0.63896 #endif f = 0.0; d = 0.0; ld = 0.0; asm volatile( "fldln2;" "fsin;" : "=t" (f)); asm volatile( "fldln2;" "fsin;" : "=t" (d)); asm volatile( "fldln2;" "fsin;" : "=t" (ld)); printfhex("f: %20.20e, ", "\n", f, f); printfhex("d: %20.20le, ", "\n", d, d); printfhex("ld: %20.20Le, ", "\n", ld, ld); } void test_manysin(const char *msg) { long double ld = 0.0, ldout = 0.0; int i; printf("%s: %s\n", __func__, msg); /* Whole range */ for (ld = 2.0e-63, i = -63; i < 63; ld *= -10.0, i++) { asm volatile( "fsin" : "=t" (ldout) : "0" (ld) ); printfhex("sin( %20.20Le, ", "", ld, ld); printfhex(") = %20.20Le, ", "\n", ldout, ldout); } /* -2pi to 2pi */ for (ld = -2.0*M_PIl; ld < 2.0*M_PIl; ld += 0.03) { asm volatile( "fsin" : "=t" (ldout) : "0" (ld) ); printfhex("sin( %20.20Le, ", "", ld, ld); printfhex(") = %20.20Le, ", "\n", ldout, ldout); } ld = 0.7853975; /* pi/4: sin(ld) = 1/sqrt2 = 0.707 */ asm volatile( "fsin" : "=t" (ldout) : "0" (ld) ); printfhex("sin( %20.20Le, ", "", ld, ld); printfhex(") = %20.20Le, ", "\n", ldout, ldout); } /* sse */ void test_recip(const char *msg) { float f, fout; int i; printf("%s: %s\n", __func__, msg); for (f = 1.0e-44, i = -44; i < 44; f *= -10.0, i++) { asm volatile( "rcpss %1, %0" : "=x" (fout) : "x" (f) ); printfhex("rcpss( %20.20e, ", "", f, f); printfhex(" ) = %20.20e, ", "\n", fout, fout); asm volatile( "rsqrtss %1, %0" : "=x" (fout) : "x" (f) ); printfhex("rsqrtss( %20.20e, ", "", f, f); printfhex(" ) = %20.20e, ", "", fout, fout); } printf("\n"); } /* sse */ void test_addsubmuldiv(const char *msg) { float f1 = 1.0e-44; float f2 = -3.124e-44; float fout; double d1 = 1.0e-300; double d2 = -3.124e-300; double dout; while (d1 < 1.0e300) { /* Add */ asm volatile("movd %1, %0; addss %2, %0" : "=x" (fout) /*0*/ : "m" (f1) /*1*/, "m" (f2) /*2*/ ); asm volatile("movsd %1, %0; addsd %2, %0" : "=x" (dout) /*0*/ : "m" (d1) /*1*/, "m" (d2) /*2*/ ); printfhex("addss( %20.20e, ", "", f1, f1); printfhex(" %20.20e", "", f2, f2); printfhex(" ) = %20.20e, ", "\n", fout, fout); printfhex("addsd( %20.20le, ", "", d1, d1); printfhex(" %20.20le", "", d2, d2); printfhex(" ) = %20.20le, ", "\n", dout, dout); printfhex("d1 + d2 = %20.20le, ", "\n", d1 + d2, d1 + d2); /* Sub */ asm volatile("movd %1, %0; subss %2, %0" : "=x" (fout) /*0*/ : "m" (f1) /*1*/, "m" (f2) /*2*/ ); asm volatile("movsd %1, %0; subsd %2, %0" : "=x" (dout) /*0*/ : "m" (d1) /*1*/, "m" (d2) /*2*/ ); printfhex("subss( %20.20e, ", "", f1, f1); printfhex(" %20.20e", "", f2, f2); printfhex(" ) = %20.20e, ", "\n", fout, fout); printfhex("subsd( %20.20le, ", "", d1, d1); printfhex(" %20.20le", "", d2, d2); printfhex(" ) = %20.20le, ", "\n", dout, dout); printfhex("d1 - d2 = %20.20le, ", "\n", d1 - d2, d1 - d2); /* Mul */ asm volatile( "movd %1, %0; mulss %2, %0" : "=x" (fout) /*0*/ : "m" (f1) /*1*/, "m" (f2) /*2*/ ); asm volatile( "movsd %1, %0; mulsd %2, %0" : "=x" (dout) /*0*/ : "m" (d1) /*1*/, "m" (d2) /*2*/ ); printfhex("mulss( %20.20e, ", "", f1, f1); printfhex(" %20.20e", "", f2, f2); printfhex(" ) = %20.20e, ", "\n", fout, fout); printfhex("mulsd( %20.20le, ", "", d1, d1); printfhex(" %20.20le", "", d2, d2); printfhex(" ) = %20.20le, ", "\n", dout, dout); printfhex("d1 * d2 = %20.20le, ", "\n", d1 * d2, d1 * d2); /* Div */ asm volatile("movd %1, %0; divss %2, %0" : "=x" (fout) /*0*/ : "m" (f1) /*1*/, "m" (f2) /*2*/ ); asm volatile("movsd %1, %0; divsd %2, %0" : "=x" (dout) /*0*/ : "m" (d1) /*1*/, "m" (d2) /*2*/ ); printfhex("divss( %20.20e, ", "", f1, f1); printfhex(" %20.20e", "", f2, f2); printfhex(" ) = %20.20e, ", "\n", fout, fout); printfhex("divsd( %20.20le, ", "", d1, d1); printfhex(" %20.20le", "", d2, d2); printfhex(" ) = %20.20le, ", "\n", dout, dout); printfhex("d1 / d2 = %20.20le, ", "\n", d1 / d2, d1 / d2); f1 *= -4.0 * 3.14159; d1 *= -4.0 * 3.14159; f2 *= -5.0 * 2.78182; d2 *= -5.0 * 2.78182; } printf("\n"); } void test_fsave(const char *msg) { printf("%s: %s\n", __func__, msg); struct i387_fsave f; asm("finit; fld1; fldl2e; fldl2t; fldlg2; fldln2; fldz; fldpi; fldpi; fcos; fsave %0" : : "m"(f) ); print_fsave(f, "filled stack"); /* * Stack: * 0: cos(pi) = -1.0 * 1: pi = 3.14... * 2: 0.0 * 3: ln(2) = 0.6931... * 4: log10(2) = 0.3010... * 5: log2(10) = 3.3219... * 6: log2(e) = 1.4426... * 7: 1.0 */ } #define ALLTESTS 6 int main(int argc, const char * const argv[]) { int i, runtests; int testnr; printf("sizeof(float): %u\n", sizeof(float)); printf("sizeof(double): %u\n", sizeof(double)); printf("sizeof(long double): %u\n", sizeof(long double)); runtests = argc; if (runtests == 1) runtests = ALLTESTS; for (i = 1; i <= runtests; i++) { if (argc == 1) { testnr = i; } else { testnr = (int)strtol(argv[i-1], NULL, 0); if (errno == ERANGE) continue; } switch (testnr) { case 1: test_add("floating add"); break; case 2: test_sin("floating sine"); break; case 3: test_fsave("fsave"); break; case 4: test_manysin("sin of +- 2e-63 .. 2e63"); break; case 5: test_recip("rcpss & rsqrtss"); break; case 6: test_addsubmuldiv("sse add, sub, mul, div"); break; } } return 0; }