/* * Test Floating Point Conversion */ /* we want additional float type definitions */ #define __STDC_WANT_IEC_60559_BFP_EXT__ #define __STDC_WANT_IEC_60559_TYPES_EXT__ #include #include #include #include #include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static char flag_str[256]; static char *get_flag_state(int flags) { if (flags) { snprintf(flag_str, sizeof(flag_str), "%s %s %s %s %s", flags & FE_OVERFLOW ? "OVERFLOW" : "", flags & FE_UNDERFLOW ? "UNDERFLOW" : "", flags & FE_DIVBYZERO ? "DIV0" : "", flags & FE_INEXACT ? "INEXACT" : "", flags & FE_INVALID ? "INVALID" : ""); } else { snprintf(flag_str, sizeof(flag_str), "OK"); } return flag_str; } static void print_double_number(int i, double num) { uint64_t double_as_hex = *(uint64_t *) # int flags = fetestexcept(FE_ALL_EXCEPT); char *fstr = get_flag_state(flags); printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n", i, num, double_as_hex, flags, fstr); } static void print_single_number(int i, float num) { uint32_t single_as_hex = *(uint32_t *) # int flags = fetestexcept(FE_ALL_EXCEPT); char *fstr = get_flag_state(flags); printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n", i, num, single_as_hex, flags, fstr); } static void print_half_number(int i, uint16_t num) { int flags = fetestexcept(FE_ALL_EXCEPT); char *fstr = get_flag_state(flags); printf("%02d HALF: %#04x (%#x => %s)\n", i, num, flags, fstr); } static void print_int64(int i, int64_t num) { uint64_t int64_as_hex = *(uint64_t *) # int flags = fetestexcept(FE_ALL_EXCEPT); char *fstr = get_flag_state(flags); printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n", i, num, int64_as_hex, flags, fstr); } #ifndef SNANF /* Signaling NaN macros, if supported. */ # define SNANF (__builtin_nansf ("")) # define SNAN (__builtin_nans ("")) # define SNANL (__builtin_nansl ("")) #endif float single_numbers[] = { -SNANF, -NAN, -INFINITY, -FLT_MAX, -1.111E+31, -1.111E+30, -1.08700982e-12, -1.78051176e-20, -FLT_MIN, 0.0, FLT_MIN, 2.98023224e-08, 5.96046E-8, /* min positive FP16 subnormal */ 6.09756E-5, /* max subnormal FP16 */ 6.10352E-5, /* min positive normal FP16 */ 1.0, 1.0009765625, /* smallest float after 1.0 FP16 */ 2.0, M_E, M_PI, 65503.0, 65504.0, /* max FP16 */ 65505.0, 131007.0, 131008.0, /* max AFP */ 131009.0, 1.111E+30, FLT_MAX, INFINITY, NAN, SNANF }; static void convert_single_to_half(void) { int i; printf("Converting single-precision to half-precision\n"); for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { float input = single_numbers[i]; feclearexcept(FE_ALL_EXCEPT); print_single_number(i, input); #if defined(__arm__) uint32_t output; asm("vcvtb.f16.f32 %0, %1" : "=t" (output) : "x" (input)); #else uint16_t output; asm("fcvt %h0, %s1" : "=w" (output) : "x" (input)); #endif print_half_number(i, output); } } static void convert_single_to_double(void) { int i; printf("Converting single-precision to double-precision\n"); for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { float input = single_numbers[i]; /* uint64_t output; */ double output; feclearexcept(FE_ALL_EXCEPT); print_single_number(i, input); #if defined(__arm__) asm("vcvt.f64.f32 %P0, %1" : "=w" (output) : "t" (input)); #else asm("fcvt %d0, %s1" : "=w" (output) : "x" (input)); #endif print_double_number(i, output); } } static void convert_single_to_integer(void) { int i; printf("Converting single-precision to integer\n"); for (i = 0; i < ARRAY_SIZE(single_numbers); ++i) { float input = single_numbers[i]; int64_t output; feclearexcept(FE_ALL_EXCEPT); print_single_number(i, input); #if defined(__arm__) /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ output = input; #else asm("fcvtzs %0, %s1" : "=r" (output) : "w" (input)); #endif print_int64(i, output); } } /* This allows us to initialise some doubles as pure hex */ typedef union { double d; uint64_t h; } test_doubles; test_doubles double_numbers[] = { {SNAN}, {-NAN}, {-INFINITY}, {-DBL_MAX}, {-FLT_MAX-1.0}, {-FLT_MAX}, {-1.111E+31}, {-1.111E+30}, /* half prec */ {-2.0}, {-1.0}, {-DBL_MIN}, {-FLT_MIN}, {0.0}, {FLT_MIN}, {2.98023224e-08}, {5.96046E-8}, /* min positive FP16 subnormal */ {6.09756E-5}, /* max subnormal FP16 */ {6.10352E-5}, /* min positive normal FP16 */ {1.0}, {1.0009765625}, /* smallest float after 1.0 FP16 */ {DBL_MIN}, {1.3789972848607228e-308}, {1.4914738736681624e-308}, {1.0}, {2.0}, {M_E}, {M_PI}, {65503.0}, {65504.0}, /* max FP16 */ {65505.0}, {131007.0}, {131008.0}, /* max AFP */ {131009.0}, {.h = 0x41dfffffffc00000 }, /* to int = 0x7fffffff */ {FLT_MAX}, {FLT_MAX + 1.0}, {DBL_MAX}, {INFINITY}, {NAN}, {.h = 0x7ff0000000000001}, /* SNAN */ {SNAN}, }; static void convert_double_to_half(void) { int i; printf("Converting double-precision to half-precision\n"); for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { double input = double_numbers[i].d; uint16_t output; feclearexcept(FE_ALL_EXCEPT); print_double_number(i, input); /* as we don't have _Float16 support */ #if defined(__arm__) /* asm("vcvtb.f16.f64 %0, %P1" : "=t" (output) : "x" (input)); */ output = input; #else asm("fcvt %h0, %d1" : "=w" (output) : "x" (input)); #endif print_half_number(i, output); } } static void convert_double_to_single(void) { int i; printf("Converting double-precision to single-precision\n"); for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { double input = double_numbers[i].d; uint32_t output; feclearexcept(FE_ALL_EXCEPT); print_double_number(i, input); #if defined(__arm__) asm("vcvt.f32.f64 %0, %P1" : "=w" (output) : "x" (input)); #else asm("fcvt %s0, %d1" : "=w" (output) : "x" (input)); #endif print_single_number(i, output); } } static void convert_double_to_integer(void) { int i; printf("Converting double-precision to integer\n"); for (i = 0; i < ARRAY_SIZE(double_numbers); ++i) { double input = double_numbers[i].d; int64_t output; feclearexcept(FE_ALL_EXCEPT); print_double_number(i, input); #if defined(__arm__) /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */ output = input; #else asm("fcvtzs %0, %d1" : "=r" (output) : "w" (input)); #endif print_int64(i, output); } } /* no handy defines for these numbers */ uint16_t half_numbers[] = { 0xffff, /* -NaN / AHP -Max */ 0xfcff, /* -NaN / AHP */ 0xfc01, /* -NaN / AHP */ 0xfc00, /* -Inf */ 0xfbff, /* -Max */ 0xc000, /* -2 */ 0xbc00, /* -1 */ 0x8001, /* -MIN subnormal */ 0x8000, /* -0 */ 0x0000, /* +0 */ 0x0001, /* MIN subnormal */ 0x3c00, /* 1 */ 0x7bff, /* Max */ 0x7c00, /* Inf */ 0x7c01, /* NaN / AHP */ 0x7cff, /* NaN / AHP */ 0x7fff, /* NaN / AHP +Max*/ }; static void convert_half_to_double(void) { int i; printf("Converting half-precision to double-precision\n"); for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { uint16_t input = half_numbers[i]; double output; feclearexcept(FE_ALL_EXCEPT); print_half_number(i, input); #if defined(__arm__) /* asm("vcvtb.f64.f16 %P0, %1" : "=w" (output) : "t" (input)); */ output = input; #else asm("fcvt %d0, %h1" : "=w" (output) : "x" (input)); #endif print_double_number(i, output); } } static void convert_half_to_single(void) { int i; printf("Converting half-precision to single-precision\n"); for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { uint16_t input = half_numbers[i]; float output; feclearexcept(FE_ALL_EXCEPT); print_half_number(i, input); #if defined(__arm__) asm("vcvtb.f32.f16 %0, %1" : "=w" (output) : "x" ((uint32_t)input)); #else asm("fcvt %s0, %h1" : "=w" (output) : "x" (input)); #endif print_single_number(i, output); } } static void convert_half_to_integer(void) { int i; printf("Converting half-precision to integer\n"); for (i = 0; i < ARRAY_SIZE(half_numbers); ++i) { uint16_t input = half_numbers[i]; int64_t output; feclearexcept(FE_ALL_EXCEPT); print_half_number(i, input); #if defined(__arm__) /* asm("vcvt.s32.f16 %0, %1" : "=t" (output) : "t" (input)); v8.2*/ output = input; #else asm("fcvt %s0, %h1" : "=w" (output) : "x" (input)); #endif print_int64(i, output); } } typedef struct { int flag; char *desc; } float_mapping; float_mapping round_flags[] = { { FE_TONEAREST, "to nearest" }, { FE_UPWARD, "upwards" }, { FE_DOWNWARD, "downwards" }, { FE_TOWARDZERO, "to zero" } }; int main(int argc, char *argv[argc]) { int i; printf("#### Enabling IEEE Half Precision\n"); for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { fesetround(round_flags[i].flag); printf("### Rounding %s\n", round_flags[i].desc); convert_single_to_half(); convert_single_to_double(); convert_double_to_half(); convert_double_to_single(); convert_half_to_single(); convert_half_to_double(); } /* convert to integer */ convert_single_to_integer(); convert_double_to_integer(); convert_half_to_integer(); /* And now with ARM alternative FP16 */ #if defined(__arm__) /* See glibc sysdeps/arm/fpu_control.h */ asm("mrc p10, 7, r1, cr1, cr0, 0\n\t" "orr r1, r1, %[flags]\n\t" "mcr p10, 7, r1, cr1, cr0, 0\n\t" : /* no output */ : [flags] "n" (1 << 26) : "r1" ); #else asm("mrs x1, fpcr\n\t" "orr x1, x1, %[flags]\n\t" "msr fpcr, x1\n\t" : /* no output */ : [flags] "n" (1 << 26) : "x1" ); #endif printf("#### Enabling ARM Alternative Half Precision\n"); for (i = 0; i < ARRAY_SIZE(round_flags); ++i) { fesetround(round_flags[i].flag); printf("### Rounding %s\n", round_flags[i].desc); convert_single_to_half(); convert_single_to_double(); convert_double_to_half(); convert_double_to_single(); convert_half_to_single(); convert_half_to_double(); } /* convert to integer */ convert_single_to_integer(); convert_double_to_integer(); convert_half_to_integer(); return 0; }