diff --git a/tools/lccrt_s/include/lccrt_s.h b/tools/lccrt_s/include/lccrt_s.h
index c95c27c..4795a10 100644
--- a/tools/lccrt_s/include/lccrt_s.h
+++ b/tools/lccrt_s/include/lccrt_s.h
@@ -508,4 +508,7 @@ extern __lccrt_uint128_t __lccrt_fptoui_sat_i128f64( __lccrt_f64_t);
 
 extern __lccrt_nint96_t __lccrt_fshl_i96( __lccrt_nint96_t, __lccrt_nint96_t, __lccrt_nint96_t);
 
+extern void __lccrt_vecbitpack( void *dst, void *src, int64_t veclen, int64_t elembitsize);
+extern void __lccrt_vecbitunpack( void *dst, void *src, int64_t veclen, int64_t elembitsize);
+
 extern uint8_t __lccrt_typetest_unsupported_yet( void);
diff --git a/tools/lccrt_s/src/lccrt_vec.c b/tools/lccrt_s/src/lccrt_vec.c
index 45f6af2..6134908 100644
--- a/tools/lccrt_s/src/lccrt_vec.c
+++ b/tools/lccrt_s/src/lccrt_vec.c
@@ -27,6 +27,84 @@ __lccopt_shuffle_##suffix( arg_type x, arg_type y, ind_type c) \
     return (r); \
 } /* __builtin_lccopt_shuffle_##suffix */
 
+/**
+ * Size in bytes of the storage slot used for a bitsize-bit integer:
+ * the smallest of 1, 2, 4, 8 or 16 that holds bitsize bits.
+ * Any value up to 8 (non-positive ones included) yields 1; a value
+ * above 128 trips the assertion.
+ */
+static int __lccrt_bitwidth_bytesize( int bitsize) {
+    int r = 0;
+
+    if ( bitsize <= 8 ) {
+        r = 1;
+    } else if ( bitsize <= 16 ) {
+        r = 2;
+    } else if ( bitsize <= 32 ) {
+        r = 4;
+    } else if ( bitsize <= 64 ) {
+        r = 8;
+    } else if ( bitsize <= 128 ) {
+        r = 16;
+    } else {
+        assert( 0);
+    }
+
+    return (r);
+}
+
+/**
+ * Store the low bytesize bytes of v at dst with a single store of the
+ * matching unsigned type. Only sizes 1, 2, 4 and 8 are supported; any
+ * other size trips the assertion.
+ */
+void
+__lccrt_store_bytes( void *dst, uint64_t v, int64_t bytesize) {
+    if ( bytesize == 1 ) {
+        uint8_t *p8 = (uint8_t *)dst;
+        p8[0] = (uint8_t)v;
+    } else if ( bytesize == 2 ) {
+        uint16_t *p16 = (uint16_t *)dst;
+        p16[0] = (uint16_t)v;
+    } else if ( bytesize == 4 ) {
+        uint32_t *p32 = (uint32_t *)dst;
+        p32[0] = (uint32_t)v;
+    } else if ( bytesize == 8 ) {
+        uint64_t *p64 = (uint64_t *)dst;
+        p64[0] = v;
+    } else {
+        assert( 0);
+    }
+}
+
+/**
+ * Load bytesize bytes from src with a single load of the matching
+ * unsigned type and return the value zero-extended to 64 bits. Only
+ * sizes 1, 2, 4 and 8 are supported; any other size trips the assertion.
+ */
+uint64_t
+__lccrt_load_bytes( void *src, int64_t bytesize) {
+    uint64_t r = 0;
+
+    if ( bytesize == 1 ) {
+        uint8_t *p8 = (uint8_t *)src;
+        r = p8[0];
+    } else if ( bytesize == 2 ) {
+        uint16_t *p16 = (uint16_t *)src;
+        r = p16[0];
+    } else if ( bytesize == 4 ) {
+        uint32_t *p32 = (uint32_t *)src;
+        r = p32[0];
+    } else if ( bytesize == 8 ) {
+        uint64_t *p64 = (uint64_t *)src;
+        r = p64[0];
+    } else {
+        assert( 0);
+    }
+
+    return (r);
+}
+
 __lccopt_vec_shuffle( v16i8, __lccrt_vec_si( 8, 16), __lccrt_vec_si( 32, 16), 16)
 __lccopt_vec_shuffle( v8i16, __lccrt_vec_si( 16, 8), __lccrt_vec_si( 32, 8), 8)
 __lccopt_vec_shuffle( v4i32, __lccrt_vec_si( 32, 4), __lccrt_vec_si( 32, 4), 4)
@@ -103,3 +181,84 @@ __lccrt_sqrt_v4f32( __lccrt_vec_f( 32, 4) a)
 
     return (r);
 }
+
+/**
+ * Pack the bits of a vector of non-standard-width integers into a single
+ * (non-standard-width) integer value. Element 0 ends up in the least
+ * significant bits of the result; bits of a source element above
+ * elembitsize are ignored.
+ * The total number of significant bits must not exceed 64.
+ * Example: <3 x i6> -> i18.
+ *
+ * dst         - receives the packed value; the slot written is
+ *               __lccrt_bitwidth_bytesize( veclen*elembitsize) bytes wide
+ * src         - veclen elements, one per slot of
+ *               __lccrt_bitwidth_bytesize( elembitsize) bytes
+ * veclen      - number of vector elements
+ * elembitsize - number of significant bits in each element
+ */
+void
+__lccrt_vecbitpack( void *dst, void *src, int64_t veclen, int64_t elembitsize) {
+    uint64_t r = 0;
+    uint64_t mask = 0;
+    int ebytes = 0;
+
+    /* The size limit is checked by division, so the veclen*elembitsize
+       product cannot overflow or be truncated ahead of the check. */
+    assert( (0 <= veclen) && (0 < elembitsize));
+    assert( (veclen == 0) || ((elembitsize <= 64) && (veclen <= 64 / elembitsize)));
+
+    /* A 64-bit element fills the whole word, and shifting a 64-bit value
+       by 64 is undefined, so that width is special-cased here and below. */
+    ebytes = __lccrt_bitwidth_bytesize( (int)elembitsize);
+    mask = (elembitsize < 64) ? ((((uint64_t)1) << elembitsize) - 1) : ~((uint64_t)0);
+
+    for ( int64_t i = veclen - 1; i >= 0; --i ) {
+        uint64_t si = __lccrt_load_bytes( (char *)src + i*ebytes, ebytes) & mask;
+
+        r = ((elembitsize < 64) ? (r << elembitsize) : 0) | si;
+    }
+
+    __lccrt_store_bytes( dst, r, __lccrt_bitwidth_bytesize( (int)(veclen*elembitsize)));
+
+    return;
+}
+
+/**
+ * Unpack the bits of a (non-standard-width) integer into a vector of
+ * non-standard-width integers. The least significant elembitsize bits
+ * become element 0; every element is stored zero-extended to its slot.
+ * The total number of significant bits must not exceed 64.
+ * Example: i18 -> <3 x i6>.
+ *
+ * dst         - receives veclen elements, one per slot of
+ *               __lccrt_bitwidth_bytesize( elembitsize) bytes
+ * src         - packed value; the slot read is
+ *               __lccrt_bitwidth_bytesize( veclen*elembitsize) bytes wide
+ * veclen      - number of vector elements
+ * elembitsize - number of significant bits in each element
+ */
+void
+__lccrt_vecbitunpack( void *dst, void *src, int64_t veclen, int64_t elembitsize) {
+    uint64_t v = 0;
+    uint64_t mask = 0;
+    int ebytes = 0;
+
+    /* Validate before src is read; the size limit is checked by division,
+       so the veclen*elembitsize product cannot overflow ahead of the check. */
+    assert( (0 <= veclen) && (0 < elembitsize));
+    assert( (veclen == 0) || ((elembitsize <= 64) && (veclen <= 64 / elembitsize)));
+
+    /* Shifting a 64-bit value by 64 is undefined, so a 64-bit element
+       is special-cased here and in the loop. */
+    ebytes = __lccrt_bitwidth_bytesize( (int)elembitsize);
+    mask = (elembitsize < 64) ? ((((uint64_t)1) << elembitsize) - 1) : ~((uint64_t)0);
+    v = __lccrt_load_bytes( src, __lccrt_bitwidth_bytesize( (int)(veclen*elembitsize)));
+
+    for ( int64_t i = 0; i < veclen; ++i ) {
+        __lccrt_store_bytes( (char *)dst + i*ebytes, v & mask, ebytes);
+        v = (elembitsize < 64) ? (v >> elembitsize) : 0;
+    }
+
+    return;
+}