ecomp.28.143430

 :
   liblccopt: ecomp.28.143142 - ecomp.r28.143430
This commit is contained in:
stepanov 2024-02-04 01:23:09 +03:00
parent 66be8012da
commit d3efee6803
2 changed files with 116 additions and 0 deletions

View File

@ -508,4 +508,7 @@ extern __lccrt_uint128_t __lccrt_fptoui_sat_i128f64( __lccrt_f64_t);
extern __lccrt_nint96_t __lccrt_fshl_i96( __lccrt_nint96_t, __lccrt_nint96_t, __lccrt_nint96_t);
/* Pack veclen elements of elembitsize significant bits each (read from src)
   into a single integer written to dst; the total must not exceed 64 bits. */
extern void __lccrt_vecbitpack( void *dst, void *src, int64_t veclen, int64_t elembitsize);
/* Inverse of __lccrt_vecbitpack: split the integer read from src into veclen
   elements of elembitsize significant bits each, written to dst. */
extern void __lccrt_vecbitunpack( void *dst, void *src, int64_t veclen, int64_t elembitsize);
extern uint8_t __lccrt_typetest_unsupported_yet( void);

View File

@ -27,6 +27,68 @@ __lccopt_shuffle_##suffix( arg_type x, arg_type y, ind_type c) \
return (r); \
} /* __builtin_lccopt_shuffle_##suffix */
/**
 * Size in bytes of the smallest power-of-two container (1, 2, 4, 8 or 16)
 * that holds bitsize bits. Widths of 8 bits or fewer, including zero and
 * negative values, map to 1. Widths above 128 hit assert( 0) and yield 0
 * when assertions are compiled out.
 */
static int __lccrt_bitwidth_bytesize( int bitsize) {
    int bytes = 1;

    if ( bitsize > 128 ) {
        assert( 0);
        return (0);
    }

    /* Double the container until it covers the requested width. */
    while ( 8*bytes < bitsize ) {
        bytes *= 2;
    }

    return (bytes);
}
/**
 * Write v to dst as one unsigned integer of bytesize bytes, keeping only the
 * low-order bits that fit. Supported sizes are 1, 2, 4 and 8; any other size
 * hits assert( 0) and stores nothing.
 */
void
__lccrt_store_bytes( void *dst, uint64_t v, int64_t bytesize) {
    switch ( bytesize ) {
      case 1:
        *(uint8_t *)dst = (uint8_t)v;
        break;
      case 2:
        *(uint16_t *)dst = (uint16_t)v;
        break;
      case 4:
        *(uint32_t *)dst = (uint32_t)v;
        break;
      case 8:
        *(uint64_t *)dst = v;
        break;
      default:
        assert( 0);
        break;
    }
}
/**
 * Read one unsigned integer of bytesize bytes from src and return it
 * zero-extended to 64 bits. Supported sizes are 1, 2, 4 and 8; any other size
 * hits assert( 0) and yields 0.
 */
uint64_t
__lccrt_load_bytes( void *src, int64_t bytesize) {
    uint64_t value = 0;

    switch ( bytesize ) {
      case 1:
        value = *(uint8_t *)src;
        break;
      case 2:
        value = *(uint16_t *)src;
        break;
      case 4:
        value = *(uint32_t *)src;
        break;
      case 8:
        value = *(uint64_t *)src;
        break;
      default:
        assert( 0);
        break;
    }

    return (value);
}
/* Instantiations of the __lccopt_vec_shuffle macro for the v16i8, v8i16 and
   v4i32 suffixes.
   NOTE(review): the macro definition is only partly visible here; each
   expansion appears to define __lccopt_shuffle_<suffix>( x, y, c) with the
   second argument as the operand type and the third as the index type —
   confirm against the macro. */
__lccopt_vec_shuffle( v16i8, __lccrt_vec_si( 8, 16), __lccrt_vec_si( 32, 16), 16)
__lccopt_vec_shuffle( v8i16, __lccrt_vec_si( 16, 8), __lccrt_vec_si( 32, 8), 8)
__lccopt_vec_shuffle( v4i32, __lccrt_vec_si( 32, 4), __lccrt_vec_si( 32, 4), 4)
@ -103,3 +165,54 @@ __lccrt_sqrt_v4f32( __lccrt_vec_f( 32, 4) a)
return (r);
}
/**
 * Pack the bits of a vector of non-standard-width integers into a single
 * (non-standard-width) integer value.
 * The total number of significant bits must not exceed 64.
 * Example: <3 x i6> -> i18.
 *
 * dst         - receives the packed value, stored in a container of
 *               __lccrt_bitwidth_bytesize( veclen*elembitsize) bytes.
 * src         - vector elements, each occupying
 *               __lccrt_bitwidth_bytesize( elembitsize) bytes.
 * veclen      - number of elements, >= 0.
 * elembitsize - significant bits per element, 1..64.
 *
 * Element 0 lands in the least significant bits of the result; bits of an
 * element above elembitsize are ignored.
 */
void
__lccrt_vecbitpack( void *dst, void *src, int64_t veclen, int64_t elembitsize) {
    uint64_t r = 0;

    /* Validate in 64-bit arithmetic before deriving anything from the
       arguments: the product is not narrowed to int ahead of the check, and
       the division form cannot overflow. */
    assert( (0 <= veclen) && (0 < elembitsize) && (elembitsize <= 64)
            && (veclen <= 64 / elembitsize));

    int ebytes = __lccrt_bitwidth_bytesize( (int)elembitsize);
    int maskshift = (int)(64 - elembitsize);
    int totalbitsize = (int)(veclen*elembitsize);

    /* Walk from the last element down so that element 0 ends up lowest. */
    for ( int i = (int)veclen - 1; i >= 0; --i ) {
        uint64_t si = __lccrt_load_bytes( (char *)src + i*ebytes, ebytes);

        /* Drop everything above the element's significant bits. */
        si = (si << maskshift) >> maskshift;

        /* A 64-bit element implies a single-element vector; shifting a
           64-bit value by 64 is undefined, so the accumulator (still empty
           at that point) is replaced instead of shifted. */
        r = ((elembitsize < 64) ? (r << elembitsize) : 0) | si;
    }

    __lccrt_store_bytes( dst, r, __lccrt_bitwidth_bytesize( totalbitsize));
    return;
}
/**
 * Unpack the bits of a (non-standard-width) integer into a vector of
 * non-standard-width integers.
 * The total number of significant bits must not exceed 64.
 * Example: i18 -> <3 x i6>.
 *
 * dst         - receives the vector elements, each stored zero-extended in
 *               __lccrt_bitwidth_bytesize( elembitsize) bytes.
 * src         - packed value, read from a container of
 *               __lccrt_bitwidth_bytesize( veclen*elembitsize) bytes.
 * veclen      - number of elements, >= 0.
 * elembitsize - significant bits per element, 1..64.
 *
 * Element 0 is taken from the least significant bits of the packed value.
 */
void
__lccrt_vecbitunpack( void *dst, void *src, int64_t veclen, int64_t elembitsize) {
    /* Validate in 64-bit arithmetic before touching src: the product is not
       narrowed to int ahead of the check, and the division form cannot
       overflow. */
    assert( (0 <= veclen) && (0 < elembitsize) && (elembitsize <= 64)
            && (veclen <= 64 / elembitsize));

    int ebytes = __lccrt_bitwidth_bytesize( (int)elembitsize);
    int maskshift = (int)(64 - elembitsize);
    int totalbitsize = (int)(veclen*elembitsize);
    uint64_t v = __lccrt_load_bytes( src, __lccrt_bitwidth_bytesize( totalbitsize));

    for ( int i = 0; i < veclen; ++i ) {
        /* Lowest elembitsize bits form the next element. */
        uint64_t si = (v << maskshift) >> maskshift;

        /* A 64-bit element consumes the whole value; shifting a 64-bit
           value by 64 is undefined, so clear it explicitly instead. */
        v = (elembitsize < 64) ? (v >> elembitsize) : 0;

        __lccrt_store_bytes( (char *)dst + i*ebytes, si, ebytes);
    }
    return;
}