re PR fortran/86704 (Segmentation fault when using matmul in combination with transpose)
2018-08-25 Thomas Koenig <tkoenig@gcc.gnu.org> PR libfortran/86704 * m4/matmul_internal.m4: Correct calculation of needed buffer size for arrays of shape (1,n). * generated/matmul_c10.c: Regenerated * generated/matmul_c16.c: Regenerated * generated/matmul_c4.c: Regenerated * generated/matmul_c8.c: Regenerated * generated/matmul_i1.c: Regenerated * generated/matmul_i16.c: Regenerated * generated/matmul_i2.c: Regenerated * generated/matmul_i4.c: Regenerated * generated/matmul_i8.c: Regenerated * generated/matmul_r10.c: Regenerated * generated/matmul_r16.c: Regenerated * generated/matmul_r4.c: Regenerated * generated/matmul_r8.c: Regenerated * generated/matmulavx128_c10.c: Regenerated * generated/matmulavx128_c16.c: Regenerated * generated/matmulavx128_c4.c: Regenerated * generated/matmulavx128_c8.c: Regenerated * generated/matmulavx128_i1.c: Regenerated * generated/matmulavx128_i16.c: Regenerated * generated/matmulavx128_i2.c: Regenerated * generated/matmulavx128_i4.c: Regenerated * generated/matmulavx128_i8.c: Regenerated * generated/matmulavx128_r10.c: Regenerated * generated/matmulavx128_r16.c: Regenerated * generated/matmulavx128_r4.c: Regenerated * generated/matmulavx128_r8.c: Regenerated 2018-08-25 Thomas Koenig <tkoenig@gcc.gnu.org> PR libfortran/86704 * gfortran.dg/matmul_19.f90: New test. From-SVN: r263856
This commit is contained in:
parent
75e96bc80c
commit
4f4fabd733
|
@ -1,3 +1,8 @@
|
||||||
|
2018-08-25 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||||
|
|
||||||
|
PR libfortran/86704
|
||||||
|
* gfortran.dg/matmul_19.f90: New test.
|
||||||
|
|
||||||
2018-08-25 Janus Weil <janus@gcc.gnu.org>
|
2018-08-25 Janus Weil <janus@gcc.gnu.org>
|
||||||
|
|
||||||
PR fortran/86545
|
PR fortran/86545
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
! { dg-do run }
|
||||||
|
! { dg-options "-finline-matmul-limit=0" }
|
||||||
|
! PR 86704 - this used to segfault.
|
||||||
|
|
||||||
|
program testmaticovenasobeni
  implicit none

  character(len=10) :: line
  logical :: ok

  ! A is declared as A(n,m) with n = 1, i.e. an array of shape (1,m); the
  ! product matmul(transpose(A),B) on such an array is the case that used
  ! to segfault.
  ok = testmatmul(120,1,3)

  ! Keep the internal write of the function result from the original test.
  write (unit=line,fmt=*) ok

  ! Fail the run if the library matmul produced a wrong result instead of
  ! only relying on the absence of a crash.
  if (.not. ok) stop 1

contains

  ! Multiply transpose(A) (m x n) by B (n x o) with the matmul intrinsic and
  ! compare every element against an independently computed reference.
  !
  !   m, n, o : extents of the operands; A is (n,m), B is (n,o), C is (m,o).
  !   result  : .true. when all elements of C agree with the reference
  !             within a rounding tolerance, .false. otherwise.
  function testmatmul(m,n,o)
    integer, intent(in) :: m,n,o
    real :: A(n,m),B(n,o),C(m,o)
    real :: expected(m,o)
    real :: tol
    logical :: testmatmul
    integer :: i,j

    call random_number(A)
    call random_number(B)

    C=matmul(transpose(A),B)

    ! Reference: element (i,j) of transpose(A)*B is the dot product of
    ! column i of A with column j of B.
    do j = 1, o
      do i = 1, m
        expected(i,j) = dot_product(A(:,i),B(:,j))
      end do
    end do

    ! random_number yields values in [0,1), so each sum of n products is
    ! bounded by n; allow for different summation orders in the two results.
    tol = 2.0 * real(n)**2 * epsilon(1.0)

    testmatmul = all(abs(C - expected) <= tol)
  end function testmatmul

end program testmaticovenasobeni
|
|
@ -1,3 +1,35 @@
|
||||||
|
2018-08-25 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||||
|
|
||||||
|
PR libfortran/86704
|
||||||
|
* m4/matmul_internal.m4: Correct calculation of needed buffer size
|
||||||
|
for arrays of shape (1,n).
|
||||||
|
* generated/matmul_c10.c: Regenerated
|
||||||
|
* generated/matmul_c16.c: Regenerated
|
||||||
|
* generated/matmul_c4.c: Regenerated
|
||||||
|
* generated/matmul_c8.c: Regenerated
|
||||||
|
* generated/matmul_i1.c: Regenerated
|
||||||
|
* generated/matmul_i16.c: Regenerated
|
||||||
|
* generated/matmul_i2.c: Regenerated
|
||||||
|
* generated/matmul_i4.c: Regenerated
|
||||||
|
* generated/matmul_i8.c: Regenerated
|
||||||
|
* generated/matmul_r10.c: Regenerated
|
||||||
|
* generated/matmul_r16.c: Regenerated
|
||||||
|
* generated/matmul_r4.c: Regenerated
|
||||||
|
* generated/matmul_r8.c: Regenerated
|
||||||
|
* generated/matmulavx128_c10.c: Regenerated
|
||||||
|
* generated/matmulavx128_c16.c: Regenerated
|
||||||
|
* generated/matmulavx128_c4.c: Regenerated
|
||||||
|
* generated/matmulavx128_c8.c: Regenerated
|
||||||
|
* generated/matmulavx128_i1.c: Regenerated
|
||||||
|
* generated/matmulavx128_i16.c: Regenerated
|
||||||
|
* generated/matmulavx128_i2.c: Regenerated
|
||||||
|
* generated/matmulavx128_i4.c: Regenerated
|
||||||
|
* generated/matmulavx128_i8.c: Regenerated
|
||||||
|
* generated/matmulavx128_r10.c: Regenerated
|
||||||
|
* generated/matmulavx128_r16.c: Regenerated
|
||||||
|
* generated/matmulavx128_r4.c: Regenerated
|
||||||
|
* generated/matmulavx128_r8.c: Regenerated
|
||||||
|
|
||||||
2018-08-23 David Edelsohn <dje.gcc@gmail.com>
|
2018-08-23 David Edelsohn <dje.gcc@gmail.com>
|
||||||
|
|
||||||
* async.h (ASYNC_IO): Revert _AIX test.
|
* async.h (ASYNC_IO): Revert _AIX test.
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -317,8 +317,13 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -869,8 +874,13 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1421,8 +1431,13 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -1987,8 +2002,13 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -2613,8 +2633,13 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -282,8 +282,13 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
@ -835,8 +840,13 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
|
@ -233,8 +233,13 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Adjust size of t1 to what is needed. */
|
/* Adjust size of t1 to what is needed. */
|
||||||
index_type t1_dim;
|
index_type t1_dim, a_sz;
|
||||||
t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
|
if (aystride == 1)
|
||||||
|
a_sz = rystride;
|
||||||
|
else
|
||||||
|
a_sz = a_dim1;
|
||||||
|
|
||||||
|
t1_dim = a_sz * 256 + b_dim1;
|
||||||
if (t1_dim > 65536)
|
if (t1_dim > 65536)
|
||||||
t1_dim = 65536;
|
t1_dim = 65536;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue