diff options
Diffstat (limited to 'sci-libs/openblas')
-rw-r--r-- | sci-libs/openblas/files/openblas-0.3.21-clang16.patch | 581 | ||||
-rw-r--r-- | sci-libs/openblas/openblas-0.3.21-r1.ebuild | 197 |
2 files changed, 778 insertions, 0 deletions
diff --git a/sci-libs/openblas/files/openblas-0.3.21-clang16.patch b/sci-libs/openblas/files/openblas-0.3.21-clang16.patch new file mode 100644 index 000000000000..051966b0a870 --- /dev/null +++ b/sci-libs/openblas/files/openblas-0.3.21-clang16.patch @@ -0,0 +1,581 @@ +https://github.com/xianyi/OpenBLAS/commit/f703846ad9400a8ea175cb8dd43e18c152aeab93 +https://github.com/xianyi/OpenBLAS/commit/515cf269291bec0d43651fe7bf99a71fb074a0ad +https://github.com/xianyi/OpenBLAS/commit/91110f92d218492d0efbdc1fdf34277ca45f4b36 +https://github.com/xianyi/OpenBLAS/commit/9402df5604e69f86f58953e3883f33f98c930baf +https://github.com/xianyi/OpenBLAS/commit/101a2c77c3f3610933f450cefca3e312edab2186 +https://src.fedoraproject.org/rpms/openblas/c/5f27d51cebe1c1bb6598d38326ece8dc0ac71ec7?branch=rawhide + +From f703846ad9400a8ea175cb8dd43e18c152aeab93 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> +Date: Sat, 13 Aug 2022 11:38:27 +0200 +Subject: [PATCH] Add function prototypes + +--- a/exports/gensymbol ++++ b/exports/gensymbol +@@ -4000,6 +4000,22 @@ case "$p1" in + no_underscore_objs="$no_underscore_objs $misc_common_objs" + + printf 'int main(void){\n' ++ for obj in $underscore_objs; do ++ [ "$obj" != "xerbla" ] && printf 'extern void %s%s%s%s();\n' \ ++ "$symbolprefix" "$obj" "$bu" "$symbolsuffix" ++ done ++ ++ for obj in $need_2underscore_objs; do ++ printf 'extern void %s%s%s%s%s();\n' \ ++ "$symbolprefix" "$obj" "$bu" "$bu" "$symbolsuffix" ++ done ++ ++ for obj in $no_underscore_objs; do ++ printf 'extern void %s%s%s();\n' \ ++ "$symbolprefix" "$obj" "$symbolsuffix" ++ done ++ ++ printf '\n' + for obj in $underscore_objs; do + [ "$obj" != "xerbla" ] && printf '%s%s%s%s();\n' \ + "$symbolprefix" "$obj" "$bu" "$symbolsuffix" +--- a/exports/gensymbol.pl ++++ b/exports/gensymbol.pl +@@ -3955,6 +3955,18 @@ + @no_underscore_objs = (@no_underscore_objs, @misc_common_objs); + + print "int main(void){\n"; ++ foreach $objs (@underscore_objs) { ++ print "extern void ", $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; ++ } ++ ++ foreach $objs (@need_2underscore_objs) { ++ print "extern void ", $symbolprefix, $objs, $bu, $bu, $symbolsuffix, "();\n"; ++ } ++ ++ foreach $objs (@no_underscore_objs) { ++ print "extern void ", $symbolprefix, $objs, $symbolsuffix, "();\n"; ++ } ++ + foreach $objs (@underscore_objs) { + print $symbolprefix, $objs, $bu, $symbolsuffix, "();\n" if $objs ne "xerbla"; + } + +From 515cf269291bec0d43651fe7bf99a71fb074a0ad Mon Sep 17 00:00:00 2001 +From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> +Date: Wed, 14 Sep 2022 11:48:36 +0200 +Subject: [PATCH] Fix pointer/integer argument mismatch in calls to pow() + +--- a/lapack-netlib/SRC/claed0.c ++++ b/lapack-netlib/SRC/claed0.c +@@ -796,10 +796,10 @@ f"> */ + + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; +--- a/lapack-netlib/SRC/claed7.c ++++ b/lapack-netlib/SRC/claed7.c +@@ -864,11 +864,11 @@ f"> */ + /* Form the z-vector which consists of the last row of Q_1 and the */ + /* first row of Q_2. */ + +- ptr = pow_ii(&c__2, tlvls) + 1; ++ ptr = pow_ii(c__2, *tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; +- ptr += pow_ii(&c__2, &i__2); ++ ptr += pow_ii(c__2, i__2); + /* L10: */ + } + curr = ptr + *curpbm; +--- a/lapack-netlib/SRC/clalsa.c ++++ b/lapack-netlib/SRC/clalsa.c +@@ -1051,7 +1051,7 @@ f"> */ + /* Finally go through the left singular vector matrices of all */ + /* the other subproblems bottom-up on the tree. */ + +- j = pow_ii(&c__2, &nlvl); ++ j = pow_ii(c__2, nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { +@@ -1065,7 +1065,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +@@ -1110,7 +1110,7 @@ f"> */ + ll = 1; + } else { + i__2 = lvl - 1; +- lf = pow_ii(&c__2, &i__2); ++ lf = pow_ii(c__2, i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; +--- a/lapack-netlib/SRC/cstedc.c ++++ b/lapack-netlib/SRC/cstedc.c +@@ -836,10 +836,10 @@ f"> */ + lrwmin = *n - 1 << 1; + } else if (icompz == 1) { + lgn = (integer) (log((real) (*n)) / log(2.f)); +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + lwmin = *n * *n; +--- a/lapack-netlib/SRC/dlaed0.c ++++ b/lapack-netlib/SRC/dlaed0.c +@@ -827,10 +827,10 @@ f"> */ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; +--- a/lapack-netlib/SRC/dlaed7.c ++++ b/lapack-netlib/SRC/dlaed7.c +@@ -885,11 +885,11 @@ f"> */ + /* Form the z-vector which consists of the last row of Q_1 and the */ + /* first row of Q_2. */ + +- ptr = pow_ii(&c__2, tlvls) + 1; ++ ptr = pow_ii(c__2, *tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; +- ptr += pow_ii(&c__2, &i__2); ++ ptr += pow_ii(c__2, i__2); + /* L10: */ + } + curr = ptr + *curpbm; +--- a/lapack-netlib/SRC/dlaeda.c ++++ b/lapack-netlib/SRC/dlaeda.c +@@ -754,7 +754,7 @@ f"> */ + /* scheme */ + + i__1 = *curlvl - 1; +- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; ++ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1; + + /* Determine size of these matrices. We add HALF to the value of */ + /* the SQRT in case the machine underestimates one of these square */ +@@ -781,12 +781,12 @@ f"> */ + /* rotations and permutation and then multiplying the center matrices */ + /* against the current Z. */ + +- ptr = pow_ii(&c__2, tlvls) + 1; ++ ptr = pow_ii(c__2, *tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; +- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - ++ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; +@@ -847,7 +847,7 @@ f"> */ + c__1); + + i__2 = *tlvls - k; +- ptr += pow_ii(&c__2, &i__2); ++ ptr += pow_ii(c__2, i__2); + /* L70: */ + } + +--- a/lapack-netlib/SRC/dlalsa.c ++++ b/lapack-netlib/SRC/dlalsa.c +@@ -951,7 +951,7 @@ f"> */ + /* Finally go through the left singular vector matrices of all */ + /* the other subproblems bottom-up on the tree. */ + +- j = pow_ii(&c__2, &nlvl); ++ j = pow_ii(c__2, nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { +@@ -965,7 +965,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +@@ -1010,7 +1010,7 @@ f"> */ + ll = 1; + } else { + i__2 = lvl - 1; +- lf = pow_ii(&c__2, &i__2); ++ lf = pow_ii(c__2, i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; +--- a/lapack-netlib/SRC/dlasd0.c ++++ b/lapack-netlib/SRC/dlasd0.c +@@ -824,7 +824,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +--- a/lapack-netlib/SRC/dlasda.c ++++ b/lapack-netlib/SRC/dlasda.c +@@ -1027,7 +1027,7 @@ f"> */ + + /* Now conquer each subproblem bottom-up. */ + +- j = pow_ii(&c__2, &nlvl); ++ j = pow_ii(c__2, nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +@@ -1039,7 +1039,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +--- a/lapack-netlib/SRC/dstedc.c ++++ b/lapack-netlib/SRC/dstedc.c +@@ -806,10 +806,10 @@ f"> */ + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +--- a/lapack-netlib/SRC/slaed0.c ++++ b/lapack-netlib/SRC/slaed0.c +@@ -823,10 +823,10 @@ f"> */ + + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; +--- a/lapack-netlib/SRC/slaed7.c ++++ b/lapack-netlib/SRC/slaed7.c +@@ -883,11 +883,11 @@ f"> */ + /* Form the z-vector which consists of the last row of Q_1 and the */ + /* first row of Q_2. */ + +- ptr = pow_ii(&c__2, tlvls) + 1; ++ ptr = pow_ii(c__2, *tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; +- ptr += pow_ii(&c__2, &i__2); ++ ptr += pow_ii(c__2, i__2); + /* L10: */ + } + curr = ptr + *curpbm; +--- a/lapack-netlib/SRC/slaeda.c ++++ b/lapack-netlib/SRC/slaeda.c +@@ -753,7 +753,7 @@ f"> */ + /* scheme */ + + i__1 = *curlvl - 1; +- curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; ++ curr = ptr + *curpbm * pow_ii(c__2, *curlvl) + pow_ii(c__2, i__1) - 1; + + /* Determine size of these matrices. We add HALF to the value of */ + /* the SQRT in case the machine underestimates one of these square */ +@@ -779,12 +779,12 @@ f"> */ + /* rotations and permutation and then multiplying the center matrices */ + /* against the current Z. */ + +- ptr = pow_ii(&c__2, tlvls) + 1; ++ ptr = pow_ii(c__2, *tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; +- curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - ++ curr = ptr + *curpbm * pow_ii(c__2, i__2) + pow_ii(c__2, i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; +@@ -844,7 +844,7 @@ f"> */ + c__1); + + i__2 = *tlvls - k; +- ptr += pow_ii(&c__2, &i__2); ++ ptr += pow_ii(c__2, i__2); + /* L70: */ + } + +--- a/lapack-netlib/SRC/slalsa.c ++++ b/lapack-netlib/SRC/slalsa.c +@@ -946,7 +946,7 @@ f"> */ + /* Finally go through the left singular vector matrices of all */ + /* the other subproblems bottom-up on the tree. */ + +- j = pow_ii(&c__2, &nlvl); ++ j = pow_ii(c__2, nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { +@@ -960,7 +960,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +@@ -1005,7 +1005,7 @@ f"> */ + ll = 1; + } else { + i__2 = lvl - 1; +- lf = pow_ii(&c__2, &i__2); ++ lf = pow_ii(c__2, i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; +--- a/lapack-netlib/SRC/slasd0.c ++++ b/lapack-netlib/SRC/slasd0.c +@@ -821,7 +821,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +--- a/lapack-netlib/SRC/slasda.c ++++ b/lapack-netlib/SRC/slasda.c +@@ -1023,7 +1023,7 @@ f"> */ + + /* Now conquer each subproblem bottom-up. */ + +- j = pow_ii(&c__2, &nlvl); ++ j = pow_ii(c__2, nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +@@ -1035,7 +1035,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +--- a/lapack-netlib/SRC/sstedc.c ++++ b/lapack-netlib/SRC/sstedc.c +@@ -804,10 +804,10 @@ f"> */ + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((real) (*n)) / log(2.f)); +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +--- a/lapack-netlib/SRC/zlaed0.c ++++ b/lapack-netlib/SRC/zlaed0.c +@@ -793,10 +793,10 @@ f"> */ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; +--- a/lapack-netlib/SRC/zlaed7.c ++++ b/lapack-netlib/SRC/zlaed7.c +@@ -864,11 +864,11 @@ f"> */ + /* Form the z-vector which consists of the last row of Q_1 and the */ + /* first row of Q_2. */ + +- ptr = pow_ii(&c__2, tlvls) + 1; ++ ptr = pow_ii(c__2, *tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; +- ptr += pow_ii(&c__2, &i__2); ++ ptr += pow_ii(c__2, i__2); + /* L10: */ + } + curr = ptr + *curpbm; +--- a/lapack-netlib/SRC/zlalsa.c ++++ b/lapack-netlib/SRC/zlalsa.c +@@ -1051,7 +1051,7 @@ f"> */ + /* Finally go through the left singular vector matrices of all */ + /* the other subproblems bottom-up on the tree. */ + +- j = pow_ii(&c__2, &nlvl); ++ j = pow_ii(c__2, nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { +@@ -1065,7 +1065,7 @@ f"> */ + ll = 1; + } else { + i__1 = lvl - 1; +- lf = pow_ii(&c__2, &i__1); ++ lf = pow_ii(c__2, i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; +@@ -1110,7 +1110,7 @@ f"> */ + ll = 1; + } else { + i__2 = lvl - 1; +- lf = pow_ii(&c__2, &i__2); ++ lf = pow_ii(c__2, i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; +--- a/lapack-netlib/SRC/zstedc.c ++++ b/lapack-netlib/SRC/zstedc.c +@@ -836,10 +836,10 @@ f"> */ + lrwmin = *n - 1 << 1; + } else if (icompz == 1) { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } +- if (pow_ii(&c__2, &lgn) < *n) { ++ if (pow_ii(c__2, lgn) < *n) { + ++lgn; + } + lwmin = *n * *n; + +From 91110f92d218492d0efbdc1fdf34277ca45f4b36 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> +Date: Wed, 14 Sep 2022 14:03:31 +0200 +Subject: [PATCH] fix missing return type in function declaration + +--- a/ctest/c_sblat1c.c ++++ b/ctest/c_sblat1c.c +@@ -969,7 +969,7 @@ real *sfac; + 1.17 }; + + /* Local variables */ +- extern /* Subroutine */ srottest_(); ++ extern /* Subroutine */ void srottest_(); + static integer i__, k, ksize; + extern /* Subroutine */ int stest_(), srotmtest_(); + static integer ki, kn; + +From 9402df5604e69f86f58953e3883f33f98c930baf Mon Sep 17 00:00:00 2001 +From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> +Date: Wed, 14 Sep 2022 21:44:34 +0200 +Subject: [PATCH] Fix missing external declaration + +--- a/driver/others/blas_server_omp.c ++++ b/driver/others/blas_server_omp.c +@@ -69,6 +69,8 @@ + + int blas_server_avail = 0; + ++extern int openblas_omp_adaptive_env(); ++ + static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER]; + #ifdef HAVE_C11 + static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER]; + +From 101a2c77c3f3610933f450cefca3e312edab2186 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> +Date: Thu, 15 Sep 2022 09:19:19 +0200 +Subject: [PATCH] Fix warnings + +--- a/kernel/x86_64/dgemm_ncopy_8_skylakex.c ++++ b/kernel/x86_64/dgemm_ncopy_8_skylakex.c +@@ -52,18 +52,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT * __restrict a, BLASLONG lda, FLOAT * __ + FLOAT ctemp05, ctemp06, ctemp07, ctemp08; + FLOAT ctemp09, ctemp10, ctemp11, ctemp12; + FLOAT ctemp13, ctemp14, ctemp15, ctemp16; +- FLOAT ctemp17, ctemp18, ctemp19, ctemp20; +- FLOAT ctemp21, ctemp22, ctemp23, ctemp24; +- FLOAT ctemp25, ctemp26, ctemp27, ctemp28; +- FLOAT ctemp29, ctemp30, ctemp31, ctemp32; +- FLOAT ctemp33, ctemp34, ctemp35, ctemp36; +- FLOAT ctemp37, ctemp38, ctemp39, ctemp40; +- FLOAT ctemp41, ctemp42, ctemp43, ctemp44; +- FLOAT ctemp45, ctemp46, ctemp47, ctemp48; +- FLOAT ctemp49, ctemp50, ctemp51, ctemp52; +- FLOAT ctemp53, ctemp54, ctemp55, ctemp56; +- FLOAT ctemp57, ctemp58, ctemp59, ctemp60; +- FLOAT ctemp61, ctemp62, ctemp63, ctemp64; ++ FLOAT ctemp17 /*, ctemp18, ctemp19, ctemp20*/ ; ++ FLOAT /*ctemp21, ctemp22,*/ ctemp23, ctemp24; ++ FLOAT ctemp25 /*, ctemp26, ctemp27, ctemp28*/ ; ++ FLOAT /*ctemp29, ctemp30,*/ ctemp31, ctemp32; ++ FLOAT ctemp33 /*, ctemp34, ctemp35, ctemp36*/ ; ++ FLOAT /*ctemp37, ctemp38,*/ ctemp39, ctemp40; ++ FLOAT ctemp41 /*, ctemp42, ctemp43, ctemp44*/ ; ++ FLOAT /*ctemp45, ctemp46,*/ ctemp47, ctemp48; ++ FLOAT ctemp49 /*, ctemp50, ctemp51, ctemp52*/ ; ++ FLOAT /*ctemp53, ctemp54,*/ ctemp55, ctemp56; ++ FLOAT ctemp57 /*, ctemp58, ctemp59, ctemp60*/ ; ++ FLOAT /*ctemp61, ctemp62,*/ ctemp63, ctemp64; + + + aoffset = a; +--- a/kernel/x86_64/omatcopy_rt.c ++++ b/kernel/x86_64/omatcopy_rt.c +@@ -142,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15");\ + } + int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb){ +- float *src, *dst, *dst_tmp, *src_base, *dst_base; ++ float *src, *dst, *dst_tmp=0, *src_base, *dst_base; + uint64_t src_ld_bytes = (uint64_t)lda * sizeof(float), dst_ld_bytes = (uint64_t)ldb * sizeof(float), num_rows = 0; + BLASLONG cols_left, rows_done; float ALPHA = alpha; + if(ALPHA==0.0){ + diff --git a/sci-libs/openblas/openblas-0.3.21-r1.ebuild b/sci-libs/openblas/openblas-0.3.21-r1.ebuild new file mode 100644 index 000000000000..f8be930b1154 --- /dev/null +++ b/sci-libs/openblas/openblas-0.3.21-r1.ebuild @@ -0,0 +1,197 @@ +# Copyright 1999-2022 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=7 + +inherit fortran-2 toolchain-funcs + +DESCRIPTION="Optimized BLAS library based on GotoBLAS2" +HOMEPAGE="https://github.com/xianyi/OpenBLAS" +SRC_URI="https://github.com/xianyi/OpenBLAS/archive/v${PV}.tar.gz -> ${P}.tar.gz" +S="${WORKDIR}"/OpenBLAS-${PV} + +LICENSE="BSD" +SLOT="0" +KEYWORDS="~amd64 ~arm ~arm64 ~loong ~riscv ~x86 ~amd64-linux ~x86-linux ~x64-macos" +IUSE="dynamic eselect-ldso index-64bit openmp pthread relapack test" +REQUIRED_USE="?? ( openmp pthread )" +RESTRICT="!test? ( test )" + +RDEPEND=" + eselect-ldso? ( + >=app-eselect/eselect-blas-0.2 + >=app-eselect/eselect-lapack-0.2 + ) +" +BDEPEND="virtual/pkgconfig" + +PATCHES=( + "${FILESDIR}/${PN}-0.3.12-shared-blas-lapack.patch" + "${FILESDIR}/${PN}-0.3.21-fix-loong.patch" + "${FILESDIR}/${P}-clang16.patch" +) + +pkg_pretend() { + [[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp + + elog "This software has a massive number of options that" + elog "are configurable and it is *impossible* for all of" + elog "those to fit inside any manageable ebuild." + elog "The Gentoo provided package has enough to build" + elog "a fully optimized library for your targeted CPU." + elog "You can set the CPU target using the environment" + elog "variable - OPENBLAS_TARGET or it will be detected" + elog "automatically from the target toolchain (supports" + elog "cross compilation toolchains)." + elog "You can control the maximum number of threads" + elog "using OPENBLAS_NTHREAD, default=64 and number of " + elog "parallel calls to allow before further calls wait" + elog "using OPENBLAS_NPARALLEL, default=8." +} + +pkg_setup() { + fortran-2_pkg_setup + + # List of most configurable options - Makefile.rule + + # https://github.com/xianyi/OpenBLAS/pull/2663 + tc-export CC FC LD AR AS RANLIB + + # HOSTCC is used for scripting + export HOSTCC="$(tc-getBUILD_CC)" + + # threading options + [[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp + USE_THREAD=0 + if use openmp; then + USE_THREAD=1; USE_OPENMP=1; + elif use pthread; then + USE_THREAD=1; USE_OPENMP=0; + fi + export USE_THREAD USE_OPENMP + + # disable submake with -j and default optimization flags + # in Makefile.system + # Makefile.rule says to not modify COMMON_OPT/FCOMMON_OPT... + export MAKE_NB_JOBS=-1 \ + COMMON_OPT=" " \ + FCOMMON_OPT=" " + + # Target CPU ARCH options + # generally detected automatically from cross toolchain + use dynamic && \ + export DYNAMIC_ARCH=1 \ + NO_AFFINITY=1 \ + TARGET=GENERIC + + export NUM_PARALLEL=${OPENBLAS_NPARALLEL:-8} \ + NUM_THREADS=${OPENBLAS_NTHREAD:-64} + + # setting OPENBLAS_TARGET to override auto detection + # in case the toolchain is not enough to detect + # https://github.com/xianyi/OpenBLAS/blob/develop/TargetList.txt + if ! use dynamic && [[ ! -z "${OPENBLAS_TARGET}" ]] ; then + export TARGET="${OPENBLAS_TARGET}" + fi + + export NO_STATIC=1 + + BUILD_RELAPACK=1 + if ! use relapack; then + BUILD_RELAPACK=0 + fi + + export PREFIX="${EPREFIX}/usr" BUILD_RELAPACK +} + +src_prepare() { + default + + # Don't build the tests as part of "make all". We'll do + # it explicitly later if the test phase is enabled. + sed -e "/^all ::/s/tests //" -i Makefile || die + + # if 64bit-index is needed, create second library + # with LIBPREFIX=libopenblas64 + if use index-64bit; then + cp -aL "${S}" "${S}-index-64bit" || die + fi +} + +src_compile() { + default + cd interface || die + emake shared-blas-lapack + + if use index-64bit; then + emake -C"${S}-index-64bit" \ + INTERFACE64=1 \ + LIBPREFIX=libopenblas64 + fi +} + +src_test() { + emake tests +} + +src_install() { + emake install DESTDIR="${D}" \ + OPENBLAS_INCLUDE_DIR='$(PREFIX)'/include/${PN} \ + OPENBLAS_LIBRARY_DIR='$(PREFIX)'/$(get_libdir) + + dodoc GotoBLAS_*.txt *.md Changelog.txt + + if use index-64bit; then + dolib.so "${S}-index-64bit"/libopenblas64*.so* + fi + + if use eselect-ldso; then + insinto /usr/$(get_libdir)/blas/openblas/ + doins interface/libblas.so.3 + dosym libblas.so.3 usr/$(get_libdir)/blas/openblas/libblas.so + doins interface/libcblas.so.3 + dosym libcblas.so.3 usr/$(get_libdir)/blas/openblas/libcblas.so + + insinto /usr/$(get_libdir)/lapack/openblas/ + doins interface/liblapack.so.3 + dosym liblapack.so.3 usr/$(get_libdir)/lapack/openblas/liblapack.so + doins interface/liblapacke.so.3 + dosym liblapacke.so.3 usr/$(get_libdir)/lapack/openblas/liblapacke.so + fi +} + +pkg_postinst() { + use eselect-ldso || return + local libdir=$(get_libdir) me="openblas" + + # check blas + eselect blas add ${libdir} "${EROOT}"/usr/${libdir}/blas/${me} ${me} + local current_blas=$(eselect blas show ${libdir} | cut -d' ' -f2) + if [[ ${current_blas} == "${me}" || -z ${current_blas} ]]; then + eselect blas set ${libdir} ${me} + elog "Current eselect: BLAS/CBLAS ($libdir) -> [${current_blas}]." + else + elog "Current eselect: BLAS/CBLAS ($libdir) -> [${current_blas}]." + elog "To use blas [${me}] implementation, you have to issue (as root):" + elog "\t eselect blas set ${libdir} ${me}" + fi + + # check lapack + eselect lapack add ${libdir} "${EROOT}"/usr/${libdir}/lapack/${me} ${me} + local current_lapack=$(eselect lapack show ${libdir} | cut -d' ' -f2) + if [[ ${current_lapack} == "${me}" || -z ${current_lapack} ]]; then + eselect lapack set ${libdir} ${me} + elog "Current eselect: LAPACK ($libdir) -> [${current_lapack}]." + else + elog "Current eselect: LAPACK ($libdir) -> [${current_lapack}]." + elog "To use lapack [${me}] implementation, you have to issue (as root):" + elog "\t eselect lapack set ${libdir} ${me}" + fi +} + +pkg_postrm() { + if use eselect-ldso; then + eselect blas validate + eselect lapack validate + fi +} |