/root/doris/contrib/openblas/lapack-netlib/SRC/sgelsd.c
Line | Count | Source |
1 | | #include <math.h> |
2 | | #include <stdlib.h> |
3 | | #include <string.h> |
4 | | #include <stdio.h> |
5 | | #include <complex.h> |
6 | | #ifdef complex |
7 | | #undef complex |
8 | | #endif |
9 | | #ifdef I |
10 | | #undef I |
11 | | #endif |
12 | | |
13 | | #if defined(_WIN64) |
14 | | typedef long long BLASLONG; |
15 | | typedef unsigned long long BLASULONG; |
16 | | #else |
17 | | typedef long BLASLONG; |
18 | | typedef unsigned long BLASULONG; |
19 | | #endif |
20 | | |
21 | | #ifdef LAPACK_ILP64 |
22 | | typedef BLASLONG blasint; |
23 | | #if defined(_WIN64) |
24 | | #define blasabs(x) llabs(x) |
25 | | #else |
26 | | #define blasabs(x) labs(x) |
27 | | #endif |
28 | | #else |
29 | | typedef int blasint; |
30 | | #define blasabs(x) abs(x) |
31 | | #endif |
32 | | |
33 | | typedef blasint integer; |
34 | | |
35 | | typedef unsigned int uinteger; |
36 | | typedef char *address; |
37 | | typedef short int shortint; |
38 | | typedef float real; |
39 | | typedef double doublereal; |
40 | | typedef struct { real r, i; } complex; |
41 | | typedef struct { doublereal r, i; } doublecomplex; |
42 | | #ifdef _MSC_VER |
43 | | static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} |
44 | | static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} |
45 | | static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} |
46 | | static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} |
47 | | #else |
48 | 0 | static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} |
49 | 0 | static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} |
50 | 0 | static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} |
51 | 0 | static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} |
52 | | #endif |
53 | | #define pCf(z) (*_pCf(z)) |
54 | | #define pCd(z) (*_pCd(z)) |
55 | | typedef blasint logical; |
56 | | |
57 | | typedef char logical1; |
58 | | typedef char integer1; |
59 | | |
60 | | #define TRUE_ (1) |
61 | | #define FALSE_ (0) |
62 | | |
63 | | /* Extern is for use with -E */ |
64 | | #ifndef Extern |
65 | | #define Extern extern |
66 | | #endif |
67 | | |
68 | | /* I/O stuff */ |
69 | | |
70 | | typedef int flag; |
71 | | typedef int ftnlen; |
72 | | typedef int ftnint; |
73 | | |
74 | | /*external read, write*/ |
75 | | typedef struct |
76 | | { flag cierr; |
77 | | ftnint ciunit; |
78 | | flag ciend; |
79 | | char *cifmt; |
80 | | ftnint cirec; |
81 | | } cilist; |
82 | | |
83 | | /*internal read, write*/ |
84 | | typedef struct |
85 | | { flag icierr; |
86 | | char *iciunit; |
87 | | flag iciend; |
88 | | char *icifmt; |
89 | | ftnint icirlen; |
90 | | ftnint icirnum; |
91 | | } icilist; |
92 | | |
93 | | /*open*/ |
94 | | typedef struct |
95 | | { flag oerr; |
96 | | ftnint ounit; |
97 | | char *ofnm; |
98 | | ftnlen ofnmlen; |
99 | | char *osta; |
100 | | char *oacc; |
101 | | char *ofm; |
102 | | ftnint orl; |
103 | | char *oblnk; |
104 | | } olist; |
105 | | |
106 | | /*close*/ |
107 | | typedef struct |
108 | | { flag cerr; |
109 | | ftnint cunit; |
110 | | char *csta; |
111 | | } cllist; |
112 | | |
113 | | /*rewind, backspace, endfile*/ |
114 | | typedef struct |
115 | | { flag aerr; |
116 | | ftnint aunit; |
117 | | } alist; |
118 | | |
119 | | /* inquire */ |
120 | | typedef struct |
121 | | { flag inerr; |
122 | | ftnint inunit; |
123 | | char *infile; |
124 | | ftnlen infilen; |
125 | | ftnint *inex; /*parameters in standard's order*/ |
126 | | ftnint *inopen; |
127 | | ftnint *innum; |
128 | | ftnint *innamed; |
129 | | char *inname; |
130 | | ftnlen innamlen; |
131 | | char *inacc; |
132 | | ftnlen inacclen; |
133 | | char *inseq; |
134 | | ftnlen inseqlen; |
135 | | char *indir; |
136 | | ftnlen indirlen; |
137 | | char *infmt; |
138 | | ftnlen infmtlen; |
139 | | char *inform; |
140 | | ftnint informlen; |
141 | | char *inunf; |
142 | | ftnlen inunflen; |
143 | | ftnint *inrecl; |
144 | | ftnint *innrec; |
145 | | char *inblank; |
146 | | ftnlen inblanklen; |
147 | | } inlist; |
148 | | |
149 | | #define VOID void |
150 | | |
151 | | union Multitype { /* for multiple entry points */ |
152 | | integer1 g; |
153 | | shortint h; |
154 | | integer i; |
155 | | /* longint j; */ |
156 | | real r; |
157 | | doublereal d; |
158 | | complex c; |
159 | | doublecomplex z; |
160 | | }; |
161 | | |
162 | | typedef union Multitype Multitype; |
163 | | |
164 | | struct Vardesc { /* for Namelist */ |
165 | | char *name; |
166 | | char *addr; |
167 | | ftnlen *dims; |
168 | | int type; |
169 | | }; |
170 | | typedef struct Vardesc Vardesc; |
171 | | |
172 | | struct Namelist { |
173 | | char *name; |
174 | | Vardesc **vars; |
175 | | int nvars; |
176 | | }; |
177 | | typedef struct Namelist Namelist; |
178 | | |
179 | | #define abs(x) ((x) >= 0 ? (x) : -(x)) |
180 | | #define dabs(x) (fabs(x)) |
181 | 0 | #define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) |
182 | 0 | #define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) |
183 | | #define dmin(a,b) (f2cmin(a,b)) |
184 | | #define dmax(a,b) (f2cmax(a,b)) |
185 | | #define bit_test(a,b) ((a) >> (b) & 1) |
186 | | #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) |
187 | | #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) |
188 | | |
189 | | #define abort_() { sig_die("Fortran abort routine called", 1); } |
190 | | #define c_abs(z) (cabsf(Cf(z))) |
191 | | #define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } |
192 | | #ifdef _MSC_VER |
193 | | #define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} |
194 | | #define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);} |
195 | | #else |
196 | | #define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} |
197 | | #define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} |
198 | | #endif |
199 | | #define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} |
200 | | #define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} |
201 | | #define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} |
202 | | //#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} |
203 | | #define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} |
204 | | #define d_abs(x) (fabs(*(x))) |
205 | | #define d_acos(x) (acos(*(x))) |
206 | | #define d_asin(x) (asin(*(x))) |
207 | | #define d_atan(x) (atan(*(x))) |
208 | | #define d_atn2(x, y) (atan2(*(x),*(y))) |
209 | | #define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } |
210 | | #define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } |
211 | | #define d_cos(x) (cos(*(x))) |
212 | | #define d_cosh(x) (cosh(*(x))) |
213 | | #define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) |
214 | | #define d_exp(x) (exp(*(x))) |
215 | | #define d_imag(z) (cimag(Cd(z))) |
216 | | #define r_imag(z) (cimagf(Cf(z))) |
217 | | #define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) |
218 | | #define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) |
219 | | #define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) |
220 | | #define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) |
221 | | #define d_log(x) (log(*(x))) |
222 | | #define d_mod(x, y) (fmod(*(x), *(y))) |
223 | | #define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) |
224 | | #define d_nint(x) u_nint(*(x)) |
225 | | #define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a))) |
226 | | #define d_sign(a,b) u_sign(*(a),*(b)) |
227 | | #define r_sign(a,b) u_sign(*(a),*(b)) |
228 | | #define d_sin(x) (sin(*(x))) |
229 | | #define d_sinh(x) (sinh(*(x))) |
230 | | #define d_sqrt(x) (sqrt(*(x))) |
231 | | #define d_tan(x) (tan(*(x))) |
232 | | #define d_tanh(x) (tanh(*(x))) |
233 | | #define i_abs(x) abs(*(x)) |
234 | | #define i_dnnt(x) ((integer)u_nint(*(x))) |
235 | | #define i_len(s, n) (n) |
236 | | #define i_nint(x) ((integer)u_nint(*(x))) |
237 | | #define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) |
238 | | #define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } |
239 | | #define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) |
240 | | #define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } |
241 | | #define sig_die(s, kill) { exit(1); } |
242 | | #define s_stop(s, n) {exit(0);} |
243 | | #define z_abs(z) (cabs(Cd(z))) |
244 | | #define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} |
245 | | #define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} |
246 | | #define myexit_() break; |
247 | | #define mycycle() continue; |
248 | | #define myceiling(w) {ceil(w)} |
249 | | #define myhuge(w) {HUGE_VAL} |
250 | | //#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} |
251 | | #define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)} |
252 | | |
253 | | /* -- translated by f2c (version 20000121). |
254 | | You must link the resulting object file with the libraries: |
255 | | -lf2c -lm (in that order) |
256 | | */ |
257 | | |
258 | | |
259 | | |
260 | | |
261 | | /* Table of constant values */ |
262 | | |
263 | | static integer c__9 = 9; |
264 | | static integer c__0 = 0; |
265 | | static integer c__6 = 6; |
266 | | static integer c_n1 = -1; |
267 | | static integer c__1 = 1; |
268 | | static real c_b81 = 0.f; |
269 | | |
270 | | /* > \brief <b> SGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices</b |
271 | | > */ |
272 | | |
273 | | /* =========== DOCUMENTATION =========== */ |
274 | | |
275 | | /* Online html documentation available at */ |
276 | | /* http://www.netlib.org/lapack/explore-html/ */ |
277 | | |
278 | | /* > \htmlonly */ |
279 | | /* > Download SGELSD + dependencies */ |
280 | | /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgelsd. |
281 | | f"> */ |
282 | | /* > [TGZ]</a> */ |
283 | | /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgelsd. |
284 | | f"> */ |
285 | | /* > [ZIP]</a> */ |
286 | | /* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgelsd. |
287 | | f"> */ |
288 | | /* > [TXT]</a> */ |
289 | | /* > \endhtmlonly */ |
290 | | |
291 | | /* Definition: */ |
292 | | /* =========== */ |
293 | | |
294 | | /* SUBROUTINE SGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, */ |
295 | | /* RANK, WORK, LWORK, IWORK, INFO ) */ |
296 | | |
297 | | /* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK */ |
298 | | /* REAL RCOND */ |
299 | | /* INTEGER IWORK( * ) */ |
300 | | /* REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) */ |
301 | | |
302 | | |
303 | | /* > \par Purpose: */ |
304 | | /* ============= */ |
305 | | /* > */ |
306 | | /* > \verbatim */ |
307 | | /* > */ |
308 | | /* > SGELSD computes the minimum-norm solution to a real linear least */ |
309 | | /* > squares problem: */ |
310 | | /* > minimize 2-norm(| b - A*x |) */ |
311 | | /* > using the singular value decomposition (SVD) of A. A is an M-by-N */ |
312 | | /* > matrix which may be rank-deficient. */ |
313 | | /* > */ |
314 | | /* > Several right hand side vectors b and solution vectors x can be */ |
315 | | /* > handled in a single call; they are stored as the columns of the */ |
316 | | /* > M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ |
317 | | /* > matrix X. */ |
318 | | /* > */ |
319 | | /* > The problem is solved in three steps: */ |
320 | | /* > (1) Reduce the coefficient matrix A to bidiagonal form with */ |
321 | | /* > Householder transformations, reducing the original problem */ |
322 | | /* > into a "bidiagonal least squares problem" (BLS) */ |
323 | | /* > (2) Solve the BLS using a divide and conquer approach. */ |
324 | | /* > (3) Apply back all the Householder transformations to solve */ |
325 | | /* > the original least squares problem. */ |
326 | | /* > */ |
327 | | /* > The effective rank of A is determined by treating as zero those */ |
328 | | /* > singular values which are less than RCOND times the largest singular */ |
329 | | /* > value. */ |
330 | | /* > */ |
331 | | /* > The divide and conquer algorithm makes very mild assumptions about */ |
332 | | /* > floating point arithmetic. It will work on machines with a guard */ |
333 | | /* > digit in add/subtract, or on those binary machines without guard */ |
334 | | /* > digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ |
335 | | /* > Cray-2. It could conceivably fail on hexadecimal or decimal machines */ |
336 | | /* > without guard digits, but we know of none. */ |
337 | | /* > \endverbatim */ |
338 | | |
339 | | /* Arguments: */ |
340 | | /* ========== */ |
341 | | |
342 | | /* > \param[in] M */ |
343 | | /* > \verbatim */ |
344 | | /* > M is INTEGER */ |
345 | | /* > The number of rows of A. M >= 0. */ |
346 | | /* > \endverbatim */ |
347 | | /* > */ |
348 | | /* > \param[in] N */ |
349 | | /* > \verbatim */ |
350 | | /* > N is INTEGER */ |
351 | | /* > The number of columns of A. N >= 0. */ |
352 | | /* > \endverbatim */ |
353 | | /* > */ |
354 | | /* > \param[in] NRHS */ |
355 | | /* > \verbatim */ |
356 | | /* > NRHS is INTEGER */ |
357 | | /* > The number of right hand sides, i.e., the number of columns */ |
358 | | /* > of the matrices B and X. NRHS >= 0. */ |
359 | | /* > \endverbatim */ |
360 | | /* > */ |
361 | | /* > \param[in,out] A */ |
362 | | /* > \verbatim */ |
363 | | /* > A is REAL array, dimension (LDA,N) */ |
364 | | /* > On entry, the M-by-N matrix A. */ |
365 | | /* > On exit, A has been destroyed. */ |
366 | | /* > \endverbatim */ |
367 | | /* > */ |
368 | | /* > \param[in] LDA */ |
369 | | /* > \verbatim */ |
370 | | /* > LDA is INTEGER */ |
371 | | /* > The leading dimension of the array A. LDA >= f2cmax(1,M). */ |
372 | | /* > \endverbatim */ |
373 | | /* > */ |
374 | | /* > \param[in,out] B */ |
375 | | /* > \verbatim */ |
376 | | /* > B is REAL array, dimension (LDB,NRHS) */ |
377 | | /* > On entry, the M-by-NRHS right hand side matrix B. */ |
378 | | /* > On exit, B is overwritten by the N-by-NRHS solution */ |
379 | | /* > matrix X. If m >= n and RANK = n, the residual */ |
380 | | /* > sum-of-squares for the solution in the i-th column is given */ |
381 | | /* > by the sum of squares of elements n+1:m in that column. */ |
382 | | /* > \endverbatim */ |
383 | | /* > */ |
384 | | /* > \param[in] LDB */ |
385 | | /* > \verbatim */ |
386 | | /* > LDB is INTEGER */ |
387 | | /* > The leading dimension of the array B. LDB >= f2cmax(1,f2cmax(M,N)). */ |
388 | | /* > \endverbatim */ |
389 | | /* > */ |
390 | | /* > \param[out] S */ |
391 | | /* > \verbatim */ |
392 | | /* > S is REAL array, dimension (f2cmin(M,N)) */ |
393 | | /* > The singular values of A in decreasing order. */ |
394 | | /* > The condition number of A in the 2-norm = S(1)/S(f2cmin(m,n)). */ |
395 | | /* > \endverbatim */ |
396 | | /* > */ |
397 | | /* > \param[in] RCOND */ |
398 | | /* > \verbatim */ |
399 | | /* > RCOND is REAL */ |
400 | | /* > RCOND is used to determine the effective rank of A. */ |
401 | | /* > Singular values S(i) <= RCOND*S(1) are treated as zero. */ |
402 | | /* > If RCOND < 0, machine precision is used instead. */ |
403 | | /* > \endverbatim */ |
404 | | /* > */ |
405 | | /* > \param[out] RANK */ |
406 | | /* > \verbatim */ |
407 | | /* > RANK is INTEGER */ |
408 | | /* > The effective rank of A, i.e., the number of singular values */ |
409 | | /* > which are greater than RCOND*S(1). */ |
410 | | /* > \endverbatim */ |
411 | | /* > */ |
412 | | /* > \param[out] WORK */ |
413 | | /* > \verbatim */ |
414 | | /* > WORK is REAL array, dimension (MAX(1,LWORK)) */ |
415 | | /* > On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ |
416 | | /* > \endverbatim */ |
417 | | /* > */ |
418 | | /* > \param[in] LWORK */ |
419 | | /* > \verbatim */ |
420 | | /* > LWORK is INTEGER */ |
421 | | /* > The dimension of the array WORK. LWORK must be at least 1. */ |
422 | | /* > The exact minimum amount of workspace needed depends on M, */ |
423 | | /* > N and NRHS. As long as LWORK is at least */ |
424 | | /* > 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, */ |
425 | | /* > if M is greater than or equal to N or */ |
426 | | /* > 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, */ |
427 | | /* > if M is less than N, the code will execute correctly. */ |
428 | | /* > SMLSIZ is returned by ILAENV and is equal to the maximum */ |
429 | | /* > size of the subproblems at the bottom of the computation */ |
430 | | /* > tree (usually about 25), and */ |
431 | | /* > NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) */ |
432 | | /* > For good performance, LWORK should generally be larger. */ |
433 | | /* > */ |
434 | | /* > If LWORK = -1, then a workspace query is assumed; the routine */ |
435 | | /* > only calculates the optimal size of the array WORK and the */ |
436 | | /* > minimum size of the array IWORK, and returns these values as */ |
437 | | /* > the first entries of the WORK and IWORK arrays, and no error */ |
438 | | /* > message related to LWORK is issued by XERBLA. */ |
439 | | /* > \endverbatim */ |
440 | | /* > */ |
441 | | /* > \param[out] IWORK */ |
442 | | /* > \verbatim */ |
443 | | /* > IWORK is INTEGER array, dimension (MAX(1,LIWORK)) */ |
444 | | /* > LIWORK >= f2cmax(1, 3*MINMN*NLVL + 11*MINMN), */ |
445 | | /* > where MINMN = MIN( M,N ). */ |
446 | | /* > On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. */ |
447 | | /* > \endverbatim */ |
448 | | /* > */ |
449 | | /* > \param[out] INFO */ |
450 | | /* > \verbatim */ |
451 | | /* > INFO is INTEGER */ |
452 | | /* > = 0: successful exit */ |
453 | | /* > < 0: if INFO = -i, the i-th argument had an illegal value. */ |
454 | | /* > > 0: the algorithm for computing the SVD failed to converge; */ |
455 | | /* > if INFO = i, i off-diagonal elements of an intermediate */ |
456 | | /* > bidiagonal form did not converge to zero. */ |
457 | | /* > \endverbatim */ |
458 | | |
459 | | /* Authors: */ |
460 | | /* ======== */ |
461 | | |
462 | | /* > \author Univ. of Tennessee */ |
463 | | /* > \author Univ. of California Berkeley */ |
464 | | /* > \author Univ. of Colorado Denver */ |
465 | | /* > \author NAG Ltd. */ |
466 | | |
467 | | /* > \date June 2017 */ |
468 | | |
469 | | /* > \ingroup realGEsolve */ |
470 | | |
471 | | /* > \par Contributors: */ |
472 | | /* ================== */ |
473 | | /* > */ |
474 | | /* > Ming Gu and Ren-Cang Li, Computer Science Division, University of */ |
475 | | /* > California at Berkeley, USA \n */ |
476 | | /* > Osni Marques, LBNL/NERSC, USA \n */ |
477 | | |
478 | | /* ===================================================================== */ |
479 | | /* Subroutine */ void sgelsd_(integer *m, integer *n, integer *nrhs, real *a, |
480 | | integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * |
481 | | rank, real *work, integer *lwork, integer *iwork, integer *info) |
482 | 0 | { |
483 | | /* System generated locals */ |
484 | 0 | integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; |
485 | | |
486 | | /* Local variables */ |
487 | 0 | real anrm, bnrm; |
488 | 0 | integer itau, nlvl, iascl, ibscl; |
489 | 0 | real sfmin; |
490 | 0 | integer minmn, maxmn, itaup, itauq, mnthr, nwork, ie, il; |
491 | 0 | extern /* Subroutine */ void slabad_(real *, real *); |
492 | 0 | integer mm; |
493 | 0 | extern /* Subroutine */ void sgebrd_(integer *, integer *, real *, integer |
494 | 0 | *, real *, real *, real *, real *, real *, integer *, integer *); |
495 | 0 | extern real slamch_(char *), slange_(char *, integer *, integer *, |
496 | 0 | real *, integer *, real *); |
497 | 0 | extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); |
498 | 0 | extern integer ilaenv_(integer *, char *, char *, integer *, integer *, |
499 | 0 | integer *, integer *, ftnlen, ftnlen); |
500 | 0 | real bignum; |
501 | 0 | extern /* Subroutine */ void sgelqf_(integer *, integer *, real *, integer |
502 | 0 | *, real *, real *, integer *, integer *), slalsd_(char *, integer |
503 | 0 | *, integer *, integer *, real *, real *, real *, integer *, real * |
504 | 0 | , integer *, real *, integer *, integer *), slascl_(char * |
505 | 0 | , integer *, integer *, real *, real *, integer *, integer *, |
506 | 0 | real *, integer *, integer *); |
507 | 0 | integer wlalsd; |
508 | 0 | extern /* Subroutine */ void sgeqrf_(integer *, integer *, real *, integer |
509 | 0 | *, real *, real *, integer *, integer *), slacpy_(char *, integer |
510 | 0 | *, integer *, real *, integer *, real *, integer *), |
511 | 0 | slaset_(char *, integer *, integer *, real *, real *, real *, |
512 | 0 | integer *); |
513 | 0 | integer ldwork; |
514 | 0 | extern /* Subroutine */ void sormbr_(char *, char *, char *, integer *, |
515 | 0 | integer *, integer *, real *, integer *, real *, real *, integer * |
516 | 0 | , real *, integer *, integer *); |
517 | 0 | integer liwork, minwrk, maxwrk; |
518 | 0 | real smlnum; |
519 | 0 | extern /* Subroutine */ void sormlq_(char *, char *, integer *, integer *, |
520 | 0 | integer *, real *, integer *, real *, real *, integer *, real *, |
521 | 0 | integer *, integer *); |
522 | 0 | logical lquery; |
523 | 0 | integer smlsiz; |
524 | 0 | extern /* Subroutine */ void sormqr_(char *, char *, integer *, integer *, |
525 | 0 | integer *, real *, integer *, real *, real *, integer *, real *, |
526 | 0 | integer *, integer *); |
527 | 0 | real eps; |
528 | | |
529 | | |
530 | | /* -- LAPACK driver routine (version 3.7.1) -- */ |
531 | | /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ |
532 | | /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ |
533 | | /* June 2017 */ |
534 | | |
535 | | |
536 | | /* ===================================================================== */ |
537 | | |
538 | | |
539 | | /* Test the input arguments. */ |
540 | | |
541 | | /* Parameter adjustments */ |
542 | 0 | a_dim1 = *lda; |
543 | 0 | a_offset = 1 + a_dim1; |
544 | 0 | a -= a_offset; |
545 | 0 | b_dim1 = *ldb; |
546 | 0 | b_offset = 1 + b_dim1; |
547 | 0 | b -= b_offset; |
548 | 0 | --s; |
549 | 0 | --work; |
550 | 0 | --iwork; |
551 | 0 | fprintf(stdout,"start of SGELSD\n"); |
552 | | /* Function Body */ |
553 | 0 | *info = 0; |
554 | 0 | minmn = f2cmin(*m,*n); |
555 | 0 | maxmn = f2cmax(*m,*n); |
556 | 0 | lquery = *lwork == -1; |
557 | 0 | if (*m < 0) { |
558 | 0 | *info = -1; |
559 | 0 | } else if (*n < 0) { |
560 | 0 | *info = -2; |
561 | 0 | } else if (*nrhs < 0) { |
562 | 0 | *info = -3; |
563 | 0 | } else if (*lda < f2cmax(1,*m)) { |
564 | 0 | *info = -5; |
565 | 0 | } else if (*ldb < f2cmax(1,maxmn)) { |
566 | 0 | *info = -7; |
567 | 0 | } |
568 | | |
569 | | /* Compute workspace. */ |
570 | | /* (Note: Comments in the code beginning "Workspace:" describe the */ |
571 | | /* minimal amount of workspace needed at that point in the code, */ |
572 | | /* as well as the preferred amount for good performance. */ |
573 | | /* NB refers to the optimal block size for the immediately */ |
574 | | /* following subroutine, as returned by ILAENV.) */ |
575 | |
|
576 | 0 | if (*info == 0) { |
577 | 0 | minwrk = 1; |
578 | 0 | maxwrk = 1; |
579 | 0 | liwork = 1; |
580 | 0 | if (minmn > 0) { |
581 | 0 | smlsiz = ilaenv_(&c__9, "SGELSD", " ", &c__0, &c__0, &c__0, &c__0, |
582 | 0 | (ftnlen)6, (ftnlen)1); |
583 | 0 | mnthr = ilaenv_(&c__6, "SGELSD", " ", m, n, nrhs, &c_n1, (ftnlen) |
584 | 0 | 6, (ftnlen)1); |
585 | | /* Computing MAX */ |
586 | 0 | i__1 = (integer) (logf((real) minmn / (real) (smlsiz + 1)) / logf( |
587 | 0 | 2.f)) + 1; |
588 | 0 | nlvl = f2cmax(i__1,0); |
589 | 0 | liwork = minmn * 3 * nlvl + minmn * 11; |
590 | 0 | mm = *m; |
591 | 0 | if (*m >= *n && *m >= mnthr) { |
592 | | |
593 | | /* Path 1a - overdetermined, with many more rows than */ |
594 | | /* columns. */ |
595 | |
|
596 | 0 | mm = *n; |
597 | | /* Computing MAX */ |
598 | 0 | i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", |
599 | 0 | " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); |
600 | 0 | maxwrk = f2cmax(i__1,i__2); |
601 | | /* Computing MAX */ |
602 | 0 | i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", |
603 | 0 | "LT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); |
604 | 0 | maxwrk = f2cmax(i__1,i__2); |
605 | 0 | } |
606 | 0 | if (*m >= *n) { |
607 | | |
608 | | /* Path 1 - overdetermined or exactly determined. */ |
609 | | |
610 | | /* Computing MAX */ |
611 | 0 | i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, |
612 | 0 | "SGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, ( |
613 | 0 | ftnlen)1); |
614 | 0 | maxwrk = f2cmax(i__1,i__2); |
615 | | /* Computing MAX */ |
616 | 0 | i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR" |
617 | 0 | , "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); |
618 | 0 | maxwrk = f2cmax(i__1,i__2); |
619 | | /* Computing MAX */ |
620 | 0 | i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, |
621 | 0 | "SORMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, ( |
622 | 0 | ftnlen)3); |
623 | 0 | maxwrk = f2cmax(i__1,i__2); |
624 | | /* Computing 2nd power */ |
625 | 0 | i__1 = smlsiz + 1; |
626 | 0 | wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * |
627 | 0 | *nrhs + i__1 * i__1; |
628 | | /* Computing MAX */ |
629 | 0 | i__1 = maxwrk, i__2 = *n * 3 + wlalsd; |
630 | 0 | maxwrk = f2cmax(i__1,i__2); |
631 | | /* Computing MAX */ |
632 | 0 | i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = f2cmax(i__1, |
633 | 0 | i__2), i__2 = *n * 3 + wlalsd; |
634 | 0 | minwrk = f2cmax(i__1,i__2); |
635 | 0 | } |
636 | 0 | if (*n > *m) { |
637 | | /* Computing 2nd power */ |
638 | 0 | i__1 = smlsiz + 1; |
639 | 0 | wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * |
640 | 0 | *nrhs + i__1 * i__1; |
641 | 0 | if (*n >= mnthr) { |
642 | | |
643 | | /* Path 2a - underdetermined, with many more columns */ |
644 | | /* than rows. */ |
645 | |
|
646 | 0 | maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & |
647 | 0 | c_n1, &c_n1, (ftnlen)6, (ftnlen)1); |
648 | | /* Computing MAX */ |
649 | 0 | i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * |
650 | 0 | ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, |
651 | 0 | (ftnlen)6, (ftnlen)1); |
652 | 0 | maxwrk = f2cmax(i__1,i__2); |
653 | | /* Computing MAX */ |
654 | 0 | i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * |
655 | 0 | ilaenv_(&c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, |
656 | 0 | (ftnlen)6, (ftnlen)3); |
657 | 0 | maxwrk = f2cmax(i__1,i__2); |
658 | | /* Computing MAX */ |
659 | 0 | i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * |
660 | 0 | ilaenv_(&c__1, "SORMBR", "PLN", m, nrhs, m, &c_n1, |
661 | 0 | (ftnlen)6, (ftnlen)3); |
662 | 0 | maxwrk = f2cmax(i__1,i__2); |
663 | 0 | if (*nrhs > 1) { |
664 | | /* Computing MAX */ |
665 | 0 | i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; |
666 | 0 | maxwrk = f2cmax(i__1,i__2); |
667 | 0 | } else { |
668 | | /* Computing MAX */ |
669 | 0 | i__1 = maxwrk, i__2 = *m * *m + (*m << 1); |
670 | 0 | maxwrk = f2cmax(i__1,i__2); |
671 | 0 | } |
672 | | /* Computing MAX */ |
673 | 0 | i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ" |
674 | 0 | , "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); |
675 | 0 | maxwrk = f2cmax(i__1,i__2); |
676 | | /* Computing MAX */ |
677 | 0 | i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; |
678 | 0 | maxwrk = f2cmax(i__1,i__2); |
679 | | /* XXX: Ensure the Path 2a case below is triggered. The workspace */ |
680 | | /* calculation should use queries for all routines eventually. */ |
681 | | /* Computing MAX */ |
682 | | /* Computing MAX */ |
683 | 0 | i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), |
684 | 0 | i__3 = f2cmax(i__3,*nrhs), i__4 = *n - *m * 3; |
685 | 0 | i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + f2cmax(i__3,i__4) |
686 | 0 | ; |
687 | 0 | maxwrk = f2cmax(i__1,i__2); |
688 | 0 | } else { |
689 | | |
690 | | /* Path 2 - remaining underdetermined cases. */ |
691 | |
|
692 | 0 | maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", |
693 | 0 | " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); |
694 | | /* Computing MAX */ |
695 | 0 | i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, |
696 | 0 | "SORMBR", "QLT", m, nrhs, n, &c_n1, (ftnlen)6, ( |
697 | 0 | ftnlen)3); |
698 | 0 | maxwrk = f2cmax(i__1,i__2); |
699 | | /* Computing MAX */ |
700 | 0 | i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORM" |
701 | 0 | "BR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen) |
702 | 0 | 3); |
703 | 0 | maxwrk = f2cmax(i__1,i__2); |
704 | | /* Computing MAX */ |
705 | 0 | i__1 = maxwrk, i__2 = *m * 3 + wlalsd; |
706 | 0 | maxwrk = f2cmax(i__1,i__2); |
707 | 0 | } |
708 | | /* Computing MAX */ |
709 | 0 | i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = f2cmax(i__1, |
710 | 0 | i__2), i__2 = *m * 3 + wlalsd; |
711 | 0 | minwrk = f2cmax(i__1,i__2); |
712 | 0 | } |
713 | 0 | } |
714 | 0 | minwrk = f2cmin(minwrk,maxwrk); |
715 | 0 | work[1] = (real) maxwrk; |
716 | 0 | iwork[1] = liwork; |
717 | |
|
718 | 0 | if (*lwork < minwrk && ! lquery) { |
719 | 0 | *info = -12; |
720 | 0 | } |
721 | 0 | } |
722 | |
|
723 | 0 | if (*info != 0) { |
724 | 0 | i__1 = -(*info); |
725 | 0 | xerbla_("SGELSD", &i__1, (ftnlen)6); |
726 | 0 | return; |
727 | 0 | } else if (lquery) { |
728 | 0 | return; |
729 | 0 | } |
730 | | |
731 | | /* Quick return if possible. */ |
732 | | |
733 | 0 | if (*m == 0 || *n == 0) { |
734 | 0 | fprintf(stdout,"SGELSD quickreturn rank=0\n"); |
735 | 0 | *rank = 0; |
736 | 0 | return; |
737 | 0 | } |
738 | | |
739 | | /* Get machine parameters. */ |
740 | | |
741 | 0 | eps = slamch_("P"); |
742 | 0 | sfmin = slamch_("S"); |
743 | 0 | smlnum = sfmin / eps; |
744 | 0 | bignum = 1.f / smlnum; |
745 | | // FILE *bla=fopen("/tmp/bla","w"); |
746 | | //fprintf(bla,"SGELSD eps=%g sfmin=%g smlnum=%g bignum=%g\n",eps,sfmin,smlnum,bignum); |
747 | | //fclose(bla); |
748 | 0 | slabad_(&smlnum, &bignum); |
749 | | |
750 | | /* Scale A if f2cmax entry outside range [SMLNUM,BIGNUM]. */ |
751 | |
|
752 | 0 | anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); |
753 | 0 | iascl = 0; |
754 | 0 | if (anrm > 0.f && anrm < smlnum) { |
755 | | |
756 | | /* Scale matrix norm up to SMLNUM. */ |
757 | 0 | fprintf(stdout,"scaling A up to SML\n"); |
758 | 0 | slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, |
759 | 0 | info); |
760 | 0 | iascl = 1; |
761 | 0 | } else if (anrm > bignum) { |
762 | | |
763 | | /* Scale matrix norm down to BIGNUM. */ |
764 | |
|
765 | 0 | fprintf(stdout,"scaling A down to BIG\n"); |
766 | 0 | slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, |
767 | 0 | info); |
768 | 0 | iascl = 2; |
769 | 0 | } else if (anrm == 0.f) { |
770 | | |
771 | | /* Matrix all zero. Return zero solution. */ |
772 | |
|
773 | 0 | fprintf(stdout,"A is zero soln\n"); |
774 | 0 | i__1 = f2cmax(*m,*n); |
775 | 0 | slaset_("F", &i__1, nrhs, &c_b81, &c_b81, &b[b_offset], ldb); |
776 | 0 | slaset_("F", &minmn, &c__1, &c_b81, &c_b81, &s[1], &c__1); |
777 | 0 | *rank = 0; |
778 | 0 | goto L10; |
779 | 0 | } |
780 | | |
781 | | /* Scale B if f2cmax entry outside range [SMLNUM,BIGNUM]. */ |
782 | | |
783 | 0 | bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); |
784 | 0 | ibscl = 0; |
785 | 0 | if (bnrm > 0.f && bnrm < smlnum) { |
786 | | |
787 | | /* Scale matrix norm up to SMLNUM. */ |
788 | 0 | fprintf(stdout,"scaling B up to SML\n"); |
789 | |
|
790 | 0 | slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, |
791 | 0 | info); |
792 | 0 | ibscl = 1; |
793 | 0 | } else if (bnrm > bignum) { |
794 | | |
795 | | /* Scale matrix norm down to BIGNUM. */ |
796 | 0 | fprintf(stdout,"scaling B down to BIG\n"); |
797 | |
|
798 | 0 | slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, |
799 | 0 | info); |
800 | 0 | ibscl = 2; |
801 | 0 | } |
802 | | |
803 | | /* If M < N make sure certain entries of B are zero. */ |
804 | |
|
805 | 0 | if (*m < *n) { |
806 | 0 | i__1 = *n - *m; |
807 | 0 | fprintf(stdout,"zeroing parts of B \n"); |
808 | 0 | slaset_("F", &i__1, nrhs, &c_b81, &c_b81, &b[*m + 1 + b_dim1], ldb); |
809 | 0 | } |
810 | | |
811 | | /* Overdetermined case. */ |
812 | |
|
813 | 0 | if (*m >= *n) { |
814 | 0 | fprintf(stdout,"overdetermined, path 1 \n"); |
815 | | |
816 | | /* Path 1 - overdetermined or exactly determined. */ |
817 | |
|
818 | 0 | mm = *m; |
819 | 0 | if (*m >= mnthr) { |
820 | | |
821 | | /* Path 1a - overdetermined, with many more rows than columns. */ |
822 | 0 | fprintf(stdout,"overdetermined, path 1a \n"); |
823 | |
|
824 | 0 | mm = *n; |
825 | 0 | itau = 1; |
826 | 0 | nwork = itau + *n; |
827 | | |
828 | | /* Compute A=Q*R. */ |
829 | | /* (Workspace: need 2*N, prefer N+N*NB) */ |
830 | |
|
831 | 0 | i__1 = *lwork - nwork + 1; |
832 | 0 | sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, |
833 | 0 | info); |
834 | | |
835 | | /* Multiply B by transpose(Q). */ |
836 | | /* (Workspace: need N+NRHS, prefer N+NRHS*NB) */ |
837 | |
|
838 | 0 | i__1 = *lwork - nwork + 1; |
839 | 0 | sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ |
840 | 0 | b_offset], ldb, &work[nwork], &i__1, info); |
841 | | |
842 | | /* Zero out below R. */ |
843 | |
|
844 | 0 | if (*n > 1) { |
845 | 0 | i__1 = *n - 1; |
846 | 0 | i__2 = *n - 1; |
847 | 0 | slaset_("L", &i__1, &i__2, &c_b81, &c_b81, &a[a_dim1 + 2], |
848 | 0 | lda); |
849 | 0 | } |
850 | 0 | } |
851 | |
|
852 | 0 | ie = 1; |
853 | 0 | itauq = ie + *n; |
854 | 0 | itaup = itauq + *n; |
855 | 0 | nwork = itaup + *n; |
856 | | |
857 | | /* Bidiagonalize R in A. */ |
858 | | /* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) */ |
859 | |
|
860 | 0 | i__1 = *lwork - nwork + 1; |
861 | 0 | sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & |
862 | 0 | work[itaup], &work[nwork], &i__1, info); |
863 | | |
864 | | /* Multiply B by transpose of left bidiagonalizing vectors of R. */ |
865 | | /* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) */ |
866 | |
|
867 | 0 | i__1 = *lwork - nwork + 1; |
868 | 0 | sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], |
869 | 0 | &b[b_offset], ldb, &work[nwork], &i__1, info); |
870 | | |
871 | | /* Solve the bidiagonal least squares problem. */ |
872 | |
|
873 | 0 | slalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, |
874 | 0 | rcond, rank, &work[nwork], &iwork[1], info); |
875 | 0 | if (*info != 0) { |
876 | 0 | fprintf(stdout,"info !=0 nach slalsd\n"); |
877 | 0 | goto L10; |
878 | 0 | } |
879 | | |
880 | | /* Multiply B by right bidiagonalizing vectors of R. */ |
881 | | |
882 | 0 | i__1 = *lwork - nwork + 1; |
883 | 0 | sormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & |
884 | 0 | b[b_offset], ldb, &work[nwork], &i__1, info); |
885 | |
|
886 | 0 | } else /* if(complicated condition) */ { |
887 | 0 | fprintf(stdout,"not overdetermined \n"); |
888 | | /* Computing MAX */ |
889 | 0 | i__1 = *m, i__2 = (*m << 1) - 4, i__1 = f2cmax(i__1,i__2), i__1 = f2cmax( |
890 | 0 | i__1,*nrhs), i__2 = *n - *m * 3, i__1 = f2cmax(i__1,i__2); |
891 | 0 | if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + f2cmax(i__1,wlalsd)) { |
892 | | |
893 | | /* Path 2a - underdetermined, with many more columns than rows */ |
894 | | /* and sufficient workspace for an efficient algorithm. */ |
895 | |
|
896 | 0 | fprintf(stdout,"not overdetermined, path 2a\n"); |
897 | |
|
898 | 0 | ldwork = *m; |
899 | | /* Computing MAX */ |
900 | | /* Computing MAX */ |
901 | 0 | i__3 = *m, i__4 = (*m << 1) - 4, i__3 = f2cmax(i__3,i__4), i__3 = |
902 | 0 | f2cmax(i__3,*nrhs), i__4 = *n - *m * 3; |
903 | 0 | i__1 = (*m << 2) + *m * *lda + f2cmax(i__3,i__4), i__2 = *m * *lda + |
904 | 0 | *m + *m * *nrhs, i__1 = f2cmax(i__1,i__2), i__2 = (*m << 2) |
905 | 0 | + *m * *lda + wlalsd; |
906 | 0 | if (*lwork >= f2cmax(i__1,i__2)) { |
907 | 0 | ldwork = *lda; |
908 | 0 | } |
909 | 0 | itau = 1; |
910 | 0 | nwork = *m + 1; |
911 | | |
912 | | /* Compute A=L*Q. */ |
913 | | /* (Workspace: need 2*M, prefer M+M*NB) */ |
914 | |
|
915 | 0 | i__1 = *lwork - nwork + 1; |
916 | 0 | sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, |
917 | 0 | info); |
918 | 0 | il = nwork; |
919 | | |
920 | | /* Copy L to WORK(IL), zeroing out above its diagonal. */ |
921 | |
|
922 | 0 | slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); |
923 | 0 | i__1 = *m - 1; |
924 | 0 | i__2 = *m - 1; |
925 | 0 | slaset_("U", &i__1, &i__2, &c_b81, &c_b81, &work[il + ldwork], & |
926 | 0 | ldwork); |
927 | 0 | ie = il + ldwork * *m; |
928 | 0 | itauq = ie + *m; |
929 | 0 | itaup = itauq + *m; |
930 | 0 | nwork = itaup + *m; |
931 | | |
932 | | /* Bidiagonalize L in WORK(IL). */ |
933 | | /* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) */ |
934 | |
|
935 | 0 | i__1 = *lwork - nwork + 1; |
936 | 0 | sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], |
937 | 0 | &work[itaup], &work[nwork], &i__1, info); |
938 | | |
939 | | /* Multiply B by transpose of left bidiagonalizing vectors of L. */ |
940 | | /* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) */ |
941 | |
|
942 | 0 | i__1 = *lwork - nwork + 1; |
943 | 0 | sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ |
944 | 0 | itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); |
945 | | |
946 | | /* Solve the bidiagonal least squares problem. */ |
947 | |
|
948 | 0 | slalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], |
949 | 0 | ldb, rcond, rank, &work[nwork], &iwork[1], info); |
950 | 0 | if (*info != 0) { |
951 | 0 | goto L10; |
952 | 0 | } |
953 | | |
954 | | /* Multiply B by right bidiagonalizing vectors of L. */ |
955 | | |
956 | 0 | i__1 = *lwork - nwork + 1; |
957 | 0 | sormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ |
958 | 0 | itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); |
959 | | |
960 | | /* Zero out below first M rows of B. */ |
961 | |
|
962 | 0 | i__1 = *n - *m; |
963 | 0 | slaset_("F", &i__1, nrhs, &c_b81, &c_b81, &b[*m + 1 + b_dim1], |
964 | 0 | ldb); |
965 | 0 | nwork = itau + *m; |
966 | | |
967 | | /* Multiply transpose(Q) by B. */ |
968 | | /* (Workspace: need M+NRHS, prefer M+NRHS*NB) */ |
969 | |
|
970 | 0 | i__1 = *lwork - nwork + 1; |
971 | 0 | sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ |
972 | 0 | b_offset], ldb, &work[nwork], &i__1, info); |
973 | |
|
974 | 0 | } else { |
975 | | |
976 | | /* Path 2 - remaining underdetermined cases. */ |
977 | 0 | fprintf(stdout,"other underdetermined, path 2"); |
978 | |
|
979 | 0 | ie = 1; |
980 | 0 | itauq = ie + *m; |
981 | 0 | itaup = itauq + *m; |
982 | 0 | nwork = itaup + *m; |
983 | | |
984 | | /* Bidiagonalize A. */ |
985 | | /* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */ |
986 | |
|
987 | 0 | i__1 = *lwork - nwork + 1; |
988 | 0 | sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & |
989 | 0 | work[itaup], &work[nwork], &i__1, info); |
990 | | |
991 | | /* Multiply B by transpose of left bidiagonalizing vectors. */ |
992 | | /* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) */ |
993 | |
|
994 | 0 | i__1 = *lwork - nwork + 1; |
995 | 0 | sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] |
996 | 0 | , &b[b_offset], ldb, &work[nwork], &i__1, info); |
997 | | |
998 | | /* Solve the bidiagonal least squares problem. */ |
999 | |
|
1000 | 0 | slalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], |
1001 | 0 | ldb, rcond, rank, &work[nwork], &iwork[1], info); |
1002 | 0 | if (*info != 0) { |
1003 | 0 | goto L10; |
1004 | 0 | } |
1005 | | |
1006 | | /* Multiply B by right bidiagonalizing vectors of A. */ |
1007 | | |
1008 | 0 | i__1 = *lwork - nwork + 1; |
1009 | 0 | sormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] |
1010 | 0 | , &b[b_offset], ldb, &work[nwork], &i__1, info); |
1011 | |
|
1012 | 0 | } |
1013 | 0 | } |
1014 | | |
1015 | | /* Undo scaling. */ |
1016 | | |
1017 | 0 | if (iascl == 1) { |
1018 | 0 | fprintf(stdout," unscaling a1\n"); |
1019 | 0 | slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, |
1020 | 0 | info); |
1021 | 0 | slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & |
1022 | 0 | minmn, info); |
1023 | 0 | } else if (iascl == 2) { |
1024 | 0 | fprintf(stdout," unscaling a2\n"); |
1025 | 0 | slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, |
1026 | 0 | info); |
1027 | 0 | slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & |
1028 | 0 | minmn, info); |
1029 | 0 | } |
1030 | 0 | if (ibscl == 1) { |
1031 | 0 | fprintf(stdout," unscaling b1\n"); |
1032 | 0 | slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, |
1033 | 0 | info); |
1034 | 0 | } else if (ibscl == 2) { |
1035 | 0 | fprintf(stdout," unscaling b2\n"); |
1036 | 0 | slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, |
1037 | 0 | info); |
1038 | 0 | } |
1039 | |
|
1040 | 0 | L10: |
1041 | 0 | work[1] = (real) maxwrk; |
1042 | 0 | iwork[1] = liwork; |
1043 | 0 | fprintf(stdout, "end of SGELSD\n"); |
1044 | 0 | return; |
1045 | | |
1046 | | /* End of SGELSD */ |
1047 | |
|
1048 | 0 | } /* sgelsd_ */ |
1049 | | |