Coverage Report

Created: 2025-09-18 13:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/openblas/lapack-netlib/SRC/sgesvd.c
Line
Count
Source
1
#include <math.h>
2
#include <stdlib.h>
3
#include <string.h>
4
#include <stdio.h>
5
#include <complex.h>
6
#ifdef complex
7
#undef complex
8
#endif
9
#ifdef I
10
#undef I
11
#endif
12
13
#if defined(_WIN64)
14
typedef long long BLASLONG;
15
typedef unsigned long long BLASULONG;
16
#else
17
typedef long BLASLONG;
18
typedef unsigned long BLASULONG;
19
#endif
20
21
#ifdef LAPACK_ILP64
22
typedef BLASLONG blasint;
23
#if defined(_WIN64)
24
#define blasabs(x) llabs(x)
25
#else
26
#define blasabs(x) labs(x)
27
#endif
28
#else
29
typedef int blasint;
30
#define blasabs(x) abs(x)
31
#endif
32
33
typedef blasint integer;
34
35
typedef unsigned int uinteger;
36
typedef char *address;
37
typedef short int shortint;
38
typedef float real;
39
typedef double doublereal;
40
typedef struct { real r, i; } complex;
41
typedef struct { doublereal r, i; } doublecomplex;
42
#ifdef _MSC_VER
43
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
44
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
45
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
46
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
47
#else
48
0
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
49
0
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
50
0
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
51
0
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
52
#endif
53
#define pCf(z) (*_pCf(z))
54
#define pCd(z) (*_pCd(z))
55
typedef blasint logical;
56
57
typedef char logical1;
58
typedef char integer1;
59
60
#define TRUE_ (1)
61
#define FALSE_ (0)
62
63
/* Extern is for use with -E */
64
#ifndef Extern
65
#define Extern extern
66
#endif
67
68
/* I/O stuff */
69
70
typedef int flag;
71
typedef int ftnlen;
72
typedef int ftnint;
73
74
/*external read, write*/
75
typedef struct
76
{ flag cierr;
77
  ftnint ciunit;
78
  flag ciend;
79
  char *cifmt;
80
  ftnint cirec;
81
} cilist;
82
83
/*internal read, write*/
84
typedef struct
85
{ flag icierr;
86
  char *iciunit;
87
  flag iciend;
88
  char *icifmt;
89
  ftnint icirlen;
90
  ftnint icirnum;
91
} icilist;
92
93
/*open*/
94
typedef struct
95
{ flag oerr;
96
  ftnint ounit;
97
  char *ofnm;
98
  ftnlen ofnmlen;
99
  char *osta;
100
  char *oacc;
101
  char *ofm;
102
  ftnint orl;
103
  char *oblnk;
104
} olist;
105
106
/*close*/
107
typedef struct
108
{ flag cerr;
109
  ftnint cunit;
110
  char *csta;
111
} cllist;
112
113
/*rewind, backspace, endfile*/
114
typedef struct
115
{ flag aerr;
116
  ftnint aunit;
117
} alist;
118
119
/* inquire */
120
typedef struct
121
{ flag inerr;
122
  ftnint inunit;
123
  char *infile;
124
  ftnlen infilen;
125
  ftnint  *inex;  /*parameters in standard's order*/
126
  ftnint  *inopen;
127
  ftnint  *innum;
128
  ftnint  *innamed;
129
  char  *inname;
130
  ftnlen  innamlen;
131
  char  *inacc;
132
  ftnlen  inacclen;
133
  char  *inseq;
134
  ftnlen  inseqlen;
135
  char  *indir;
136
  ftnlen  indirlen;
137
  char  *infmt;
138
  ftnlen  infmtlen;
139
  char  *inform;
140
  ftnint  informlen;
141
  char  *inunf;
142
  ftnlen  inunflen;
143
  ftnint  *inrecl;
144
  ftnint  *innrec;
145
  char  *inblank;
146
  ftnlen  inblanklen;
147
} inlist;
148
149
#define VOID void
150
151
union Multitype { /* for multiple entry points */
152
  integer1 g;
153
  shortint h;
154
  integer i;
155
  /* longint j; */
156
  real r;
157
  doublereal d;
158
  complex c;
159
  doublecomplex z;
160
  };
161
162
typedef union Multitype Multitype;
163
164
struct Vardesc {  /* for Namelist */
165
  char *name;
166
  char *addr;
167
  ftnlen *dims;
168
  int  type;
169
  };
170
typedef struct Vardesc Vardesc;
171
172
struct Namelist {
173
  char *name;
174
  Vardesc **vars;
175
  int nvars;
176
  };
177
typedef struct Namelist Namelist;
178
179
#define abs(x) ((x) >= 0 ? (x) : -(x))
180
#define dabs(x) (fabs(x))
181
0
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
182
0
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
183
#define dmin(a,b) (f2cmin(a,b))
184
#define dmax(a,b) (f2cmax(a,b))
185
#define bit_test(a,b) ((a) >> (b) & 1)
186
#define bit_clear(a,b)  ((a) & ~((uinteger)1 << (b)))
187
#define bit_set(a,b)  ((a) |  ((uinteger)1 << (b)))
188
189
#define abort_() { sig_die("Fortran abort routine called", 1); }
190
#define c_abs(z) (cabsf(Cf(z)))
191
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
192
#ifdef _MSC_VER
193
#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
194
#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/df(b)._Val[1]);}
195
#else
196
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
197
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
198
#endif
199
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
200
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
201
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
202
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
203
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
204
#define d_abs(x) (fabs(*(x)))
205
#define d_acos(x) (acos(*(x)))
206
#define d_asin(x) (asin(*(x)))
207
#define d_atan(x) (atan(*(x)))
208
#define d_atn2(x, y) (atan2(*(x),*(y)))
209
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
210
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
211
#define d_cos(x) (cos(*(x)))
212
#define d_cosh(x) (cosh(*(x)))
213
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
214
#define d_exp(x) (exp(*(x)))
215
#define d_imag(z) (cimag(Cd(z)))
216
#define r_imag(z) (cimagf(Cf(z)))
217
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
218
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
219
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
220
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
221
#define d_log(x) (log(*(x)))
222
#define d_mod(x, y) (fmod(*(x), *(y)))
223
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
224
#define d_nint(x) u_nint(*(x))
225
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
226
#define d_sign(a,b) u_sign(*(a),*(b))
227
#define r_sign(a,b) u_sign(*(a),*(b))
228
#define d_sin(x) (sin(*(x)))
229
#define d_sinh(x) (sinh(*(x)))
230
#define d_sqrt(x) (sqrt(*(x)))
231
#define d_tan(x) (tan(*(x)))
232
#define d_tanh(x) (tanh(*(x)))
233
#define i_abs(x) abs(*(x))
234
#define i_dnnt(x) ((integer)u_nint(*(x)))
235
#define i_len(s, n) (n)
236
#define i_nint(x) ((integer)u_nint(*(x)))
237
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
238
0
#define s_cat(lpp, rpp, rnp, np, llp) {   ftnlen i, nc, ll; char *f__rp, *lp;   ll = (llp); lp = (lpp);   for(i=0; i < (int)*(np); ++i) {           nc = ll;          if((rnp)[i] < nc) nc = (rnp)[i];           ll -= nc;           f__rp = (rpp)[i];           while(--nc >= 0) *lp++ = *(f__rp)++;         }   while(--ll >= 0) *lp++ = ' '; }
239
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
240
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
241
#define sig_die(s, kill) { exit(1); }
242
#define s_stop(s, n) {exit(0);}
243
#define z_abs(z) (cabs(Cd(z)))
244
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
245
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
246
#define myexit_() break;
247
#define mycycle() continue;
248
#define myceiling(w) {ceil(w)}
249
#define myhuge(w) {HUGE_VAL}
250
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
251
#define mymaxloc(w,s,e,n) {dmaxloc_(w,*(s),*(e),n)}
252
253
/*  -- translated by f2c (version 20000121).
254
   You must link the resulting object file with the libraries:
255
  -lf2c -lm   (in that order)
256
*/
257
258
259
260
261
/* Table of constant values */
262
263
static integer c__6 = 6;
264
static integer c__0 = 0;
265
static integer c__2 = 2;
266
static integer c_n1 = -1;
267
static real c_b57 = 0.f;
268
static integer c__1 = 1;
269
static real c_b79 = 1.f;
270
271
/* > \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b> */
272
273
/*  =========== DOCUMENTATION =========== */
274
275
/* Online html documentation available at */
276
/*            http://www.netlib.org/lapack/explore-html/ */
277
278
/* > \htmlonly */
279
/* > Download SGESVD + dependencies */
280
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.
281
f"> */
282
/* > [TGZ]</a> */
283
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.
284
f"> */
285
/* > [ZIP]</a> */
286
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.
287
f"> */
288
/* > [TXT]</a> */
289
/* > \endhtmlonly */
290
291
/*  Definition: */
292
/*  =========== */
293
294
/*       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, */
295
/*                          WORK, LWORK, INFO ) */
296
297
/*       CHARACTER          JOBU, JOBVT */
298
/*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N */
299
/*       REAL               A( LDA, * ), S( * ), U( LDU, * ), */
300
/*      $                   VT( LDVT, * ), WORK( * ) */
301
302
303
/* > \par Purpose: */
304
/*  ============= */
305
/* > */
306
/* > \verbatim */
307
/* > */
308
/* > SGESVD computes the singular value decomposition (SVD) of a real */
309
/* > M-by-N matrix A, optionally computing the left and/or right singular */
310
/* > vectors. The SVD is written */
311
/* > */
312
/* >      A = U * SIGMA * transpose(V) */
313
/* > */
314
/* > where SIGMA is an M-by-N matrix which is zero except for its */
315
/* > f2cmin(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and */
316
/* > V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA */
317
/* > are the singular values of A; they are real and non-negative, and */
318
/* > are returned in descending order.  The first f2cmin(m,n) columns of */
319
/* > U and V are the left and right singular vectors of A. */
320
/* > */
321
/* > Note that the routine returns V**T, not V. */
322
/* > \endverbatim */
323
324
/*  Arguments: */
325
/*  ========== */
326
327
/* > \param[in] JOBU */
328
/* > \verbatim */
329
/* >          JOBU is CHARACTER*1 */
330
/* >          Specifies options for computing all or part of the matrix U: */
331
/* >          = 'A':  all M columns of U are returned in array U: */
332
/* >          = 'S':  the first f2cmin(m,n) columns of U (the left singular */
333
/* >                  vectors) are returned in the array U; */
334
/* >          = 'O':  the first f2cmin(m,n) columns of U (the left singular */
335
/* >                  vectors) are overwritten on the array A; */
336
/* >          = 'N':  no columns of U (no left singular vectors) are */
337
/* >                  computed. */
338
/* > \endverbatim */
339
/* > */
340
/* > \param[in] JOBVT */
341
/* > \verbatim */
342
/* >          JOBVT is CHARACTER*1 */
343
/* >          Specifies options for computing all or part of the matrix */
344
/* >          V**T: */
345
/* >          = 'A':  all N rows of V**T are returned in the array VT; */
346
/* >          = 'S':  the first f2cmin(m,n) rows of V**T (the right singular */
347
/* >                  vectors) are returned in the array VT; */
348
/* >          = 'O':  the first f2cmin(m,n) rows of V**T (the right singular */
349
/* >                  vectors) are overwritten on the array A; */
350
/* >          = 'N':  no rows of V**T (no right singular vectors) are */
351
/* >                  computed. */
352
/* > */
353
/* >          JOBVT and JOBU cannot both be 'O'. */
354
/* > \endverbatim */
355
/* > */
356
/* > \param[in] M */
357
/* > \verbatim */
358
/* >          M is INTEGER */
359
/* >          The number of rows of the input matrix A.  M >= 0. */
360
/* > \endverbatim */
361
/* > */
362
/* > \param[in] N */
363
/* > \verbatim */
364
/* >          N is INTEGER */
365
/* >          The number of columns of the input matrix A.  N >= 0. */
366
/* > \endverbatim */
367
/* > */
368
/* > \param[in,out] A */
369
/* > \verbatim */
370
/* >          A is REAL array, dimension (LDA,N) */
371
/* >          On entry, the M-by-N matrix A. */
372
/* >          On exit, */
373
/* >          if JOBU = 'O',  A is overwritten with the first f2cmin(m,n) */
374
/* >                          columns of U (the left singular vectors, */
375
/* >                          stored columnwise); */
376
/* >          if JOBVT = 'O', A is overwritten with the first f2cmin(m,n) */
377
/* >                          rows of V**T (the right singular vectors, */
378
/* >                          stored rowwise); */
379
/* >          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A */
380
/* >                          are destroyed. */
381
/* > \endverbatim */
382
/* > */
383
/* > \param[in] LDA */
384
/* > \verbatim */
385
/* >          LDA is INTEGER */
386
/* >          The leading dimension of the array A.  LDA >= f2cmax(1,M). */
387
/* > \endverbatim */
388
/* > */
389
/* > \param[out] S */
390
/* > \verbatim */
391
/* >          S is REAL array, dimension (f2cmin(M,N)) */
392
/* >          The singular values of A, sorted so that S(i) >= S(i+1). */
393
/* > \endverbatim */
394
/* > */
395
/* > \param[out] U */
396
/* > \verbatim */
397
/* >          U is REAL array, dimension (LDU,UCOL) */
398
/* >          (LDU,M) if JOBU = 'A' or (LDU,f2cmin(M,N)) if JOBU = 'S'. */
399
/* >          If JOBU = 'A', U contains the M-by-M orthogonal matrix U; */
400
/* >          if JOBU = 'S', U contains the first f2cmin(m,n) columns of U */
401
/* >          (the left singular vectors, stored columnwise); */
402
/* >          if JOBU = 'N' or 'O', U is not referenced. */
403
/* > \endverbatim */
404
/* > */
405
/* > \param[in] LDU */
406
/* > \verbatim */
407
/* >          LDU is INTEGER */
408
/* >          The leading dimension of the array U.  LDU >= 1; if */
409
/* >          JOBU = 'S' or 'A', LDU >= M. */
410
/* > \endverbatim */
411
/* > */
412
/* > \param[out] VT */
413
/* > \verbatim */
414
/* >          VT is REAL array, dimension (LDVT,N) */
415
/* >          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix */
416
/* >          V**T; */
417
/* >          if JOBVT = 'S', VT contains the first f2cmin(m,n) rows of */
418
/* >          V**T (the right singular vectors, stored rowwise); */
419
/* >          if JOBVT = 'N' or 'O', VT is not referenced. */
420
/* > \endverbatim */
421
/* > */
422
/* > \param[in] LDVT */
423
/* > \verbatim */
424
/* >          LDVT is INTEGER */
425
/* >          The leading dimension of the array VT.  LDVT >= 1; if */
426
/* >          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= f2cmin(M,N). */
427
/* > \endverbatim */
428
/* > */
429
/* > \param[out] WORK */
430
/* > \verbatim */
431
/* >          WORK is REAL array, dimension (MAX(1,LWORK)) */
432
/* >          On exit, if INFO = 0, WORK(1) returns the optimal LWORK; */
433
/* >          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged */
434
/* >          superdiagonal elements of an upper bidiagonal matrix B */
435
/* >          whose diagonal is in S (not necessarily sorted). B */
436
/* >          satisfies A = U * B * VT, so it has the same singular values */
437
/* >          as A, and singular vectors related by U and VT. */
438
/* > \endverbatim */
439
/* > */
440
/* > \param[in] LWORK */
441
/* > \verbatim */
442
/* >          LWORK is INTEGER */
443
/* >          The dimension of the array WORK. */
444
/* >          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): */
445
/* >             - PATH 1  (M much larger than N, JOBU='N') */
446
/* >             - PATH 1t (N much larger than M, JOBVT='N') */
447
/* >          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths */
448
/* >          For good performance, LWORK should generally be larger. */
449
/* > */
450
/* >          If LWORK = -1, then a workspace query is assumed; the routine */
451
/* >          only calculates the optimal size of the WORK array, returns */
452
/* >          this value as the first entry of the WORK array, and no error */
453
/* >          message related to LWORK is issued by XERBLA. */
454
/* > \endverbatim */
455
/* > */
456
/* > \param[out] INFO */
457
/* > \verbatim */
458
/* >          INFO is INTEGER */
459
/* >          = 0:  successful exit. */
460
/* >          < 0:  if INFO = -i, the i-th argument had an illegal value. */
461
/* >          > 0:  if SBDSQR did not converge, INFO specifies how many */
462
/* >                superdiagonals of an intermediate bidiagonal form B */
463
/* >                did not converge to zero. See the description of WORK */
464
/* >                above for details. */
465
/* > \endverbatim */
466
467
/*  Authors: */
468
/*  ======== */
469
470
/* > \author Univ. of Tennessee */
471
/* > \author Univ. of California Berkeley */
472
/* > \author Univ. of Colorado Denver */
473
/* > \author NAG Ltd. */
474
475
/* > \date April 2012 */
476
477
/* > \ingroup realGEsing */
478
479
/*  ===================================================================== */
480
/* Subroutine */ void sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, 
481
  real *a, integer *lda, real *s, real *u, integer *ldu, real *vt, 
482
  integer *ldvt, real *work, integer *lwork, integer *info)
483
0
{
484
    /* System generated locals */
485
0
    address a__1[2];
486
0
    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1[2], 
487
0
      i__2, i__3, i__4;
488
0
    char ch__1[2];
489
490
    /* Local variables */
491
0
    integer iscl;
492
0
    real anrm;
493
0
    integer ierr, itau, ncvt, nrvt, lwork_sgebrd__, lwork_sgelqf__, 
494
0
      lwork_sgeqrf__, i__;
495
0
    extern logical lsame_(char *, char *);
496
0
    integer chunk;
497
0
    extern /* Subroutine */ void sgemm_(char *, char *, integer *, integer *, 
498
0
      integer *, real *, real *, integer *, real *, integer *, real *, 
499
0
      real *, integer *);
500
0
    integer minmn, wrkbl, itaup, itauq, mnthr, iwork;
501
0
    logical wntua, wntva, wntun, wntuo, wntvn, wntvo, wntus, wntvs;
502
0
    integer ie, ir, bdspac, iu;
503
0
    extern /* Subroutine */ void sgebrd_(integer *, integer *, real *, integer 
504
0
      *, real *, real *, real *, real *, real *, integer *, integer *);
505
0
    extern real slamch_(char *), slange_(char *, integer *, integer *,
506
0
       real *, integer *, real *);
507
0
    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
508
0
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
509
0
      integer *, integer *, ftnlen, ftnlen);
510
0
    real bignum;
511
0
    extern /* Subroutine */ void sgelqf_(integer *, integer *, real *, integer 
512
0
      *, real *, real *, integer *, integer *), slascl_(char *, integer 
513
0
      *, integer *, real *, real *, integer *, integer *, real *, 
514
0
      integer *, integer *), sgeqrf_(integer *, integer *, real 
515
0
      *, integer *, real *, real *, integer *, integer *), slacpy_(char 
516
0
      *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
517
0
      real *, integer *), sbdsqr_(char *, integer *, integer *, 
518
0
      integer *, integer *, real *, real *, real *, integer *, real *, 
519
0
      integer *, real *, integer *, real *, integer *), sorgbr_(
520
0
      char *, integer *, integer *, integer *, real *, integer *, real *
521
0
      , real *, integer *, integer *), sormbr_(char *, char *, 
522
0
      char *, integer *, integer *, integer *, real *, integer *, real *
523
0
      , real *, integer *, real *, integer *, integer *);
524
0
    integer ldwrkr, minwrk, ldwrku, maxwrk;
525
0
    extern /* Subroutine */ void sorglq_(integer *, integer *, integer *, real 
526
0
      *, integer *, real *, real *, integer *, integer *);
527
0
    real smlnum;
528
0
    extern /* Subroutine */ void sorgqr_(integer *, integer *, integer *, real 
529
0
      *, integer *, real *, real *, integer *, integer *);
530
0
    logical lquery, wntuas, wntvas;
531
0
    integer blk, lwork_sorgbr_p__, lwork_sorgbr_q__, lwork_sorglq_m__, 
532
0
      lwork_sorglq_n__, ncu, lwork_sorgqr_n__, lwork_sorgqr_m__;
533
0
    real eps, dum[1];
534
0
    integer nru;
535
536
537
/*  -- LAPACK driver routine (version 3.7.0) -- */
538
/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
539
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
540
/*     April 2012 */
541
542
543
/*  ===================================================================== */
544
545
546
/*     Test the input arguments */
547
548
    /* Parameter adjustments */
549
0
    a_dim1 = *lda;
550
0
    a_offset = 1 + a_dim1 * 1;
551
0
    a -= a_offset;
552
0
    --s;
553
0
    u_dim1 = *ldu;
554
0
    u_offset = 1 + u_dim1 * 1;
555
0
    u -= u_offset;
556
0
    vt_dim1 = *ldvt;
557
0
    vt_offset = 1 + vt_dim1 * 1;
558
0
    vt -= vt_offset;
559
0
    --work;
560
561
    /* Function Body */
562
0
    *info = 0;
563
0
    minmn = f2cmin(*m,*n);
564
0
    wntua = lsame_(jobu, "A");
565
0
    wntus = lsame_(jobu, "S");
566
0
    wntuas = wntua || wntus;
567
0
    wntuo = lsame_(jobu, "O");
568
0
    wntun = lsame_(jobu, "N");
569
0
    wntva = lsame_(jobvt, "A");
570
0
    wntvs = lsame_(jobvt, "S");
571
0
    wntvas = wntva || wntvs;
572
0
    wntvo = lsame_(jobvt, "O");
573
0
    wntvn = lsame_(jobvt, "N");
574
0
    lquery = *lwork == -1;
575
576
0
    if (! (wntua || wntus || wntuo || wntun)) {
577
0
  *info = -1;
578
0
    } else if (! (wntva || wntvs || wntvo || wntvn) || wntvo && wntuo) {
579
0
  *info = -2;
580
0
    } else if (*m < 0) {
581
0
  *info = -3;
582
0
    } else if (*n < 0) {
583
0
  *info = -4;
584
0
    } else if (*lda < f2cmax(1,*m)) {
585
0
  *info = -6;
586
0
    } else if (*ldu < 1 || wntuas && *ldu < *m) {
587
0
  *info = -9;
588
0
    } else if (*ldvt < 1 || wntva && *ldvt < *n || wntvs && *ldvt < minmn) {
589
0
  *info = -11;
590
0
    }
591
592
/*     Compute workspace */
593
/*      (Note: Comments in the code beginning "Workspace:" describe the */
594
/*       minimal amount of workspace needed at that point in the code, */
595
/*       as well as the preferred amount for good performance. */
596
/*       NB refers to the optimal block size for the immediately */
597
/*       following subroutine, as returned by ILAENV.) */
598
599
0
    if (*info == 0) {
600
0
  minwrk = 1;
601
0
  maxwrk = 1;
602
0
  if (*m >= *n && minmn > 0) {
603
604
/*           Compute space needed for SBDSQR */
605
606
/* Writing concatenation */
607
0
      i__1[0] = 1, a__1[0] = jobu;
608
0
      i__1[1] = 1, a__1[1] = jobvt;
609
0
      s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
610
0
      mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
611
0
        ftnlen)6, (ftnlen)2);
612
0
      bdspac = *n * 5;
613
/*           Compute space needed for SGEQRF */
614
0
      sgeqrf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
615
0
      lwork_sgeqrf__ = (integer) dum[0];
616
/*           Compute space needed for SORGQR */
617
0
      sorgqr_(m, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
618
0
      lwork_sorgqr_n__ = (integer) dum[0];
619
0
      sorgqr_(m, m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
620
0
      lwork_sorgqr_m__ = (integer) dum[0];
621
/*           Compute space needed for SGEBRD */
622
0
      sgebrd_(n, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
623
0
         &ierr);
624
0
      lwork_sgebrd__ = (integer) dum[0];
625
/*           Compute space needed for SORGBR P */
626
0
      sorgbr_("P", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
627
0
      lwork_sorgbr_p__ = (integer) dum[0];
628
/*           Compute space needed for SORGBR Q */
629
0
      sorgbr_("Q", n, n, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
630
0
      lwork_sorgbr_q__ = (integer) dum[0];
631
632
0
      if (*m >= mnthr) {
633
0
    if (wntun) {
634
635
/*                 Path 1 (M much larger than N, JOBU='N') */
636
637
0
        maxwrk = *n + lwork_sgeqrf__;
638
/* Computing MAX */
639
0
        i__2 = maxwrk, i__3 = *n * 3 + lwork_sgebrd__;
640
0
        maxwrk = f2cmax(i__2,i__3);
641
0
        if (wntvo || wntvas) {
642
/* Computing MAX */
643
0
      i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__;
644
0
      maxwrk = f2cmax(i__2,i__3);
645
0
        }
646
0
        maxwrk = f2cmax(maxwrk,bdspac);
647
/* Computing MAX */
648
0
        i__2 = *n << 2;
649
0
        minwrk = f2cmax(i__2,bdspac);
650
0
    } else if (wntuo && wntvn) {
651
652
/*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
653
654
0
        wrkbl = *n + lwork_sgeqrf__;
655
/* Computing MAX */
656
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
657
0
        wrkbl = f2cmax(i__2,i__3);
658
/* Computing MAX */
659
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
660
0
        wrkbl = f2cmax(i__2,i__3);
661
/* Computing MAX */
662
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
663
0
        wrkbl = f2cmax(i__2,i__3);
664
0
        wrkbl = f2cmax(wrkbl,bdspac);
665
/* Computing MAX */
666
0
        i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
667
0
        maxwrk = f2cmax(i__2,i__3);
668
/* Computing MAX */
669
0
        i__2 = *n * 3 + *m;
670
0
        minwrk = f2cmax(i__2,bdspac);
671
0
    } else if (wntuo && wntvas) {
672
673
/*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or */
674
/*                 'A') */
675
676
0
        wrkbl = *n + lwork_sgeqrf__;
677
/* Computing MAX */
678
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
679
0
        wrkbl = f2cmax(i__2,i__3);
680
/* Computing MAX */
681
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
682
0
        wrkbl = f2cmax(i__2,i__3);
683
/* Computing MAX */
684
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
685
0
        wrkbl = f2cmax(i__2,i__3);
686
/* Computing MAX */
687
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
688
0
        wrkbl = f2cmax(i__2,i__3);
689
0
        wrkbl = f2cmax(wrkbl,bdspac);
690
/* Computing MAX */
691
0
        i__2 = *n * *n + wrkbl, i__3 = *n * *n + *m * *n + *n;
692
0
        maxwrk = f2cmax(i__2,i__3);
693
/* Computing MAX */
694
0
        i__2 = *n * 3 + *m;
695
0
        minwrk = f2cmax(i__2,bdspac);
696
0
    } else if (wntus && wntvn) {
697
698
/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
699
700
0
        wrkbl = *n + lwork_sgeqrf__;
701
/* Computing MAX */
702
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
703
0
        wrkbl = f2cmax(i__2,i__3);
704
/* Computing MAX */
705
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
706
0
        wrkbl = f2cmax(i__2,i__3);
707
/* Computing MAX */
708
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
709
0
        wrkbl = f2cmax(i__2,i__3);
710
0
        wrkbl = f2cmax(wrkbl,bdspac);
711
0
        maxwrk = *n * *n + wrkbl;
712
/* Computing MAX */
713
0
        i__2 = *n * 3 + *m;
714
0
        minwrk = f2cmax(i__2,bdspac);
715
0
    } else if (wntus && wntvo) {
716
717
/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
718
719
0
        wrkbl = *n + lwork_sgeqrf__;
720
/* Computing MAX */
721
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
722
0
        wrkbl = f2cmax(i__2,i__3);
723
/* Computing MAX */
724
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
725
0
        wrkbl = f2cmax(i__2,i__3);
726
/* Computing MAX */
727
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
728
0
        wrkbl = f2cmax(i__2,i__3);
729
/* Computing MAX */
730
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
731
0
        wrkbl = f2cmax(i__2,i__3);
732
0
        wrkbl = f2cmax(wrkbl,bdspac);
733
0
        maxwrk = (*n << 1) * *n + wrkbl;
734
/* Computing MAX */
735
0
        i__2 = *n * 3 + *m;
736
0
        minwrk = f2cmax(i__2,bdspac);
737
0
    } else if (wntus && wntvas) {
738
739
/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or */
740
/*                 'A') */
741
742
0
        wrkbl = *n + lwork_sgeqrf__;
743
/* Computing MAX */
744
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_n__;
745
0
        wrkbl = f2cmax(i__2,i__3);
746
/* Computing MAX */
747
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
748
0
        wrkbl = f2cmax(i__2,i__3);
749
/* Computing MAX */
750
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
751
0
        wrkbl = f2cmax(i__2,i__3);
752
/* Computing MAX */
753
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
754
0
        wrkbl = f2cmax(i__2,i__3);
755
0
        wrkbl = f2cmax(wrkbl,bdspac);
756
0
        maxwrk = *n * *n + wrkbl;
757
/* Computing MAX */
758
0
        i__2 = *n * 3 + *m;
759
0
        minwrk = f2cmax(i__2,bdspac);
760
0
    } else if (wntua && wntvn) {
761
762
/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
763
764
0
        wrkbl = *n + lwork_sgeqrf__;
765
/* Computing MAX */
766
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
767
0
        wrkbl = f2cmax(i__2,i__3);
768
/* Computing MAX */
769
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
770
0
        wrkbl = f2cmax(i__2,i__3);
771
/* Computing MAX */
772
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
773
0
        wrkbl = f2cmax(i__2,i__3);
774
0
        wrkbl = f2cmax(wrkbl,bdspac);
775
0
        maxwrk = *n * *n + wrkbl;
776
/* Computing MAX */
777
0
        i__2 = *n * 3 + *m;
778
0
        minwrk = f2cmax(i__2,bdspac);
779
0
    } else if (wntua && wntvo) {
780
781
/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
782
783
0
        wrkbl = *n + lwork_sgeqrf__;
784
/* Computing MAX */
785
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
786
0
        wrkbl = f2cmax(i__2,i__3);
787
/* Computing MAX */
788
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
789
0
        wrkbl = f2cmax(i__2,i__3);
790
/* Computing MAX */
791
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
792
0
        wrkbl = f2cmax(i__2,i__3);
793
/* Computing MAX */
794
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
795
0
        wrkbl = f2cmax(i__2,i__3);
796
0
        wrkbl = f2cmax(wrkbl,bdspac);
797
0
        maxwrk = (*n << 1) * *n + wrkbl;
798
/* Computing MAX */
799
0
        i__2 = *n * 3 + *m;
800
0
        minwrk = f2cmax(i__2,bdspac);
801
0
    } else if (wntua && wntvas) {
802
803
/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or */
804
/*                 'A') */
805
806
0
        wrkbl = *n + lwork_sgeqrf__;
807
/* Computing MAX */
808
0
        i__2 = wrkbl, i__3 = *n + lwork_sorgqr_m__;
809
0
        wrkbl = f2cmax(i__2,i__3);
810
/* Computing MAX */
811
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sgebrd__;
812
0
        wrkbl = f2cmax(i__2,i__3);
813
/* Computing MAX */
814
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_q__;
815
0
        wrkbl = f2cmax(i__2,i__3);
816
/* Computing MAX */
817
0
        i__2 = wrkbl, i__3 = *n * 3 + lwork_sorgbr_p__;
818
0
        wrkbl = f2cmax(i__2,i__3);
819
0
        wrkbl = f2cmax(wrkbl,bdspac);
820
0
        maxwrk = *n * *n + wrkbl;
821
/* Computing MAX */
822
0
        i__2 = *n * 3 + *m;
823
0
        minwrk = f2cmax(i__2,bdspac);
824
0
    }
825
0
      } else {
826
827
/*              Path 10 (M at least N, but not much larger) */
828
829
0
    sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
830
0
      c_n1, &ierr);
831
0
    lwork_sgebrd__ = (integer) dum[0];
832
0
    maxwrk = *n * 3 + lwork_sgebrd__;
833
0
    if (wntus || wntuo) {
834
0
        sorgbr_("Q", m, n, n, &a[a_offset], lda, dum, dum, &c_n1, 
835
0
          &ierr);
836
0
        lwork_sorgbr_q__ = (integer) dum[0];
837
/* Computing MAX */
838
0
        i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__;
839
0
        maxwrk = f2cmax(i__2,i__3);
840
0
    }
841
0
    if (wntua) {
842
0
        sorgbr_("Q", m, m, n, &a[a_offset], lda, dum, dum, &c_n1, 
843
0
          &ierr);
844
0
        lwork_sorgbr_q__ = (integer) dum[0];
845
/* Computing MAX */
846
0
        i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_q__;
847
0
        maxwrk = f2cmax(i__2,i__3);
848
0
    }
849
0
    if (! wntvn) {
850
/* Computing MAX */
851
0
        i__2 = maxwrk, i__3 = *n * 3 + lwork_sorgbr_p__;
852
0
        maxwrk = f2cmax(i__2,i__3);
853
0
    }
854
0
    maxwrk = f2cmax(maxwrk,bdspac);
855
/* Computing MAX */
856
0
    i__2 = *n * 3 + *m;
857
0
    minwrk = f2cmax(i__2,bdspac);
858
0
      }
859
0
  } else if (minmn > 0) {
860
861
/*           Compute space needed for SBDSQR */
862
863
/* Writing concatenation */
864
0
      i__1[0] = 1, a__1[0] = jobu;
865
0
      i__1[1] = 1, a__1[1] = jobvt;
866
0
      s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
867
0
      mnthr = ilaenv_(&c__6, "SGESVD", ch__1, m, n, &c__0, &c__0, (
868
0
        ftnlen)6, (ftnlen)2);
869
0
      bdspac = *m * 5;
870
/*           Compute space needed for SGELQF */
871
0
      sgelqf_(m, n, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
872
0
      lwork_sgelqf__ = (integer) dum[0];
873
/*           Compute space needed for SORGLQ */
874
0
      sorglq_(n, n, m, dum, n, dum, dum, &c_n1, &ierr);
875
0
      lwork_sorglq_n__ = (integer) dum[0];
876
0
      sorglq_(m, n, m, &a[a_offset], lda, dum, dum, &c_n1, &ierr);
877
0
      lwork_sorglq_m__ = (integer) dum[0];
878
/*           Compute space needed for SGEBRD */
879
0
      sgebrd_(m, m, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &c_n1,
880
0
         &ierr);
881
0
      lwork_sgebrd__ = (integer) dum[0];
882
/*            Compute space needed for SORGBR P */
883
0
      sorgbr_("P", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
884
0
      lwork_sorgbr_p__ = (integer) dum[0];
885
/*           Compute space needed for SORGBR Q */
886
0
      sorgbr_("Q", m, m, m, &a[a_offset], n, dum, dum, &c_n1, &ierr);
887
0
      lwork_sorgbr_q__ = (integer) dum[0];
888
0
      if (*n >= mnthr) {
889
0
    if (wntvn) {
890
891
/*                 Path 1t(N much larger than M, JOBVT='N') */
892
893
0
        maxwrk = *m + lwork_sgelqf__;
894
/* Computing MAX */
895
0
        i__2 = maxwrk, i__3 = *m * 3 + lwork_sgebrd__;
896
0
        maxwrk = f2cmax(i__2,i__3);
897
0
        if (wntuo || wntuas) {
898
/* Computing MAX */
899
0
      i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__;
900
0
      maxwrk = f2cmax(i__2,i__3);
901
0
        }
902
0
        maxwrk = f2cmax(maxwrk,bdspac);
903
/* Computing MAX */
904
0
        i__2 = *m << 2;
905
0
        minwrk = f2cmax(i__2,bdspac);
906
0
    } else if (wntvo && wntun) {
907
908
/*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
909
910
0
        wrkbl = *m + lwork_sgelqf__;
911
/* Computing MAX */
912
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
913
0
        wrkbl = f2cmax(i__2,i__3);
914
/* Computing MAX */
915
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
916
0
        wrkbl = f2cmax(i__2,i__3);
917
/* Computing MAX */
918
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
919
0
        wrkbl = f2cmax(i__2,i__3);
920
0
        wrkbl = f2cmax(wrkbl,bdspac);
921
/* Computing MAX */
922
0
        i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
923
0
        maxwrk = f2cmax(i__2,i__3);
924
/* Computing MAX */
925
0
        i__2 = *m * 3 + *n;
926
0
        minwrk = f2cmax(i__2,bdspac);
927
0
    } else if (wntvo && wntuas) {
928
929
/*                 Path 3t(N much larger than M, JOBU='S' or 'A', */
930
/*                 JOBVT='O') */
931
932
0
        wrkbl = *m + lwork_sgelqf__;
933
/* Computing MAX */
934
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
935
0
        wrkbl = f2cmax(i__2,i__3);
936
/* Computing MAX */
937
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
938
0
        wrkbl = f2cmax(i__2,i__3);
939
/* Computing MAX */
940
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
941
0
        wrkbl = f2cmax(i__2,i__3);
942
/* Computing MAX */
943
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
944
0
        wrkbl = f2cmax(i__2,i__3);
945
0
        wrkbl = f2cmax(wrkbl,bdspac);
946
/* Computing MAX */
947
0
        i__2 = *m * *m + wrkbl, i__3 = *m * *m + *m * *n + *m;
948
0
        maxwrk = f2cmax(i__2,i__3);
949
/* Computing MAX */
950
0
        i__2 = *m * 3 + *n;
951
0
        minwrk = f2cmax(i__2,bdspac);
952
0
    } else if (wntvs && wntun) {
953
954
/*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
955
956
0
        wrkbl = *m + lwork_sgelqf__;
957
/* Computing MAX */
958
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
959
0
        wrkbl = f2cmax(i__2,i__3);
960
/* Computing MAX */
961
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
962
0
        wrkbl = f2cmax(i__2,i__3);
963
/* Computing MAX */
964
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
965
0
        wrkbl = f2cmax(i__2,i__3);
966
0
        wrkbl = f2cmax(wrkbl,bdspac);
967
0
        maxwrk = *m * *m + wrkbl;
968
/* Computing MAX */
969
0
        i__2 = *m * 3 + *n;
970
0
        minwrk = f2cmax(i__2,bdspac);
971
0
    } else if (wntvs && wntuo) {
972
973
/*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
974
975
0
        wrkbl = *m + lwork_sgelqf__;
976
/* Computing MAX */
977
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
978
0
        wrkbl = f2cmax(i__2,i__3);
979
/* Computing MAX */
980
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
981
0
        wrkbl = f2cmax(i__2,i__3);
982
/* Computing MAX */
983
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
984
0
        wrkbl = f2cmax(i__2,i__3);
985
/* Computing MAX */
986
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
987
0
        wrkbl = f2cmax(i__2,i__3);
988
0
        wrkbl = f2cmax(wrkbl,bdspac);
989
0
        maxwrk = (*m << 1) * *m + wrkbl;
990
/* Computing MAX */
991
0
        i__2 = *m * 3 + *n;
992
0
        minwrk = f2cmax(i__2,bdspac);
993
0
        maxwrk = f2cmax(maxwrk,minwrk);
994
0
    } else if (wntvs && wntuas) {
995
996
/*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
997
/*                 JOBVT='S') */
998
999
0
        wrkbl = *m + lwork_sgelqf__;
1000
/* Computing MAX */
1001
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_m__;
1002
0
        wrkbl = f2cmax(i__2,i__3);
1003
/* Computing MAX */
1004
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1005
0
        wrkbl = f2cmax(i__2,i__3);
1006
/* Computing MAX */
1007
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1008
0
        wrkbl = f2cmax(i__2,i__3);
1009
/* Computing MAX */
1010
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1011
0
        wrkbl = f2cmax(i__2,i__3);
1012
0
        wrkbl = f2cmax(wrkbl,bdspac);
1013
0
        maxwrk = *m * *m + wrkbl;
1014
/* Computing MAX */
1015
0
        i__2 = *m * 3 + *n;
1016
0
        minwrk = f2cmax(i__2,bdspac);
1017
0
    } else if (wntva && wntun) {
1018
1019
/*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
1020
1021
0
        wrkbl = *m + lwork_sgelqf__;
1022
/* Computing MAX */
1023
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
1024
0
        wrkbl = f2cmax(i__2,i__3);
1025
/* Computing MAX */
1026
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1027
0
        wrkbl = f2cmax(i__2,i__3);
1028
/* Computing MAX */
1029
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1030
0
        wrkbl = f2cmax(i__2,i__3);
1031
0
        wrkbl = f2cmax(wrkbl,bdspac);
1032
0
        maxwrk = *m * *m + wrkbl;
1033
/* Computing MAX */
1034
0
        i__2 = *m * 3 + *n;
1035
0
        minwrk = f2cmax(i__2,bdspac);
1036
0
    } else if (wntva && wntuo) {
1037
1038
/*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
1039
1040
0
        wrkbl = *m + lwork_sgelqf__;
1041
/* Computing MAX */
1042
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
1043
0
        wrkbl = f2cmax(i__2,i__3);
1044
/* Computing MAX */
1045
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1046
0
        wrkbl = f2cmax(i__2,i__3);
1047
/* Computing MAX */
1048
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1049
0
        wrkbl = f2cmax(i__2,i__3);
1050
/* Computing MAX */
1051
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1052
0
        wrkbl = f2cmax(i__2,i__3);
1053
0
        wrkbl = f2cmax(wrkbl,bdspac);
1054
0
        maxwrk = (*m << 1) * *m + wrkbl;
1055
/* Computing MAX */
1056
0
        i__2 = *m * 3 + *n;
1057
0
        minwrk = f2cmax(i__2,bdspac);
1058
0
    } else if (wntva && wntuas) {
1059
1060
/*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
1061
/*                 JOBVT='A') */
1062
1063
0
        wrkbl = *m + lwork_sgelqf__;
1064
/* Computing MAX */
1065
0
        i__2 = wrkbl, i__3 = *m + lwork_sorglq_n__;
1066
0
        wrkbl = f2cmax(i__2,i__3);
1067
/* Computing MAX */
1068
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sgebrd__;
1069
0
        wrkbl = f2cmax(i__2,i__3);
1070
/* Computing MAX */
1071
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_p__;
1072
0
        wrkbl = f2cmax(i__2,i__3);
1073
/* Computing MAX */
1074
0
        i__2 = wrkbl, i__3 = *m * 3 + lwork_sorgbr_q__;
1075
0
        wrkbl = f2cmax(i__2,i__3);
1076
0
        wrkbl = f2cmax(wrkbl,bdspac);
1077
0
        maxwrk = *m * *m + wrkbl;
1078
/* Computing MAX */
1079
0
        i__2 = *m * 3 + *n;
1080
0
        minwrk = f2cmax(i__2,bdspac);
1081
0
    }
1082
0
      } else {
1083
1084
/*              Path 10t(N greater than M, but not much larger) */
1085
1086
0
    sgebrd_(m, n, &a[a_offset], lda, &s[1], dum, dum, dum, dum, &
1087
0
      c_n1, &ierr);
1088
0
    lwork_sgebrd__ = (integer) dum[0];
1089
0
    maxwrk = *m * 3 + lwork_sgebrd__;
1090
0
    if (wntvs || wntvo) {
1091
/*                Compute space needed for SORGBR P */
1092
0
        sorgbr_("P", m, n, m, &a[a_offset], n, dum, dum, &c_n1, &
1093
0
          ierr);
1094
0
        lwork_sorgbr_p__ = (integer) dum[0];
1095
/* Computing MAX */
1096
0
        i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__;
1097
0
        maxwrk = f2cmax(i__2,i__3);
1098
0
    }
1099
0
    if (wntva) {
1100
0
        sorgbr_("P", n, n, m, &a[a_offset], n, dum, dum, &c_n1, &
1101
0
          ierr);
1102
0
        lwork_sorgbr_p__ = (integer) dum[0];
1103
/* Computing MAX */
1104
0
        i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_p__;
1105
0
        maxwrk = f2cmax(i__2,i__3);
1106
0
    }
1107
0
    if (! wntun) {
1108
/* Computing MAX */
1109
0
        i__2 = maxwrk, i__3 = *m * 3 + lwork_sorgbr_q__;
1110
0
        maxwrk = f2cmax(i__2,i__3);
1111
0
    }
1112
0
    maxwrk = f2cmax(maxwrk,bdspac);
1113
/* Computing MAX */
1114
0
    i__2 = *m * 3 + *n;
1115
0
    minwrk = f2cmax(i__2,bdspac);
1116
0
      }
1117
0
  }
1118
0
  maxwrk = f2cmax(maxwrk,minwrk);
1119
0
  work[1] = (real) maxwrk;
1120
1121
0
  if (*lwork < minwrk && ! lquery) {
1122
0
      *info = -13;
1123
0
  }
1124
0
    }
1125
1126
0
    if (*info != 0) {
1127
0
  i__2 = -(*info);
1128
0
  xerbla_("SGESVD", &i__2, (ftnlen)6);
1129
0
  return;
1130
0
    } else if (lquery) {
1131
0
  return;
1132
0
    }
1133
1134
/*     Quick return if possible */
1135
1136
0
    if (*m == 0 || *n == 0) {
1137
0
  return;
1138
0
    }
1139
1140
/*     Get machine constants */
1141
1142
0
    eps = slamch_("P");
1143
0
    smlnum = sqrt(slamch_("S")) / eps;
1144
0
    bignum = 1.f / smlnum;
1145
1146
/*     Scale A if f2cmax element outside range [SMLNUM,BIGNUM] */
1147
1148
0
    anrm = slange_("M", m, n, &a[a_offset], lda, dum);
1149
0
    iscl = 0;
1150
0
    if (anrm > 0.f && anrm < smlnum) {
1151
0
  iscl = 1;
1152
0
  slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
1153
0
    ierr);
1154
0
    } else if (anrm > bignum) {
1155
0
  iscl = 1;
1156
0
  slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
1157
0
    ierr);
1158
0
    }
1159
1160
0
    if (*m >= *n) {
1161
1162
/*        A has at least as many rows as columns. If A has sufficiently */
1163
/*        more rows than columns, first reduce using the QR */
1164
/*        decomposition (if sufficient workspace available) */
1165
1166
0
  if (*m >= mnthr) {
1167
1168
0
      if (wntun) {
1169
1170
/*              Path 1 (M much larger than N, JOBU='N') */
1171
/*              No left singular vectors to be computed */
1172
1173
0
    itau = 1;
1174
0
    iwork = itau + *n;
1175
1176
/*              Compute A=Q*R */
1177
/*              (Workspace: need 2*N, prefer N+N*NB) */
1178
1179
0
    i__2 = *lwork - iwork + 1;
1180
0
    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
1181
0
      i__2, &ierr);
1182
1183
/*              Zero out below R */
1184
1185
0
    if (*n > 1) {
1186
0
        i__2 = *n - 1;
1187
0
        i__3 = *n - 1;
1188
0
        slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[a_dim1 + 2],
1189
0
           lda);
1190
0
    }
1191
0
    ie = 1;
1192
0
    itauq = ie + *n;
1193
0
    itaup = itauq + *n;
1194
0
    iwork = itaup + *n;
1195
1196
/*              Bidiagonalize R in A */
1197
/*              (Workspace: need 4*N, prefer 3*N+2*N*NB) */
1198
1199
0
    i__2 = *lwork - iwork + 1;
1200
0
    sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
1201
0
      itauq], &work[itaup], &work[iwork], &i__2, &ierr);
1202
0
    ncvt = 0;
1203
0
    if (wntvo || wntvas) {
1204
1205
/*                 If right singular vectors desired, generate P'. */
1206
/*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
1207
1208
0
        i__2 = *lwork - iwork + 1;
1209
0
        sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &
1210
0
          work[iwork], &i__2, &ierr);
1211
0
        ncvt = *n;
1212
0
    }
1213
0
    iwork = ie + *n;
1214
1215
/*              Perform bidiagonal QR iteration, computing right */
1216
/*              singular vectors of A in A if desired */
1217
/*              (Workspace: need BDSPAC) */
1218
1219
0
    sbdsqr_("U", n, &ncvt, &c__0, &c__0, &s[1], &work[ie], &a[
1220
0
      a_offset], lda, dum, &c__1, dum, &c__1, &work[iwork], 
1221
0
      info);
1222
1223
/*              If right singular vectors desired in VT, copy them there */
1224
1225
0
    if (wntvas) {
1226
0
        slacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], 
1227
0
          ldvt);
1228
0
    }
1229
1230
0
      } else if (wntuo && wntvn) {
1231
1232
/*              Path 2 (M much larger than N, JOBU='O', JOBVT='N') */
1233
/*              N left singular vectors to be overwritten on A and */
1234
/*              no right singular vectors to be computed */
1235
1236
/* Computing MAX */
1237
0
    i__2 = *n << 2;
1238
0
    if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
1239
1240
/*                 Sufficient workspace for a fast algorithm */
1241
1242
0
        ir = 1;
1243
/* Computing MAX */
1244
0
        i__2 = wrkbl, i__3 = *lda * *n + *n;
1245
0
        if (*lwork >= f2cmax(i__2,i__3) + *lda * *n) {
1246
1247
/*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N */
1248
1249
0
      ldwrku = *lda;
1250
0
      ldwrkr = *lda;
1251
0
        } else /* if(complicated condition) */ {
1252
/* Computing MAX */
1253
0
      i__2 = wrkbl, i__3 = *lda * *n + *n;
1254
0
      if (*lwork >= f2cmax(i__2,i__3) + *n * *n) {
1255
1256
/*                    WORK(IU) is LDA by N, WORK(IR) is N by N */
1257
1258
0
          ldwrku = *lda;
1259
0
          ldwrkr = *n;
1260
0
      } else {
1261
1262
/*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N */
1263
1264
0
          ldwrku = (*lwork - *n * *n - *n) / *n;
1265
0
          ldwrkr = *n;
1266
0
      }
1267
0
        }
1268
0
        itau = ir + ldwrkr * *n;
1269
0
        iwork = itau + *n;
1270
1271
/*                 Compute A=Q*R */
1272
/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1273
1274
0
        i__2 = *lwork - iwork + 1;
1275
0
        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1276
0
          , &i__2, &ierr);
1277
1278
/*                 Copy R to WORK(IR) and zero out below it */
1279
1280
0
        slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
1281
0
        i__2 = *n - 1;
1282
0
        i__3 = *n - 1;
1283
0
        slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 1], 
1284
0
          &ldwrkr);
1285
1286
/*                 Generate Q in A */
1287
/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1288
1289
0
        i__2 = *lwork - iwork + 1;
1290
0
        sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1291
0
          iwork], &i__2, &ierr);
1292
0
        ie = itau;
1293
0
        itauq = ie + *n;
1294
0
        itaup = itauq + *n;
1295
0
        iwork = itaup + *n;
1296
1297
/*                 Bidiagonalize R in WORK(IR) */
1298
/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
1299
1300
0
        i__2 = *lwork - iwork + 1;
1301
0
        sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
1302
0
          itauq], &work[itaup], &work[iwork], &i__2, &ierr);
1303
1304
/*                 Generate left vectors bidiagonalizing R */
1305
/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
1306
1307
0
        i__2 = *lwork - iwork + 1;
1308
0
        sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
1309
0
          work[iwork], &i__2, &ierr);
1310
0
        iwork = ie + *n;
1311
1312
/*                 Perform bidiagonal QR iteration, computing left */
1313
/*                 singular vectors of R in WORK(IR) */
1314
/*                 (Workspace: need N*N+BDSPAC) */
1315
1316
0
        sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], dum, &
1317
0
          c__1, &work[ir], &ldwrkr, dum, &c__1, &work[iwork]
1318
0
          , info);
1319
0
        iu = ie + *n;
1320
1321
/*                 Multiply Q in A by left singular vectors of R in */
1322
/*                 WORK(IR), storing result in WORK(IU) and copying to A */
1323
/*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
1324
1325
0
        i__2 = *m;
1326
0
        i__3 = ldwrku;
1327
0
        for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
1328
0
           i__3) {
1329
/* Computing MIN */
1330
0
      i__4 = *m - i__ + 1;
1331
0
      chunk = f2cmin(i__4,ldwrku);
1332
0
      sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + 
1333
0
        a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
1334
0
        work[iu], &ldwrku);
1335
0
      slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
1336
0
        a_dim1], lda);
1337
/* L10: */
1338
0
        }
1339
1340
0
    } else {
1341
1342
/*                 Insufficient workspace for a fast algorithm */
1343
1344
0
        ie = 1;
1345
0
        itauq = ie + *n;
1346
0
        itaup = itauq + *n;
1347
0
        iwork = itaup + *n;
1348
1349
/*                 Bidiagonalize A */
1350
/*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
1351
1352
0
        i__3 = *lwork - iwork + 1;
1353
0
        sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
1354
0
          itauq], &work[itaup], &work[iwork], &i__3, &ierr);
1355
1356
/*                 Generate left vectors bidiagonalizing A */
1357
/*                 (Workspace: need 4*N, prefer 3*N+N*NB) */
1358
1359
0
        i__3 = *lwork - iwork + 1;
1360
0
        sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
1361
0
          work[iwork], &i__3, &ierr);
1362
0
        iwork = ie + *n;
1363
1364
/*                 Perform bidiagonal QR iteration, computing left */
1365
/*                 singular vectors of A in A */
1366
/*                 (Workspace: need BDSPAC) */
1367
1368
0
        sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], dum, &
1369
0
          c__1, &a[a_offset], lda, dum, &c__1, &work[iwork],
1370
0
           info);
1371
1372
0
    }
1373
1374
0
      } else if (wntuo && wntvas) {
1375
1376
/*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') */
1377
/*              N left singular vectors to be overwritten on A and */
1378
/*              N right singular vectors to be computed in VT */
1379
1380
/* Computing MAX */
1381
0
    i__3 = *n << 2;
1382
0
    if (*lwork >= *n * *n + f2cmax(i__3,bdspac)) {
1383
1384
/*                 Sufficient workspace for a fast algorithm */
1385
1386
0
        ir = 1;
1387
/* Computing MAX */
1388
0
        i__3 = wrkbl, i__2 = *lda * *n + *n;
1389
0
        if (*lwork >= f2cmax(i__3,i__2) + *lda * *n) {
1390
1391
/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N */
1392
1393
0
      ldwrku = *lda;
1394
0
      ldwrkr = *lda;
1395
0
        } else /* if(complicated condition) */ {
1396
/* Computing MAX */
1397
0
      i__3 = wrkbl, i__2 = *lda * *n + *n;
1398
0
      if (*lwork >= f2cmax(i__3,i__2) + *n * *n) {
1399
1400
/*                    WORK(IU) is LDA by N and WORK(IR) is N by N */
1401
1402
0
          ldwrku = *lda;
1403
0
          ldwrkr = *n;
1404
0
      } else {
1405
1406
/*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N */
1407
1408
0
          ldwrku = (*lwork - *n * *n - *n) / *n;
1409
0
          ldwrkr = *n;
1410
0
      }
1411
0
        }
1412
0
        itau = ir + ldwrkr * *n;
1413
0
        iwork = itau + *n;
1414
1415
/*                 Compute A=Q*R */
1416
/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1417
1418
0
        i__3 = *lwork - iwork + 1;
1419
0
        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1420
0
          , &i__3, &ierr);
1421
1422
/*                 Copy R to VT, zeroing out below it */
1423
1424
0
        slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
1425
0
          ldvt);
1426
0
        if (*n > 1) {
1427
0
      i__3 = *n - 1;
1428
0
      i__2 = *n - 1;
1429
0
      slaset_("L", &i__3, &i__2, &c_b57, &c_b57, &vt[
1430
0
        vt_dim1 + 2], ldvt);
1431
0
        }
1432
1433
/*                 Generate Q in A */
1434
/*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1435
1436
0
        i__3 = *lwork - iwork + 1;
1437
0
        sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1438
0
          iwork], &i__3, &ierr);
1439
0
        ie = itau;
1440
0
        itauq = ie + *n;
1441
0
        itaup = itauq + *n;
1442
0
        iwork = itaup + *n;
1443
1444
/*                 Bidiagonalize R in VT, copying result to WORK(IR) */
1445
/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
1446
1447
0
        i__3 = *lwork - iwork + 1;
1448
0
        sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
1449
0
          work[itauq], &work[itaup], &work[iwork], &i__3, &
1450
0
          ierr);
1451
0
        slacpy_("L", n, n, &vt[vt_offset], ldvt, &work[ir], &
1452
0
          ldwrkr);
1453
1454
/*                 Generate left vectors bidiagonalizing R in WORK(IR) */
1455
/*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
1456
1457
0
        i__3 = *lwork - iwork + 1;
1458
0
        sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq], &
1459
0
          work[iwork], &i__3, &ierr);
1460
1461
/*                 Generate right vectors bidiagonalizing R in VT */
1462
/*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) */
1463
1464
0
        i__3 = *lwork - iwork + 1;
1465
0
        sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
1466
0
          &work[iwork], &i__3, &ierr);
1467
0
        iwork = ie + *n;
1468
1469
/*                 Perform bidiagonal QR iteration, computing left */
1470
/*                 singular vectors of R in WORK(IR) and computing right */
1471
/*                 singular vectors of R in VT */
1472
/*                 (Workspace: need N*N+BDSPAC) */
1473
1474
0
        sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
1475
0
          vt_offset], ldvt, &work[ir], &ldwrkr, dum, &c__1, 
1476
0
          &work[iwork], info);
1477
0
        iu = ie + *n;
1478
1479
/*                 Multiply Q in A by left singular vectors of R in */
1480
/*                 WORK(IR), storing result in WORK(IU) and copying to A */
1481
/*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N) */
1482
1483
0
        i__3 = *m;
1484
0
        i__2 = ldwrku;
1485
0
        for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
1486
0
           i__2) {
1487
/* Computing MIN */
1488
0
      i__4 = *m - i__ + 1;
1489
0
      chunk = f2cmin(i__4,ldwrku);
1490
0
      sgemm_("N", "N", &chunk, n, n, &c_b79, &a[i__ + 
1491
0
        a_dim1], lda, &work[ir], &ldwrkr, &c_b57, &
1492
0
        work[iu], &ldwrku);
1493
0
      slacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + 
1494
0
        a_dim1], lda);
1495
/* L20: */
1496
0
        }
1497
1498
0
    } else {
1499
1500
/*                 Insufficient workspace for a fast algorithm */
1501
1502
0
        itau = 1;
1503
0
        iwork = itau + *n;
1504
1505
/*                 Compute A=Q*R */
1506
/*                 (Workspace: need 2*N, prefer N+N*NB) */
1507
1508
0
        i__2 = *lwork - iwork + 1;
1509
0
        sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
1510
0
          , &i__2, &ierr);
1511
1512
/*                 Copy R to VT, zeroing out below it */
1513
1514
0
        slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
1515
0
          ldvt);
1516
0
        if (*n > 1) {
1517
0
      i__2 = *n - 1;
1518
0
      i__3 = *n - 1;
1519
0
      slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
1520
0
        vt_dim1 + 2], ldvt);
1521
0
        }
1522
1523
/*                 Generate Q in A */
1524
/*                 (Workspace: need 2*N, prefer N+N*NB) */
1525
1526
0
        i__2 = *lwork - iwork + 1;
1527
0
        sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[
1528
0
          iwork], &i__2, &ierr);
1529
0
        ie = itau;
1530
0
        itauq = ie + *n;
1531
0
        itaup = itauq + *n;
1532
0
        iwork = itaup + *n;
1533
1534
/*                 Bidiagonalize R in VT */
1535
/*                 (Workspace: need 4*N, prefer 3*N+2*N*NB) */
1536
1537
0
        i__2 = *lwork - iwork + 1;
1538
0
        sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], &
1539
0
          work[itauq], &work[itaup], &work[iwork], &i__2, &
1540
0
          ierr);
1541
1542
/*                 Multiply Q in A by left vectors bidiagonalizing R */
1543
/*                 (Workspace: need 3*N+M, prefer 3*N+M*NB) */
1544
1545
0
        i__2 = *lwork - iwork + 1;
1546
0
        sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, &
1547
0
          work[itauq], &a[a_offset], lda, &work[iwork], &
1548
0
          i__2, &ierr);
1549
1550
/*                 Generate right vectors bidiagonalizing R in VT */
1551
/*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
1552
1553
0
        i__2 = *lwork - iwork + 1;
1554
0
        sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], 
1555
0
          &work[iwork], &i__2, &ierr);
1556
0
        iwork = ie + *n;
1557
1558
/*                 Perform bidiagonal QR iteration, computing left */
1559
/*                 singular vectors of A in A and computing right */
1560
/*                 singular vectors of A in VT */
1561
/*                 (Workspace: need BDSPAC) */
1562
1563
0
        sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
1564
0
          vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
1565
0
          work[iwork], info);
1566
1567
0
    }
1568
1569
0
      } else if (wntus) {
1570
1571
0
    if (wntvn) {
1572
1573
/*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N') */
1574
/*                 N left singular vectors to be computed in U and */
1575
/*                 no right singular vectors to be computed */
1576
1577
/* Computing MAX */
1578
0
        i__2 = *n << 2;
1579
0
        if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
1580
1581
/*                    Sufficient workspace for a fast algorithm */
1582
1583
0
      ir = 1;
1584
0
      if (*lwork >= wrkbl + *lda * *n) {
1585
1586
/*                       WORK(IR) is LDA by N */
1587
1588
0
          ldwrkr = *lda;
1589
0
      } else {
1590
1591
/*                       WORK(IR) is N by N */
1592
1593
0
          ldwrkr = *n;
1594
0
      }
1595
0
      itau = ir + ldwrkr * *n;
1596
0
      iwork = itau + *n;
1597
1598
/*                    Compute A=Q*R */
1599
/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1600
1601
0
      i__2 = *lwork - iwork + 1;
1602
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1603
0
        iwork], &i__2, &ierr);
1604
1605
/*                    Copy R to WORK(IR), zeroing out below it */
1606
1607
0
      slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
1608
0
        ldwrkr);
1609
0
      i__2 = *n - 1;
1610
0
      i__3 = *n - 1;
1611
0
      slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
1612
0
        1], &ldwrkr);
1613
1614
/*                    Generate Q in A */
1615
/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1616
1617
0
      i__2 = *lwork - iwork + 1;
1618
0
      sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
1619
0
        work[iwork], &i__2, &ierr);
1620
0
      ie = itau;
1621
0
      itauq = ie + *n;
1622
0
      itaup = itauq + *n;
1623
0
      iwork = itaup + *n;
1624
1625
/*                    Bidiagonalize R in WORK(IR) */
1626
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
1627
1628
0
      i__2 = *lwork - iwork + 1;
1629
0
      sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
1630
0
        work[itauq], &work[itaup], &work[iwork], &
1631
0
        i__2, &ierr);
1632
1633
/*                    Generate left vectors bidiagonalizing R in WORK(IR) */
1634
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
1635
1636
0
      i__2 = *lwork - iwork + 1;
1637
0
      sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
1638
0
        , &work[iwork], &i__2, &ierr);
1639
0
      iwork = ie + *n;
1640
1641
/*                    Perform bidiagonal QR iteration, computing left */
1642
/*                    singular vectors of R in WORK(IR) */
1643
/*                    (Workspace: need N*N+BDSPAC) */
1644
1645
0
      sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
1646
0
        dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
1647
0
        work[iwork], info);
1648
1649
/*                    Multiply Q in A by left singular vectors of R in */
1650
/*                    WORK(IR), storing result in U */
1651
/*                    (Workspace: need N*N) */
1652
1653
0
      sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
1654
0
        work[ir], &ldwrkr, &c_b57, &u[u_offset], ldu);
1655
1656
0
        } else {
1657
1658
/*                    Insufficient workspace for a fast algorithm */
1659
1660
0
      itau = 1;
1661
0
      iwork = itau + *n;
1662
1663
/*                    Compute A=Q*R, copying result to U */
1664
/*                    (Workspace: need 2*N, prefer N+N*NB) */
1665
1666
0
      i__2 = *lwork - iwork + 1;
1667
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1668
0
        iwork], &i__2, &ierr);
1669
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
1670
0
        ldu);
1671
1672
/*                    Generate Q in U */
1673
/*                    (Workspace: need 2*N, prefer N+N*NB) */
1674
1675
0
      i__2 = *lwork - iwork + 1;
1676
0
      sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
1677
0
        work[iwork], &i__2, &ierr);
1678
0
      ie = itau;
1679
0
      itauq = ie + *n;
1680
0
      itaup = itauq + *n;
1681
0
      iwork = itaup + *n;
1682
1683
/*                    Zero out below R in A */
1684
1685
0
      if (*n > 1) {
1686
0
          i__2 = *n - 1;
1687
0
          i__3 = *n - 1;
1688
0
          slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
1689
0
            a_dim1 + 2], lda);
1690
0
      }
1691
1692
/*                    Bidiagonalize R in A */
1693
/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
1694
1695
0
      i__2 = *lwork - iwork + 1;
1696
0
      sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
1697
0
        work[itauq], &work[itaup], &work[iwork], &
1698
0
        i__2, &ierr);
1699
1700
/*                    Multiply Q in U by left vectors bidiagonalizing R */
1701
/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
1702
1703
0
      i__2 = *lwork - iwork + 1;
1704
0
      sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
1705
0
        work[itauq], &u[u_offset], ldu, &work[iwork], 
1706
0
        &i__2, &ierr)
1707
0
        ;
1708
0
      iwork = ie + *n;
1709
1710
/*                    Perform bidiagonal QR iteration, computing left */
1711
/*                    singular vectors of A in U */
1712
/*                    (Workspace: need BDSPAC) */
1713
1714
0
      sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
1715
0
        dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
1716
0
        work[iwork], info);
1717
1718
0
        }
1719
1720
0
    } else if (wntvo) {
1721
1722
/*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O') */
1723
/*                 N left singular vectors to be computed in U and */
1724
/*                 N right singular vectors to be overwritten on A */
1725
1726
/* Computing MAX */
1727
0
        i__2 = *n << 2;
1728
0
        if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
1729
1730
/*                    Sufficient workspace for a fast algorithm */
1731
1732
0
      iu = 1;
1733
0
      if (*lwork >= wrkbl + (*lda << 1) * *n) {
1734
1735
/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
1736
1737
0
          ldwrku = *lda;
1738
0
          ir = iu + ldwrku * *n;
1739
0
          ldwrkr = *lda;
1740
0
      } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
1741
1742
/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
1743
1744
0
          ldwrku = *lda;
1745
0
          ir = iu + ldwrku * *n;
1746
0
          ldwrkr = *n;
1747
0
      } else {
1748
1749
/*                       WORK(IU) is N by N and WORK(IR) is N by N */
1750
1751
0
          ldwrku = *n;
1752
0
          ir = iu + ldwrku * *n;
1753
0
          ldwrkr = *n;
1754
0
      }
1755
0
      itau = ir + ldwrkr * *n;
1756
0
      iwork = itau + *n;
1757
1758
/*                    Compute A=Q*R */
1759
/*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
1760
1761
0
      i__2 = *lwork - iwork + 1;
1762
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1763
0
        iwork], &i__2, &ierr);
1764
1765
/*                    Copy R to WORK(IU), zeroing out below it */
1766
1767
0
      slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
1768
0
        ldwrku);
1769
0
      i__2 = *n - 1;
1770
0
      i__3 = *n - 1;
1771
0
      slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
1772
0
        1], &ldwrku);
1773
1774
/*                    Generate Q in A */
1775
/*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
1776
1777
0
      i__2 = *lwork - iwork + 1;
1778
0
      sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
1779
0
        work[iwork], &i__2, &ierr);
1780
0
      ie = itau;
1781
0
      itauq = ie + *n;
1782
0
      itaup = itauq + *n;
1783
0
      iwork = itaup + *n;
1784
1785
/*                    Bidiagonalize R in WORK(IU), copying result to */
1786
/*                    WORK(IR) */
1787
/*                    (Workspace: need 2*N*N+4*N, */
1788
/*                                prefer 2*N*N+3*N+2*N*NB) */
1789
1790
0
      i__2 = *lwork - iwork + 1;
1791
0
      sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
1792
0
        work[itauq], &work[itaup], &work[iwork], &
1793
0
        i__2, &ierr);
1794
0
      slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
1795
0
        ldwrkr);
1796
1797
/*                    Generate left bidiagonalizing vectors in WORK(IU) */
1798
/*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
1799
1800
0
      i__2 = *lwork - iwork + 1;
1801
0
      sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
1802
0
        , &work[iwork], &i__2, &ierr);
1803
1804
/*                    Generate right bidiagonalizing vectors in WORK(IR) */
1805
/*                    (Workspace: need 2*N*N+4*N-1, */
1806
/*                                prefer 2*N*N+3*N+(N-1)*NB) */
1807
1808
0
      i__2 = *lwork - iwork + 1;
1809
0
      sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
1810
0
        , &work[iwork], &i__2, &ierr);
1811
0
      iwork = ie + *n;
1812
1813
/*                    Perform bidiagonal QR iteration, computing left */
1814
/*                    singular vectors of R in WORK(IU) and computing */
1815
/*                    right singular vectors of R in WORK(IR) */
1816
/*                    (Workspace: need 2*N*N+BDSPAC) */
1817
1818
0
      sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
1819
0
        ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
1820
0
        &work[iwork], info);
1821
1822
/*                    Multiply Q in A by left singular vectors of R in */
1823
/*                    WORK(IU), storing result in U */
1824
/*                    (Workspace: need N*N) */
1825
1826
0
      sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
1827
0
        work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
1828
1829
/*                    Copy right singular vectors of R to A */
1830
/*                    (Workspace: need N*N) */
1831
1832
0
      slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
1833
0
        lda);
1834
1835
0
        } else {
1836
1837
/*                    Insufficient workspace for a fast algorithm */
1838
1839
0
      itau = 1;
1840
0
      iwork = itau + *n;
1841
1842
/*                    Compute A=Q*R, copying result to U */
1843
/*                    (Workspace: need 2*N, prefer N+N*NB) */
1844
1845
0
      i__2 = *lwork - iwork + 1;
1846
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1847
0
        iwork], &i__2, &ierr);
1848
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
1849
0
        ldu);
1850
1851
/*                    Generate Q in U */
1852
/*                    (Workspace: need 2*N, prefer N+N*NB) */
1853
1854
0
      i__2 = *lwork - iwork + 1;
1855
0
      sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
1856
0
        work[iwork], &i__2, &ierr);
1857
0
      ie = itau;
1858
0
      itauq = ie + *n;
1859
0
      itaup = itauq + *n;
1860
0
      iwork = itaup + *n;
1861
1862
/*                    Zero out below R in A */
1863
1864
0
      if (*n > 1) {
1865
0
          i__2 = *n - 1;
1866
0
          i__3 = *n - 1;
1867
0
          slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
1868
0
            a_dim1 + 2], lda);
1869
0
      }
1870
1871
/*                    Bidiagonalize R in A */
1872
/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
1873
1874
0
      i__2 = *lwork - iwork + 1;
1875
0
      sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
1876
0
        work[itauq], &work[itaup], &work[iwork], &
1877
0
        i__2, &ierr);
1878
1879
/*                    Multiply Q in U by left vectors bidiagonalizing R */
1880
/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
1881
1882
0
      i__2 = *lwork - iwork + 1;
1883
0
      sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
1884
0
        work[itauq], &u[u_offset], ldu, &work[iwork], 
1885
0
        &i__2, &ierr)
1886
0
        ;
1887
1888
/*                    Generate right vectors bidiagonalizing R in A */
1889
/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
1890
1891
0
      i__2 = *lwork - iwork + 1;
1892
0
      sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
1893
0
         &work[iwork], &i__2, &ierr);
1894
0
      iwork = ie + *n;
1895
1896
/*                    Perform bidiagonal QR iteration, computing left */
1897
/*                    singular vectors of A in U and computing right */
1898
/*                    singular vectors of A in A */
1899
/*                    (Workspace: need BDSPAC) */
1900
1901
0
      sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
1902
0
        a_offset], lda, &u[u_offset], ldu, dum, &c__1,
1903
0
         &work[iwork], info);
1904
1905
0
        }
1906
1907
0
    } else if (wntvas) {
1908
1909
/*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' */
1910
/*                         or 'A') */
1911
/*                 N left singular vectors to be computed in U and */
1912
/*                 N right singular vectors to be computed in VT */
1913
1914
/* Computing MAX */
1915
0
        i__2 = *n << 2;
1916
0
        if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
1917
1918
/*                    Sufficient workspace for a fast algorithm */
1919
1920
0
      iu = 1;
1921
0
      if (*lwork >= wrkbl + *lda * *n) {
1922
1923
/*                       WORK(IU) is LDA by N */
1924
1925
0
          ldwrku = *lda;
1926
0
      } else {
1927
1928
/*                       WORK(IU) is N by N */
1929
1930
0
          ldwrku = *n;
1931
0
      }
1932
0
      itau = iu + ldwrku * *n;
1933
0
      iwork = itau + *n;
1934
1935
/*                    Compute A=Q*R */
1936
/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1937
1938
0
      i__2 = *lwork - iwork + 1;
1939
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
1940
0
        iwork], &i__2, &ierr);
1941
1942
/*                    Copy R to WORK(IU), zeroing out below it */
1943
1944
0
      slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
1945
0
        ldwrku);
1946
0
      i__2 = *n - 1;
1947
0
      i__3 = *n - 1;
1948
0
      slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
1949
0
        1], &ldwrku);
1950
1951
/*                    Generate Q in A */
1952
/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
1953
1954
0
      i__2 = *lwork - iwork + 1;
1955
0
      sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &
1956
0
        work[iwork], &i__2, &ierr);
1957
0
      ie = itau;
1958
0
      itauq = ie + *n;
1959
0
      itaup = itauq + *n;
1960
0
      iwork = itaup + *n;
1961
1962
/*                    Bidiagonalize R in WORK(IU), copying result to VT */
1963
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
1964
1965
0
      i__2 = *lwork - iwork + 1;
1966
0
      sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
1967
0
        work[itauq], &work[itaup], &work[iwork], &
1968
0
        i__2, &ierr);
1969
0
      slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
1970
0
         ldvt);
1971
1972
/*                    Generate left bidiagonalizing vectors in WORK(IU) */
1973
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
1974
1975
0
      i__2 = *lwork - iwork + 1;
1976
0
      sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
1977
0
        , &work[iwork], &i__2, &ierr);
1978
1979
/*                    Generate right bidiagonalizing vectors in VT */
1980
/*                    (Workspace: need N*N+4*N-1, */
1981
/*                                prefer N*N+3*N+(N-1)*NB) */
1982
1983
0
      i__2 = *lwork - iwork + 1;
1984
0
      sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
1985
0
        itaup], &work[iwork], &i__2, &ierr)
1986
0
        ;
1987
0
      iwork = ie + *n;
1988
1989
/*                    Perform bidiagonal QR iteration, computing left */
1990
/*                    singular vectors of R in WORK(IU) and computing */
1991
/*                    right singular vectors of R in VT */
1992
/*                    (Workspace: need N*N+BDSPAC) */
1993
1994
0
      sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
1995
0
        vt_offset], ldvt, &work[iu], &ldwrku, dum, &
1996
0
        c__1, &work[iwork], info);
1997
1998
/*                    Multiply Q in A by left singular vectors of R in */
1999
/*                    WORK(IU), storing result in U */
2000
/*                    (Workspace: need N*N) */
2001
2002
0
      sgemm_("N", "N", m, n, n, &c_b79, &a[a_offset], lda, &
2003
0
        work[iu], &ldwrku, &c_b57, &u[u_offset], ldu);
2004
2005
0
        } else {
2006
2007
/*                    Insufficient workspace for a fast algorithm */
2008
2009
0
      itau = 1;
2010
0
      iwork = itau + *n;
2011
2012
/*                    Compute A=Q*R, copying result to U */
2013
/*                    (Workspace: need 2*N, prefer N+N*NB) */
2014
2015
0
      i__2 = *lwork - iwork + 1;
2016
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2017
0
        iwork], &i__2, &ierr);
2018
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2019
0
        ldu);
2020
2021
/*                    Generate Q in U */
2022
/*                    (Workspace: need 2*N, prefer N+N*NB) */
2023
2024
0
      i__2 = *lwork - iwork + 1;
2025
0
      sorgqr_(m, n, n, &u[u_offset], ldu, &work[itau], &
2026
0
        work[iwork], &i__2, &ierr);
2027
2028
/*                    Copy R to VT, zeroing out below it */
2029
2030
0
      slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
2031
0
        ldvt);
2032
0
      if (*n > 1) {
2033
0
          i__2 = *n - 1;
2034
0
          i__3 = *n - 1;
2035
0
          slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
2036
0
            vt_dim1 + 2], ldvt);
2037
0
      }
2038
0
      ie = itau;
2039
0
      itauq = ie + *n;
2040
0
      itaup = itauq + *n;
2041
0
      iwork = itaup + *n;
2042
2043
/*                    Bidiagonalize R in VT */
2044
/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2045
2046
0
      i__2 = *lwork - iwork + 1;
2047
0
      sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
2048
0
        &work[itauq], &work[itaup], &work[iwork], &
2049
0
        i__2, &ierr);
2050
2051
/*                    Multiply Q in U by left bidiagonalizing vectors */
2052
/*                    in VT */
2053
/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2054
2055
0
      i__2 = *lwork - iwork + 1;
2056
0
      sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
2057
0
        &work[itauq], &u[u_offset], ldu, &work[iwork],
2058
0
         &i__2, &ierr);
2059
2060
/*                    Generate right bidiagonalizing vectors in VT */
2061
/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2062
2063
0
      i__2 = *lwork - iwork + 1;
2064
0
      sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2065
0
        itaup], &work[iwork], &i__2, &ierr)
2066
0
        ;
2067
0
      iwork = ie + *n;
2068
2069
/*                    Perform bidiagonal QR iteration, computing left */
2070
/*                    singular vectors of A in U and computing right */
2071
/*                    singular vectors of A in VT */
2072
/*                    (Workspace: need BDSPAC) */
2073
2074
0
      sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
2075
0
        vt_offset], ldvt, &u[u_offset], ldu, dum, &
2076
0
        c__1, &work[iwork], info);
2077
2078
0
        }
2079
2080
0
    }
2081
2082
0
      } else if (wntua) {
2083
2084
0
    if (wntvn) {
2085
2086
/*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N') */
2087
/*                 M left singular vectors to be computed in U and */
2088
/*                 no right singular vectors to be computed */
2089
2090
/* Computing MAX */
2091
0
        i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
2092
0
        if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
2093
2094
/*                    Sufficient workspace for a fast algorithm */
2095
2096
0
      ir = 1;
2097
0
      if (*lwork >= wrkbl + *lda * *n) {
2098
2099
/*                       WORK(IR) is LDA by N */
2100
2101
0
          ldwrkr = *lda;
2102
0
      } else {
2103
2104
/*                       WORK(IR) is N by N */
2105
2106
0
          ldwrkr = *n;
2107
0
      }
2108
0
      itau = ir + ldwrkr * *n;
2109
0
      iwork = itau + *n;
2110
2111
/*                    Compute A=Q*R, copying result to U */
2112
/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
2113
2114
0
      i__2 = *lwork - iwork + 1;
2115
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2116
0
        iwork], &i__2, &ierr);
2117
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2118
0
        ldu);
2119
2120
/*                    Copy R to WORK(IR), zeroing out below it */
2121
2122
0
      slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &
2123
0
        ldwrkr);
2124
0
      i__2 = *n - 1;
2125
0
      i__3 = *n - 1;
2126
0
      slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
2127
0
        1], &ldwrkr);
2128
2129
/*                    Generate Q in U */
2130
/*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
2131
2132
0
      i__2 = *lwork - iwork + 1;
2133
0
      sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2134
0
        work[iwork], &i__2, &ierr);
2135
0
      ie = itau;
2136
0
      itauq = ie + *n;
2137
0
      itaup = itauq + *n;
2138
0
      iwork = itaup + *n;
2139
2140
/*                    Bidiagonalize R in WORK(IR) */
2141
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
2142
2143
0
      i__2 = *lwork - iwork + 1;
2144
0
      sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &
2145
0
        work[itauq], &work[itaup], &work[iwork], &
2146
0
        i__2, &ierr);
2147
2148
/*                    Generate left bidiagonalizing vectors in WORK(IR) */
2149
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
2150
2151
0
      i__2 = *lwork - iwork + 1;
2152
0
      sorgbr_("Q", n, n, n, &work[ir], &ldwrkr, &work[itauq]
2153
0
        , &work[iwork], &i__2, &ierr);
2154
0
      iwork = ie + *n;
2155
2156
/*                    Perform bidiagonal QR iteration, computing left */
2157
/*                    singular vectors of R in WORK(IR) */
2158
/*                    (Workspace: need N*N+BDSPAC) */
2159
2160
0
      sbdsqr_("U", n, &c__0, n, &c__0, &s[1], &work[ie], 
2161
0
        dum, &c__1, &work[ir], &ldwrkr, dum, &c__1, &
2162
0
        work[iwork], info);
2163
2164
/*                    Multiply Q in U by left singular vectors of R in */
2165
/*                    WORK(IR), storing result in A */
2166
/*                    (Workspace: need N*N) */
2167
2168
0
      sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
2169
0
        work[ir], &ldwrkr, &c_b57, &a[a_offset], lda);
2170
2171
/*                    Copy left singular vectors of A from A to U */
2172
2173
0
      slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
2174
0
        ldu);
2175
2176
0
        } else {
2177
2178
/*                    Insufficient workspace for a fast algorithm */
2179
2180
0
      itau = 1;
2181
0
      iwork = itau + *n;
2182
2183
/*                    Compute A=Q*R, copying result to U */
2184
/*                    (Workspace: need 2*N, prefer N+N*NB) */
2185
2186
0
      i__2 = *lwork - iwork + 1;
2187
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2188
0
        iwork], &i__2, &ierr);
2189
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2190
0
        ldu);
2191
2192
/*                    Generate Q in U */
2193
/*                    (Workspace: need N+M, prefer N+M*NB) */
2194
2195
0
      i__2 = *lwork - iwork + 1;
2196
0
      sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2197
0
        work[iwork], &i__2, &ierr);
2198
0
      ie = itau;
2199
0
      itauq = ie + *n;
2200
0
      itaup = itauq + *n;
2201
0
      iwork = itaup + *n;
2202
2203
/*                    Zero out below R in A */
2204
2205
0
      if (*n > 1) {
2206
0
          i__2 = *n - 1;
2207
0
          i__3 = *n - 1;
2208
0
          slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
2209
0
            a_dim1 + 2], lda);
2210
0
      }
2211
2212
/*                    Bidiagonalize R in A */
2213
/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2214
2215
0
      i__2 = *lwork - iwork + 1;
2216
0
      sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
2217
0
        work[itauq], &work[itaup], &work[iwork], &
2218
0
        i__2, &ierr);
2219
2220
/*                    Multiply Q in U by left bidiagonalizing vectors */
2221
/*                    in A */
2222
/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2223
2224
0
      i__2 = *lwork - iwork + 1;
2225
0
      sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2226
0
        work[itauq], &u[u_offset], ldu, &work[iwork], 
2227
0
        &i__2, &ierr)
2228
0
        ;
2229
0
      iwork = ie + *n;
2230
2231
/*                    Perform bidiagonal QR iteration, computing left */
2232
/*                    singular vectors of A in U */
2233
/*                    (Workspace: need BDSPAC) */
2234
2235
0
      sbdsqr_("U", n, &c__0, m, &c__0, &s[1], &work[ie], 
2236
0
        dum, &c__1, &u[u_offset], ldu, dum, &c__1, &
2237
0
        work[iwork], info);
2238
2239
0
        }
2240
2241
0
    } else if (wntvo) {
2242
2243
/*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O') */
2244
/*                 M left singular vectors to be computed in U and */
2245
/*                 N right singular vectors to be overwritten on A */
2246
2247
/* Computing MAX */
2248
0
        i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
2249
0
        if (*lwork >= (*n << 1) * *n + f2cmax(i__2,bdspac)) {
2250
2251
/*                    Sufficient workspace for a fast algorithm */
2252
2253
0
      iu = 1;
2254
0
      if (*lwork >= wrkbl + (*lda << 1) * *n) {
2255
2256
/*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N */
2257
2258
0
          ldwrku = *lda;
2259
0
          ir = iu + ldwrku * *n;
2260
0
          ldwrkr = *lda;
2261
0
      } else if (*lwork >= wrkbl + (*lda + *n) * *n) {
2262
2263
/*                       WORK(IU) is LDA by N and WORK(IR) is N by N */
2264
2265
0
          ldwrku = *lda;
2266
0
          ir = iu + ldwrku * *n;
2267
0
          ldwrkr = *n;
2268
0
      } else {
2269
2270
/*                       WORK(IU) is N by N and WORK(IR) is N by N */
2271
2272
0
          ldwrku = *n;
2273
0
          ir = iu + ldwrku * *n;
2274
0
          ldwrkr = *n;
2275
0
      }
2276
0
      itau = ir + ldwrkr * *n;
2277
0
      iwork = itau + *n;
2278
2279
/*                    Compute A=Q*R, copying result to U */
2280
/*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) */
2281
2282
0
      i__2 = *lwork - iwork + 1;
2283
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2284
0
        iwork], &i__2, &ierr);
2285
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2286
0
        ldu);
2287
2288
/*                    Generate Q in U */
2289
/*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) */
2290
2291
0
      i__2 = *lwork - iwork + 1;
2292
0
      sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2293
0
        work[iwork], &i__2, &ierr);
2294
2295
/*                    Copy R to WORK(IU), zeroing out below it */
2296
2297
0
      slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2298
0
        ldwrku);
2299
0
      i__2 = *n - 1;
2300
0
      i__3 = *n - 1;
2301
0
      slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
2302
0
        1], &ldwrku);
2303
0
      ie = itau;
2304
0
      itauq = ie + *n;
2305
0
      itaup = itauq + *n;
2306
0
      iwork = itaup + *n;
2307
2308
/*                    Bidiagonalize R in WORK(IU), copying result to */
2309
/*                    WORK(IR) */
2310
/*                    (Workspace: need 2*N*N+4*N, */
2311
/*                                prefer 2*N*N+3*N+2*N*NB) */
2312
2313
0
      i__2 = *lwork - iwork + 1;
2314
0
      sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
2315
0
        work[itauq], &work[itaup], &work[iwork], &
2316
0
        i__2, &ierr);
2317
0
      slacpy_("U", n, n, &work[iu], &ldwrku, &work[ir], &
2318
0
        ldwrkr);
2319
2320
/*                    Generate left bidiagonalizing vectors in WORK(IU) */
2321
/*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) */
2322
2323
0
      i__2 = *lwork - iwork + 1;
2324
0
      sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2325
0
        , &work[iwork], &i__2, &ierr);
2326
2327
/*                    Generate right bidiagonalizing vectors in WORK(IR) */
2328
/*                    (Workspace: need 2*N*N+4*N-1, */
2329
/*                                prefer 2*N*N+3*N+(N-1)*NB) */
2330
2331
0
      i__2 = *lwork - iwork + 1;
2332
0
      sorgbr_("P", n, n, n, &work[ir], &ldwrkr, &work[itaup]
2333
0
        , &work[iwork], &i__2, &ierr);
2334
0
      iwork = ie + *n;
2335
2336
/*                    Perform bidiagonal QR iteration, computing left */
2337
/*                    singular vectors of R in WORK(IU) and computing */
2338
/*                    right singular vectors of R in WORK(IR) */
2339
/*                    (Workspace: need 2*N*N+BDSPAC) */
2340
2341
0
      sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &work[
2342
0
        ir], &ldwrkr, &work[iu], &ldwrku, dum, &c__1, 
2343
0
        &work[iwork], info);
2344
2345
/*                    Multiply Q in U by left singular vectors of R in */
2346
/*                    WORK(IU), storing result in A */
2347
/*                    (Workspace: need N*N) */
2348
2349
0
      sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
2350
0
        work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
2351
2352
/*                    Copy left singular vectors of A from A to U */
2353
2354
0
      slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
2355
0
        ldu);
2356
2357
/*                    Copy right singular vectors of R from WORK(IR) to A */
2358
2359
0
      slacpy_("F", n, n, &work[ir], &ldwrkr, &a[a_offset], 
2360
0
        lda);
2361
2362
0
        } else {
2363
2364
/*                    Insufficient workspace for a fast algorithm */
2365
2366
0
      itau = 1;
2367
0
      iwork = itau + *n;
2368
2369
/*                    Compute A=Q*R, copying result to U */
2370
/*                    (Workspace: need 2*N, prefer N+N*NB) */
2371
2372
0
      i__2 = *lwork - iwork + 1;
2373
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2374
0
        iwork], &i__2, &ierr);
2375
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2376
0
        ldu);
2377
2378
/*                    Generate Q in U */
2379
/*                    (Workspace: need N+M, prefer N+M*NB) */
2380
2381
0
      i__2 = *lwork - iwork + 1;
2382
0
      sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2383
0
        work[iwork], &i__2, &ierr);
2384
0
      ie = itau;
2385
0
      itauq = ie + *n;
2386
0
      itaup = itauq + *n;
2387
0
      iwork = itaup + *n;
2388
2389
/*                    Zero out below R in A */
2390
2391
0
      if (*n > 1) {
2392
0
          i__2 = *n - 1;
2393
0
          i__3 = *n - 1;
2394
0
          slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &a[
2395
0
            a_dim1 + 2], lda);
2396
0
      }
2397
2398
/*                    Bidiagonalize R in A */
2399
/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2400
2401
0
      i__2 = *lwork - iwork + 1;
2402
0
      sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &
2403
0
        work[itauq], &work[itaup], &work[iwork], &
2404
0
        i__2, &ierr);
2405
2406
/*                    Multiply Q in U by left bidiagonalizing vectors */
2407
/*                    in A */
2408
/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2409
2410
0
      i__2 = *lwork - iwork + 1;
2411
0
      sormbr_("Q", "R", "N", m, n, n, &a[a_offset], lda, &
2412
0
        work[itauq], &u[u_offset], ldu, &work[iwork], 
2413
0
        &i__2, &ierr)
2414
0
        ;
2415
2416
/*                    Generate right bidiagonalizing vectors in A */
2417
/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2418
2419
0
      i__2 = *lwork - iwork + 1;
2420
0
      sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup],
2421
0
         &work[iwork], &i__2, &ierr);
2422
0
      iwork = ie + *n;
2423
2424
/*                    Perform bidiagonal QR iteration, computing left */
2425
/*                    singular vectors of A in U and computing right */
2426
/*                    singular vectors of A in A */
2427
/*                    (Workspace: need BDSPAC) */
2428
2429
0
      sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &a[
2430
0
        a_offset], lda, &u[u_offset], ldu, dum, &c__1,
2431
0
         &work[iwork], info);
2432
2433
0
        }
2434
2435
0
    } else if (wntvas) {
2436
2437
/*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' */
2438
/*                         or 'A') */
2439
/*                 M left singular vectors to be computed in U and */
2440
/*                 N right singular vectors to be computed in VT */
2441
2442
/* Computing MAX */
2443
0
        i__2 = *n + *m, i__3 = *n << 2, i__2 = f2cmax(i__2,i__3);
2444
0
        if (*lwork >= *n * *n + f2cmax(i__2,bdspac)) {
2445
2446
/*                    Sufficient workspace for a fast algorithm */
2447
2448
0
      iu = 1;
2449
0
      if (*lwork >= wrkbl + *lda * *n) {
2450
2451
/*                       WORK(IU) is LDA by N */
2452
2453
0
          ldwrku = *lda;
2454
0
      } else {
2455
2456
/*                       WORK(IU) is N by N */
2457
2458
0
          ldwrku = *n;
2459
0
      }
2460
0
      itau = iu + ldwrku * *n;
2461
0
      iwork = itau + *n;
2462
2463
/*                    Compute A=Q*R, copying result to U */
2464
/*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */
2465
2466
0
      i__2 = *lwork - iwork + 1;
2467
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2468
0
        iwork], &i__2, &ierr);
2469
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2470
0
        ldu);
2471
2472
/*                    Generate Q in U */
2473
/*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB) */
2474
2475
0
      i__2 = *lwork - iwork + 1;
2476
0
      sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2477
0
        work[iwork], &i__2, &ierr);
2478
2479
/*                    Copy R to WORK(IU), zeroing out below it */
2480
2481
0
      slacpy_("U", n, n, &a[a_offset], lda, &work[iu], &
2482
0
        ldwrku);
2483
0
      i__2 = *n - 1;
2484
0
      i__3 = *n - 1;
2485
0
      slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
2486
0
        1], &ldwrku);
2487
0
      ie = itau;
2488
0
      itauq = ie + *n;
2489
0
      itaup = itauq + *n;
2490
0
      iwork = itaup + *n;
2491
2492
/*                    Bidiagonalize R in WORK(IU), copying result to VT */
2493
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) */
2494
2495
0
      i__2 = *lwork - iwork + 1;
2496
0
      sgebrd_(n, n, &work[iu], &ldwrku, &s[1], &work[ie], &
2497
0
        work[itauq], &work[itaup], &work[iwork], &
2498
0
        i__2, &ierr);
2499
0
      slacpy_("U", n, n, &work[iu], &ldwrku, &vt[vt_offset],
2500
0
         ldvt);
2501
2502
/*                    Generate left bidiagonalizing vectors in WORK(IU) */
2503
/*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) */
2504
2505
0
      i__2 = *lwork - iwork + 1;
2506
0
      sorgbr_("Q", n, n, n, &work[iu], &ldwrku, &work[itauq]
2507
0
        , &work[iwork], &i__2, &ierr);
2508
2509
/*                    Generate right bidiagonalizing vectors in VT */
2510
/*                    (Workspace: need N*N+4*N-1, */
2511
/*                                prefer N*N+3*N+(N-1)*NB) */
2512
2513
0
      i__2 = *lwork - iwork + 1;
2514
0
      sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2515
0
        itaup], &work[iwork], &i__2, &ierr)
2516
0
        ;
2517
0
      iwork = ie + *n;
2518
2519
/*                    Perform bidiagonal QR iteration, computing left */
2520
/*                    singular vectors of R in WORK(IU) and computing */
2521
/*                    right singular vectors of R in VT */
2522
/*                    (Workspace: need N*N+BDSPAC) */
2523
2524
0
      sbdsqr_("U", n, n, n, &c__0, &s[1], &work[ie], &vt[
2525
0
        vt_offset], ldvt, &work[iu], &ldwrku, dum, &
2526
0
        c__1, &work[iwork], info);
2527
2528
/*                    Multiply Q in U by left singular vectors of R in */
2529
/*                    WORK(IU), storing result in A */
2530
/*                    (Workspace: need N*N) */
2531
2532
0
      sgemm_("N", "N", m, n, n, &c_b79, &u[u_offset], ldu, &
2533
0
        work[iu], &ldwrku, &c_b57, &a[a_offset], lda);
2534
2535
/*                    Copy left singular vectors of A from A to U */
2536
2537
0
      slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], 
2538
0
        ldu);
2539
2540
0
        } else {
2541
2542
/*                    Insufficient workspace for a fast algorithm */
2543
2544
0
      itau = 1;
2545
0
      iwork = itau + *n;
2546
2547
/*                    Compute A=Q*R, copying result to U */
2548
/*                    (Workspace: need 2*N, prefer N+N*NB) */
2549
2550
0
      i__2 = *lwork - iwork + 1;
2551
0
      sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[
2552
0
        iwork], &i__2, &ierr);
2553
0
      slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], 
2554
0
        ldu);
2555
2556
/*                    Generate Q in U */
2557
/*                    (Workspace: need N+M, prefer N+M*NB) */
2558
2559
0
      i__2 = *lwork - iwork + 1;
2560
0
      sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &
2561
0
        work[iwork], &i__2, &ierr);
2562
2563
/*                    Copy R from A to VT, zeroing out below it */
2564
2565
0
      slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], 
2566
0
        ldvt);
2567
0
      if (*n > 1) {
2568
0
          i__2 = *n - 1;
2569
0
          i__3 = *n - 1;
2570
0
          slaset_("L", &i__2, &i__3, &c_b57, &c_b57, &vt[
2571
0
            vt_dim1 + 2], ldvt);
2572
0
      }
2573
0
      ie = itau;
2574
0
      itauq = ie + *n;
2575
0
      itaup = itauq + *n;
2576
0
      iwork = itaup + *n;
2577
2578
/*                    Bidiagonalize R in VT */
2579
/*                    (Workspace: need 4*N, prefer 3*N+2*N*NB) */
2580
2581
0
      i__2 = *lwork - iwork + 1;
2582
0
      sgebrd_(n, n, &vt[vt_offset], ldvt, &s[1], &work[ie], 
2583
0
        &work[itauq], &work[itaup], &work[iwork], &
2584
0
        i__2, &ierr);
2585
2586
/*                    Multiply Q in U by left bidiagonalizing vectors */
2587
/*                    in VT */
2588
/*                    (Workspace: need 3*N+M, prefer 3*N+M*NB) */
2589
2590
0
      i__2 = *lwork - iwork + 1;
2591
0
      sormbr_("Q", "R", "N", m, n, n, &vt[vt_offset], ldvt, 
2592
0
        &work[itauq], &u[u_offset], ldu, &work[iwork],
2593
0
         &i__2, &ierr);
2594
2595
/*                    Generate right bidiagonalizing vectors in VT */
2596
/*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2597
2598
0
      i__2 = *lwork - iwork + 1;
2599
0
      sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[
2600
0
        itaup], &work[iwork], &i__2, &ierr)
2601
0
        ;
2602
0
      iwork = ie + *n;
2603
2604
/*                    Perform bidiagonal QR iteration, computing left */
2605
/*                    singular vectors of A in U and computing right */
2606
/*                    singular vectors of A in VT */
2607
/*                    (Workspace: need BDSPAC) */
2608
2609
0
      sbdsqr_("U", n, n, m, &c__0, &s[1], &work[ie], &vt[
2610
0
        vt_offset], ldvt, &u[u_offset], ldu, dum, &
2611
0
        c__1, &work[iwork], info);
2612
2613
0
        }
2614
2615
0
    }
2616
2617
0
      }
2618
2619
0
  } else {
2620
2621
/*           M .LT. MNTHR */
2622
2623
/*           Path 10 (M at least N, but not much larger) */
2624
/*           Reduce to bidiagonal form without QR decomposition */
2625
2626
0
      ie = 1;
2627
0
      itauq = ie + *n;
2628
0
      itaup = itauq + *n;
2629
0
      iwork = itaup + *n;
2630
2631
/*           Bidiagonalize A */
2632
/*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) */
2633
2634
0
      i__2 = *lwork - iwork + 1;
2635
0
      sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
2636
0
        work[itaup], &work[iwork], &i__2, &ierr);
2637
0
      if (wntuas) {
2638
2639
/*              If left singular vectors desired in U, copy result to U */
2640
/*              and generate left bidiagonalizing vectors in U */
2641
/*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) */
2642
2643
0
    slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
2644
0
    if (wntus) {
2645
0
        ncu = *n;
2646
0
    }
2647
0
    if (wntua) {
2648
0
        ncu = *m;
2649
0
    }
2650
0
    i__2 = *lwork - iwork + 1;
2651
0
    sorgbr_("Q", m, &ncu, n, &u[u_offset], ldu, &work[itauq], &
2652
0
      work[iwork], &i__2, &ierr);
2653
0
      }
2654
0
      if (wntvas) {
2655
2656
/*              If right singular vectors desired in VT, copy result to */
2657
/*              VT and generate right bidiagonalizing vectors in VT */
2658
/*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2659
2660
0
    slacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
2661
0
    i__2 = *lwork - iwork + 1;
2662
0
    sorgbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
2663
0
      work[iwork], &i__2, &ierr);
2664
0
      }
2665
0
      if (wntuo) {
2666
2667
/*              If left singular vectors desired in A, generate left */
2668
/*              bidiagonalizing vectors in A */
2669
/*              (Workspace: need 4*N, prefer 3*N+N*NB) */
2670
2671
0
    i__2 = *lwork - iwork + 1;
2672
0
    sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
2673
0
      iwork], &i__2, &ierr);
2674
0
      }
2675
0
      if (wntvo) {
2676
2677
/*              If right singular vectors desired in A, generate right */
2678
/*              bidiagonalizing vectors in A */
2679
/*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) */
2680
2681
0
    i__2 = *lwork - iwork + 1;
2682
0
    sorgbr_("P", n, n, n, &a[a_offset], lda, &work[itaup], &work[
2683
0
      iwork], &i__2, &ierr);
2684
0
      }
2685
0
      iwork = ie + *n;
2686
0
      if (wntuas || wntuo) {
2687
0
    nru = *m;
2688
0
      }
2689
0
      if (wntun) {
2690
0
    nru = 0;
2691
0
      }
2692
0
      if (wntvas || wntvo) {
2693
0
    ncvt = *n;
2694
0
      }
2695
0
      if (wntvn) {
2696
0
    ncvt = 0;
2697
0
      }
2698
0
      if (! wntuo && ! wntvo) {
2699
2700
/*              Perform bidiagonal QR iteration, if desired, computing */
2701
/*              left singular vectors in U and computing right singular */
2702
/*              vectors in VT */
2703
/*              (Workspace: need BDSPAC) */
2704
2705
0
    sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
2706
0
      vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
2707
0
      work[iwork], info);
2708
0
      } else if (! wntuo && wntvo) {
2709
2710
/*              Perform bidiagonal QR iteration, if desired, computing */
2711
/*              left singular vectors in U and computing right singular */
2712
/*              vectors in A */
2713
/*              (Workspace: need BDSPAC) */
2714
2715
0
    sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
2716
0
      a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
2717
0
      iwork], info);
2718
0
      } else {
2719
2720
/*              Perform bidiagonal QR iteration, if desired, computing */
2721
/*              left singular vectors in A and computing right singular */
2722
/*              vectors in VT */
2723
/*              (Workspace: need BDSPAC) */
2724
2725
0
    sbdsqr_("U", n, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
2726
0
      vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
2727
0
      work[iwork], info);
2728
0
      }
2729
2730
0
  }
2731
2732
0
    } else {
2733
2734
/*        A has more columns than rows. If A has sufficiently more */
2735
/*        columns than rows, first reduce using the LQ decomposition (if */
2736
/*        sufficient workspace available) */
2737
2738
0
  if (*n >= mnthr) {
2739
2740
0
      if (wntvn) {
2741
2742
/*              Path 1t(N much larger than M, JOBVT='N') */
2743
/*              No right singular vectors to be computed */
2744
2745
0
    itau = 1;
2746
0
    iwork = itau + *m;
2747
2748
/*              Compute A=L*Q */
2749
/*              (Workspace: need 2*M, prefer M+M*NB) */
2750
2751
0
    i__2 = *lwork - iwork + 1;
2752
0
    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork], &
2753
0
      i__2, &ierr);
2754
2755
/*              Zero out above L */
2756
2757
0
    i__2 = *m - 1;
2758
0
    i__3 = *m - 1;
2759
0
    slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 << 1) + 
2760
0
      1], lda);
2761
0
    ie = 1;
2762
0
    itauq = ie + *m;
2763
0
    itaup = itauq + *m;
2764
0
    iwork = itaup + *m;
2765
2766
/*              Bidiagonalize L in A */
2767
/*              (Workspace: need 4*M, prefer 3*M+2*M*NB) */
2768
2769
0
    i__2 = *lwork - iwork + 1;
2770
0
    sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
2771
0
      itauq], &work[itaup], &work[iwork], &i__2, &ierr);
2772
0
    if (wntuo || wntuas) {
2773
2774
/*                 If left singular vectors desired, generate Q */
2775
/*                 (Workspace: need 4*M, prefer 3*M+M*NB) */
2776
2777
0
        i__2 = *lwork - iwork + 1;
2778
0
        sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq], &
2779
0
          work[iwork], &i__2, &ierr);
2780
0
    }
2781
0
    iwork = ie + *m;
2782
0
    nru = 0;
2783
0
    if (wntuo || wntuas) {
2784
0
        nru = *m;
2785
0
    }
2786
2787
/*              Perform bidiagonal QR iteration, computing left singular */
2788
/*              vectors of A in A if desired */
2789
/*              (Workspace: need BDSPAC) */
2790
2791
0
    sbdsqr_("U", m, &c__0, &nru, &c__0, &s[1], &work[ie], dum, &
2792
0
      c__1, &a[a_offset], lda, dum, &c__1, &work[iwork], 
2793
0
      info);
2794
2795
/*              If left singular vectors desired in U, copy them there */
2796
2797
0
    if (wntuas) {
2798
0
        slacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
2799
0
    }
2800
2801
0
      } else if (wntvo && wntun) {
2802
2803
/*              Path 2t(N much larger than M, JOBU='N', JOBVT='O') */
2804
/*              M right singular vectors to be overwritten on A and */
2805
/*              no left singular vectors to be computed */
2806
2807
/* Computing MAX */
2808
0
    i__2 = *m << 2;
2809
0
    if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
2810
2811
/*                 Sufficient workspace for a fast algorithm */
2812
2813
0
        ir = 1;
2814
/* Computing MAX */
2815
0
        i__2 = wrkbl, i__3 = *lda * *n + *m;
2816
0
        if (*lwork >= f2cmax(i__2,i__3) + *lda * *m) {
2817
2818
/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
2819
2820
0
      ldwrku = *lda;
2821
0
      chunk = *n;
2822
0
      ldwrkr = *lda;
2823
0
        } else /* if(complicated condition) */ {
2824
/* Computing MAX */
2825
0
      i__2 = wrkbl, i__3 = *lda * *n + *m;
2826
0
      if (*lwork >= f2cmax(i__2,i__3) + *m * *m) {
2827
2828
/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
2829
2830
0
          ldwrku = *lda;
2831
0
          chunk = *n;
2832
0
          ldwrkr = *m;
2833
0
      } else {
2834
2835
/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
2836
2837
0
          ldwrku = *m;
2838
0
          chunk = (*lwork - *m * *m - *m) / *m;
2839
0
          ldwrkr = *m;
2840
0
      }
2841
0
        }
2842
0
        itau = ir + ldwrkr * *m;
2843
0
        iwork = itau + *m;
2844
2845
/*                 Compute A=L*Q */
2846
/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
2847
2848
0
        i__2 = *lwork - iwork + 1;
2849
0
        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
2850
0
          , &i__2, &ierr);
2851
2852
/*                 Copy L to WORK(IR) and zero out above it */
2853
2854
0
        slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &ldwrkr);
2855
0
        i__2 = *m - 1;
2856
0
        i__3 = *m - 1;
2857
0
        slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
2858
0
          ldwrkr], &ldwrkr);
2859
2860
/*                 Generate Q in A */
2861
/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
2862
2863
0
        i__2 = *lwork - iwork + 1;
2864
0
        sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
2865
0
          iwork], &i__2, &ierr);
2866
0
        ie = itau;
2867
0
        itauq = ie + *m;
2868
0
        itaup = itauq + *m;
2869
0
        iwork = itaup + *m;
2870
2871
/*                 Bidiagonalize L in WORK(IR) */
2872
/*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
2873
2874
0
        i__2 = *lwork - iwork + 1;
2875
0
        sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
2876
0
          itauq], &work[itaup], &work[iwork], &i__2, &ierr);
2877
2878
/*                 Generate right vectors bidiagonalizing L */
2879
/*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
2880
2881
0
        i__2 = *lwork - iwork + 1;
2882
0
        sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
2883
0
          work[iwork], &i__2, &ierr);
2884
0
        iwork = ie + *m;
2885
2886
/*                 Perform bidiagonal QR iteration, computing right */
2887
/*                 singular vectors of L in WORK(IR) */
2888
/*                 (Workspace: need M*M+BDSPAC) */
2889
2890
0
        sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &work[
2891
0
          ir], &ldwrkr, dum, &c__1, dum, &c__1, &work[iwork]
2892
0
          , info);
2893
0
        iu = ie + *m;
2894
2895
/*                 Multiply right singular vectors of L in WORK(IR) by Q */
2896
/*                 in A, storing result in WORK(IU) and copying to A */
2897
/*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M) */
2898
2899
0
        i__2 = *n;
2900
0
        i__3 = chunk;
2901
0
        for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
2902
0
           i__3) {
2903
/* Computing MIN */
2904
0
      i__4 = *n - i__ + 1;
2905
0
      blk = f2cmin(i__4,chunk);
2906
0
      sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
2907
0
        ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
2908
0
        work[iu], &ldwrku);
2909
0
      slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
2910
0
        a_dim1 + 1], lda);
2911
/* L30: */
2912
0
        }
2913
2914
0
    } else {
2915
2916
/*                 Insufficient workspace for a fast algorithm */
2917
2918
0
        ie = 1;
2919
0
        itauq = ie + *m;
2920
0
        itaup = itauq + *m;
2921
0
        iwork = itaup + *m;
2922
2923
/*                 Bidiagonalize A */
2924
/*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
2925
2926
0
        i__3 = *lwork - iwork + 1;
2927
0
        sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[
2928
0
          itauq], &work[itaup], &work[iwork], &i__3, &ierr);
2929
2930
/*                 Generate right vectors bidiagonalizing A */
2931
/*                 (Workspace: need 4*M, prefer 3*M+M*NB) */
2932
2933
0
        i__3 = *lwork - iwork + 1;
2934
0
        sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
2935
0
          work[iwork], &i__3, &ierr);
2936
0
        iwork = ie + *m;
2937
2938
/*                 Perform bidiagonal QR iteration, computing right */
2939
/*                 singular vectors of A in A */
2940
/*                 (Workspace: need BDSPAC) */
2941
2942
0
        sbdsqr_("L", m, n, &c__0, &c__0, &s[1], &work[ie], &a[
2943
0
          a_offset], lda, dum, &c__1, dum, &c__1, &work[
2944
0
          iwork], info);
2945
2946
0
    }
2947
2948
0
      } else if (wntvo && wntuas) {
2949
2950
/*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') */
2951
/*              M right singular vectors to be overwritten on A and */
2952
/*              M left singular vectors to be computed in U */
2953
2954
/* Computing MAX */
2955
0
    i__3 = *m << 2;
2956
0
    if (*lwork >= *m * *m + f2cmax(i__3,bdspac)) {
2957
2958
/*                 Sufficient workspace for a fast algorithm */
2959
2960
0
        ir = 1;
2961
/* Computing MAX */
2962
0
        i__3 = wrkbl, i__2 = *lda * *n + *m;
2963
0
        if (*lwork >= f2cmax(i__3,i__2) + *lda * *m) {
2964
2965
/*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M */
2966
2967
0
      ldwrku = *lda;
2968
0
      chunk = *n;
2969
0
      ldwrkr = *lda;
2970
0
        } else /* if(complicated condition) */ {
2971
/* Computing MAX */
2972
0
      i__3 = wrkbl, i__2 = *lda * *n + *m;
2973
0
      if (*lwork >= f2cmax(i__3,i__2) + *m * *m) {
2974
2975
/*                    WORK(IU) is LDA by N and WORK(IR) is M by M */
2976
2977
0
          ldwrku = *lda;
2978
0
          chunk = *n;
2979
0
          ldwrkr = *m;
2980
0
      } else {
2981
2982
/*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M */
2983
2984
0
          ldwrku = *m;
2985
0
          chunk = (*lwork - *m * *m - *m) / *m;
2986
0
          ldwrkr = *m;
2987
0
      }
2988
0
        }
2989
0
        itau = ir + ldwrkr * *m;
2990
0
        iwork = itau + *m;
2991
2992
/*                 Compute A=L*Q */
2993
/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
2994
2995
0
        i__3 = *lwork - iwork + 1;
2996
0
        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
2997
0
          , &i__3, &ierr);
2998
2999
/*                 Copy L to U, zeroing about above it */
3000
3001
0
        slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3002
0
        i__3 = *m - 1;
3003
0
        i__2 = *m - 1;
3004
0
        slaset_("U", &i__3, &i__2, &c_b57, &c_b57, &u[(u_dim1 << 
3005
0
          1) + 1], ldu);
3006
3007
/*                 Generate Q in A */
3008
/*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3009
3010
0
        i__3 = *lwork - iwork + 1;
3011
0
        sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3012
0
          iwork], &i__3, &ierr);
3013
0
        ie = itau;
3014
0
        itauq = ie + *m;
3015
0
        itaup = itauq + *m;
3016
0
        iwork = itaup + *m;
3017
3018
/*                 Bidiagonalize L in U, copying result to WORK(IR) */
3019
/*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3020
3021
0
        i__3 = *lwork - iwork + 1;
3022
0
        sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
3023
0
          itauq], &work[itaup], &work[iwork], &i__3, &ierr);
3024
0
        slacpy_("U", m, m, &u[u_offset], ldu, &work[ir], &ldwrkr);
3025
3026
/*                 Generate right vectors bidiagonalizing L in WORK(IR) */
3027
/*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) */
3028
3029
0
        i__3 = *lwork - iwork + 1;
3030
0
        sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup], &
3031
0
          work[iwork], &i__3, &ierr);
3032
3033
/*                 Generate left vectors bidiagonalizing L in U */
3034
/*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
3035
3036
0
        i__3 = *lwork - iwork + 1;
3037
0
        sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
3038
0
          work[iwork], &i__3, &ierr);
3039
0
        iwork = ie + *m;
3040
3041
/*                 Perform bidiagonal QR iteration, computing left */
3042
/*                 singular vectors of L in U, and computing right */
3043
/*                 singular vectors of L in WORK(IR) */
3044
/*                 (Workspace: need M*M+BDSPAC) */
3045
3046
0
        sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[ir], 
3047
0
          &ldwrkr, &u[u_offset], ldu, dum, &c__1, &work[
3048
0
          iwork], info);
3049
0
        iu = ie + *m;
3050
3051
/*                 Multiply right singular vectors of L in WORK(IR) by Q */
3052
/*                 in A, storing result in WORK(IU) and copying to A */
3053
/*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)) */
3054
3055
0
        i__3 = *n;
3056
0
        i__2 = chunk;
3057
0
        for (i__ = 1; i__2 < 0 ? i__ >= i__3 : i__ <= i__3; i__ +=
3058
0
           i__2) {
3059
/* Computing MIN */
3060
0
      i__4 = *n - i__ + 1;
3061
0
      blk = f2cmin(i__4,chunk);
3062
0
      sgemm_("N", "N", m, &blk, m, &c_b79, &work[ir], &
3063
0
        ldwrkr, &a[i__ * a_dim1 + 1], lda, &c_b57, &
3064
0
        work[iu], &ldwrku);
3065
0
      slacpy_("F", m, &blk, &work[iu], &ldwrku, &a[i__ * 
3066
0
        a_dim1 + 1], lda);
3067
/* L40: */
3068
0
        }
3069
3070
0
    } else {
3071
3072
/*                 Insufficient workspace for a fast algorithm */
3073
3074
0
        itau = 1;
3075
0
        iwork = itau + *m;
3076
3077
/*                 Compute A=L*Q */
3078
/*                 (Workspace: need 2*M, prefer M+M*NB) */
3079
3080
0
        i__2 = *lwork - iwork + 1;
3081
0
        sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[iwork]
3082
0
          , &i__2, &ierr);
3083
3084
/*                 Copy L to U, zeroing out above it */
3085
3086
0
        slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
3087
0
        i__2 = *m - 1;
3088
0
        i__3 = *m - 1;
3089
0
        slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 << 
3090
0
          1) + 1], ldu);
3091
3092
/*                 Generate Q in A */
3093
/*                 (Workspace: need 2*M, prefer M+M*NB) */
3094
3095
0
        i__2 = *lwork - iwork + 1;
3096
0
        sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[
3097
0
          iwork], &i__2, &ierr);
3098
0
        ie = itau;
3099
0
        itauq = ie + *m;
3100
0
        itaup = itauq + *m;
3101
0
        iwork = itaup + *m;
3102
3103
/*                 Bidiagonalize L in U */
3104
/*                 (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3105
3106
0
        i__2 = *lwork - iwork + 1;
3107
0
        sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &work[
3108
0
          itauq], &work[itaup], &work[iwork], &i__2, &ierr);
3109
3110
/*                 Multiply right vectors bidiagonalizing L by Q in A */
3111
/*                 (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3112
3113
0
        i__2 = *lwork - iwork + 1;
3114
0
        sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &work[
3115
0
          itaup], &a[a_offset], lda, &work[iwork], &i__2, &
3116
0
          ierr);
3117
3118
/*                 Generate left vectors bidiagonalizing L in U */
3119
/*                 (Workspace: need 4*M, prefer 3*M+M*NB) */
3120
3121
0
        i__2 = *lwork - iwork + 1;
3122
0
        sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq], &
3123
0
          work[iwork], &i__2, &ierr);
3124
0
        iwork = ie + *m;
3125
3126
/*                 Perform bidiagonal QR iteration, computing left */
3127
/*                 singular vectors of A in U and computing right */
3128
/*                 singular vectors of A in A */
3129
/*                 (Workspace: need BDSPAC) */
3130
3131
0
        sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &a[
3132
0
          a_offset], lda, &u[u_offset], ldu, dum, &c__1, &
3133
0
          work[iwork], info);
3134
3135
0
    }
3136
3137
0
      } else if (wntvs) {
3138
3139
0
    if (wntun) {
3140
3141
/*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S') */
3142
/*                 M right singular vectors to be computed in VT and */
3143
/*                 no left singular vectors to be computed */
3144
3145
/* Computing MAX */
3146
0
        i__2 = *m << 2;
3147
0
        if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
3148
3149
/*                    Sufficient workspace for a fast algorithm */
3150
3151
0
      ir = 1;
3152
0
      if (*lwork >= wrkbl + *lda * *m) {
3153
3154
/*                       WORK(IR) is LDA by M */
3155
3156
0
          ldwrkr = *lda;
3157
0
      } else {
3158
3159
/*                       WORK(IR) is M by M */
3160
3161
0
          ldwrkr = *m;
3162
0
      }
3163
0
      itau = ir + ldwrkr * *m;
3164
0
      iwork = itau + *m;
3165
3166
/*                    Compute A=L*Q */
3167
/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3168
3169
0
      i__2 = *lwork - iwork + 1;
3170
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3171
0
        iwork], &i__2, &ierr);
3172
3173
/*                    Copy L to WORK(IR), zeroing out above it */
3174
3175
0
      slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
3176
0
        ldwrkr);
3177
0
      i__2 = *m - 1;
3178
0
      i__3 = *m - 1;
3179
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
3180
0
        ldwrkr], &ldwrkr);
3181
3182
/*                    Generate Q in A */
3183
/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3184
3185
0
      i__2 = *lwork - iwork + 1;
3186
0
      sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3187
0
        work[iwork], &i__2, &ierr);
3188
0
      ie = itau;
3189
0
      itauq = ie + *m;
3190
0
      itaup = itauq + *m;
3191
0
      iwork = itaup + *m;
3192
3193
/*                    Bidiagonalize L in WORK(IR) */
3194
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3195
3196
0
      i__2 = *lwork - iwork + 1;
3197
0
      sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
3198
0
        work[itauq], &work[itaup], &work[iwork], &
3199
0
        i__2, &ierr);
3200
3201
/*                    Generate right vectors bidiagonalizing L in */
3202
/*                    WORK(IR) */
3203
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
3204
3205
0
      i__2 = *lwork - iwork + 1;
3206
0
      sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
3207
0
        , &work[iwork], &i__2, &ierr);
3208
0
      iwork = ie + *m;
3209
3210
/*                    Perform bidiagonal QR iteration, computing right */
3211
/*                    singular vectors of L in WORK(IR) */
3212
/*                    (Workspace: need M*M+BDSPAC) */
3213
3214
0
      sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
3215
0
        work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
3216
0
        work[iwork], info);
3217
3218
/*                    Multiply right singular vectors of L in WORK(IR) by */
3219
/*                    Q in A, storing result in VT */
3220
/*                    (Workspace: need M*M) */
3221
3222
0
      sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, 
3223
0
        &a[a_offset], lda, &c_b57, &vt[vt_offset], 
3224
0
        ldvt);
3225
3226
0
        } else {
3227
3228
/*                    Insufficient workspace for a fast algorithm */
3229
3230
0
      itau = 1;
3231
0
      iwork = itau + *m;
3232
3233
/*                    Compute A=L*Q */
3234
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3235
3236
0
      i__2 = *lwork - iwork + 1;
3237
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3238
0
        iwork], &i__2, &ierr);
3239
3240
/*                    Copy result to VT */
3241
3242
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3243
0
        ldvt);
3244
3245
/*                    Generate Q in VT */
3246
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3247
3248
0
      i__2 = *lwork - iwork + 1;
3249
0
      sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3250
0
        work[iwork], &i__2, &ierr);
3251
0
      ie = itau;
3252
0
      itauq = ie + *m;
3253
0
      itaup = itauq + *m;
3254
0
      iwork = itaup + *m;
3255
3256
/*                    Zero out above L in A */
3257
3258
0
      i__2 = *m - 1;
3259
0
      i__3 = *m - 1;
3260
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
3261
0
        << 1) + 1], lda);
3262
3263
/*                    Bidiagonalize L in A */
3264
/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3265
3266
0
      i__2 = *lwork - iwork + 1;
3267
0
      sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
3268
0
        work[itauq], &work[itaup], &work[iwork], &
3269
0
        i__2, &ierr);
3270
3271
/*                    Multiply right vectors bidiagonalizing L by Q in VT */
3272
/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3273
3274
0
      i__2 = *lwork - iwork + 1;
3275
0
      sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
3276
0
        work[itaup], &vt[vt_offset], ldvt, &work[
3277
0
        iwork], &i__2, &ierr);
3278
0
      iwork = ie + *m;
3279
3280
/*                    Perform bidiagonal QR iteration, computing right */
3281
/*                    singular vectors of A in VT */
3282
/*                    (Workspace: need BDSPAC) */
3283
3284
0
      sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
3285
0
        vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
3286
0
        work[iwork], info);
3287
3288
0
        }
3289
3290
0
    } else if (wntuo) {
3291
3292
/*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S') */
3293
/*                 M right singular vectors to be computed in VT and */
3294
/*                 M left singular vectors to be overwritten on A */
3295
3296
/* Computing MAX */
3297
0
        i__2 = *m << 2;
3298
0
        if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
3299
3300
/*                    Sufficient workspace for a fast algorithm */
3301
3302
0
      iu = 1;
3303
0
      if (*lwork >= wrkbl + (*lda << 1) * *m) {
3304
3305
/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
3306
3307
0
          ldwrku = *lda;
3308
0
          ir = iu + ldwrku * *m;
3309
0
          ldwrkr = *lda;
3310
0
      } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
3311
3312
/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
3313
3314
0
          ldwrku = *lda;
3315
0
          ir = iu + ldwrku * *m;
3316
0
          ldwrkr = *m;
3317
0
      } else {
3318
3319
/*                       WORK(IU) is M by M and WORK(IR) is M by M */
3320
3321
0
          ldwrku = *m;
3322
0
          ir = iu + ldwrku * *m;
3323
0
          ldwrkr = *m;
3324
0
      }
3325
0
      itau = ir + ldwrkr * *m;
3326
0
      iwork = itau + *m;
3327
3328
/*                    Compute A=L*Q */
3329
/*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
3330
3331
0
      i__2 = *lwork - iwork + 1;
3332
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3333
0
        iwork], &i__2, &ierr);
3334
3335
/*                    Copy L to WORK(IU), zeroing out below it */
3336
3337
0
      slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
3338
0
        ldwrku);
3339
0
      i__2 = *m - 1;
3340
0
      i__3 = *m - 1;
3341
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
3342
0
        ldwrku], &ldwrku);
3343
3344
/*                    Generate Q in A */
3345
/*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
3346
3347
0
      i__2 = *lwork - iwork + 1;
3348
0
      sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3349
0
        work[iwork], &i__2, &ierr);
3350
0
      ie = itau;
3351
0
      itauq = ie + *m;
3352
0
      itaup = itauq + *m;
3353
0
      iwork = itaup + *m;
3354
3355
/*                    Bidiagonalize L in WORK(IU), copying result to */
3356
/*                    WORK(IR) */
3357
/*                    (Workspace: need 2*M*M+4*M, */
3358
/*                                prefer 2*M*M+3*M+2*M*NB) */
3359
3360
0
      i__2 = *lwork - iwork + 1;
3361
0
      sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
3362
0
        work[itauq], &work[itaup], &work[iwork], &
3363
0
        i__2, &ierr);
3364
0
      slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
3365
0
        ldwrkr);
3366
3367
/*                    Generate right bidiagonalizing vectors in WORK(IU) */
3368
/*                    (Workspace: need 2*M*M+4*M-1, */
3369
/*                                prefer 2*M*M+3*M+(M-1)*NB) */
3370
3371
0
      i__2 = *lwork - iwork + 1;
3372
0
      sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
3373
0
        , &work[iwork], &i__2, &ierr);
3374
3375
/*                    Generate left bidiagonalizing vectors in WORK(IR) */
3376
/*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
3377
3378
0
      i__2 = *lwork - iwork + 1;
3379
0
      sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
3380
0
        , &work[iwork], &i__2, &ierr);
3381
0
      iwork = ie + *m;
3382
3383
/*                    Perform bidiagonal QR iteration, computing left */
3384
/*                    singular vectors of L in WORK(IR) and computing */
3385
/*                    right singular vectors of L in WORK(IU) */
3386
/*                    (Workspace: need 2*M*M+BDSPAC) */
3387
3388
0
      sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
3389
0
        iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
3390
0
        &work[iwork], info);
3391
3392
/*                    Multiply right singular vectors of L in WORK(IU) by */
3393
/*                    Q in A, storing result in VT */
3394
/*                    (Workspace: need M*M) */
3395
3396
0
      sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
3397
0
        &a[a_offset], lda, &c_b57, &vt[vt_offset], 
3398
0
        ldvt);
3399
3400
/*                    Copy left singular vectors of L to A */
3401
/*                    (Workspace: need M*M) */
3402
3403
0
      slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
3404
0
        lda);
3405
3406
0
        } else {
3407
3408
/*                    Insufficient workspace for a fast algorithm */
3409
3410
0
      itau = 1;
3411
0
      iwork = itau + *m;
3412
3413
/*                    Compute A=L*Q, copying result to VT */
3414
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3415
3416
0
      i__2 = *lwork - iwork + 1;
3417
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3418
0
        iwork], &i__2, &ierr);
3419
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3420
0
        ldvt);
3421
3422
/*                    Generate Q in VT */
3423
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3424
3425
0
      i__2 = *lwork - iwork + 1;
3426
0
      sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3427
0
        work[iwork], &i__2, &ierr);
3428
0
      ie = itau;
3429
0
      itauq = ie + *m;
3430
0
      itaup = itauq + *m;
3431
0
      iwork = itaup + *m;
3432
3433
/*                    Zero out above L in A */
3434
3435
0
      i__2 = *m - 1;
3436
0
      i__3 = *m - 1;
3437
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
3438
0
        << 1) + 1], lda);
3439
3440
/*                    Bidiagonalize L in A */
3441
/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3442
3443
0
      i__2 = *lwork - iwork + 1;
3444
0
      sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
3445
0
        work[itauq], &work[itaup], &work[iwork], &
3446
0
        i__2, &ierr);
3447
3448
/*                    Multiply right vectors bidiagonalizing L by Q in VT */
3449
/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3450
3451
0
      i__2 = *lwork - iwork + 1;
3452
0
      sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
3453
0
        work[itaup], &vt[vt_offset], ldvt, &work[
3454
0
        iwork], &i__2, &ierr);
3455
3456
/*                    Generate left bidiagonalizing vectors of L in A */
3457
/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
3458
3459
0
      i__2 = *lwork - iwork + 1;
3460
0
      sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
3461
0
         &work[iwork], &i__2, &ierr);
3462
0
      iwork = ie + *m;
3463
3464
/*                    Perform bidiagonal QR iteration, compute left */
3465
/*                    singular vectors of A in A and compute right */
3466
/*                    singular vectors of A in VT */
3467
/*                    (Workspace: need BDSPAC) */
3468
3469
0
      sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
3470
0
        vt_offset], ldvt, &a[a_offset], lda, dum, &
3471
0
        c__1, &work[iwork], info);
3472
3473
0
        }
3474
3475
0
    } else if (wntuas) {
3476
3477
/*                 Path 6t(N much larger than M, JOBU='S' or 'A', */
3478
/*                         JOBVT='S') */
3479
/*                 M right singular vectors to be computed in VT and */
3480
/*                 M left singular vectors to be computed in U */
3481
3482
/* Computing MAX */
3483
0
        i__2 = *m << 2;
3484
0
        if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
3485
3486
/*                    Sufficient workspace for a fast algorithm */
3487
3488
0
      iu = 1;
3489
0
      if (*lwork >= wrkbl + *lda * *m) {
3490
3491
/*                       WORK(IU) is LDA by N */
3492
3493
0
          ldwrku = *lda;
3494
0
      } else {
3495
3496
/*                       WORK(IU) is LDA by M */
3497
3498
0
          ldwrku = *m;
3499
0
      }
3500
0
      itau = iu + ldwrku * *m;
3501
0
      iwork = itau + *m;
3502
3503
/*                    Compute A=L*Q */
3504
/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3505
3506
0
      i__2 = *lwork - iwork + 1;
3507
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3508
0
        iwork], &i__2, &ierr);
3509
3510
/*                    Copy L to WORK(IU), zeroing out above it */
3511
3512
0
      slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
3513
0
        ldwrku);
3514
0
      i__2 = *m - 1;
3515
0
      i__3 = *m - 1;
3516
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
3517
0
        ldwrku], &ldwrku);
3518
3519
/*                    Generate Q in A */
3520
/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3521
3522
0
      i__2 = *lwork - iwork + 1;
3523
0
      sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &
3524
0
        work[iwork], &i__2, &ierr);
3525
0
      ie = itau;
3526
0
      itauq = ie + *m;
3527
0
      itaup = itauq + *m;
3528
0
      iwork = itaup + *m;
3529
3530
/*                    Bidiagonalize L in WORK(IU), copying result to U */
3531
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3532
3533
0
      i__2 = *lwork - iwork + 1;
3534
0
      sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
3535
0
        work[itauq], &work[itaup], &work[iwork], &
3536
0
        i__2, &ierr);
3537
0
      slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
3538
0
        ldu);
3539
3540
/*                    Generate right bidiagonalizing vectors in WORK(IU) */
3541
/*                    (Workspace: need M*M+4*M-1, */
3542
/*                                prefer M*M+3*M+(M-1)*NB) */
3543
3544
0
      i__2 = *lwork - iwork + 1;
3545
0
      sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
3546
0
        , &work[iwork], &i__2, &ierr);
3547
3548
/*                    Generate left bidiagonalizing vectors in U */
3549
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
3550
3551
0
      i__2 = *lwork - iwork + 1;
3552
0
      sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
3553
0
         &work[iwork], &i__2, &ierr);
3554
0
      iwork = ie + *m;
3555
3556
/*                    Perform bidiagonal QR iteration, computing left */
3557
/*                    singular vectors of L in U and computing right */
3558
/*                    singular vectors of L in WORK(IU) */
3559
/*                    (Workspace: need M*M+BDSPAC) */
3560
3561
0
      sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
3562
0
        iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
3563
0
        work[iwork], info);
3564
3565
/*                    Multiply right singular vectors of L in WORK(IU) by */
3566
/*                    Q in A, storing result in VT */
3567
/*                    (Workspace: need M*M) */
3568
3569
0
      sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
3570
0
        &a[a_offset], lda, &c_b57, &vt[vt_offset], 
3571
0
        ldvt);
3572
3573
0
        } else {
3574
3575
/*                    Insufficient workspace for a fast algorithm */
3576
3577
0
      itau = 1;
3578
0
      iwork = itau + *m;
3579
3580
/*                    Compute A=L*Q, copying result to VT */
3581
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3582
3583
0
      i__2 = *lwork - iwork + 1;
3584
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3585
0
        iwork], &i__2, &ierr);
3586
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3587
0
        ldvt);
3588
3589
/*                    Generate Q in VT */
3590
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3591
3592
0
      i__2 = *lwork - iwork + 1;
3593
0
      sorglq_(m, n, m, &vt[vt_offset], ldvt, &work[itau], &
3594
0
        work[iwork], &i__2, &ierr);
3595
3596
/*                    Copy L to U, zeroing out above it */
3597
3598
0
      slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
3599
0
        ldu);
3600
0
      i__2 = *m - 1;
3601
0
      i__3 = *m - 1;
3602
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 
3603
0
        << 1) + 1], ldu);
3604
0
      ie = itau;
3605
0
      itauq = ie + *m;
3606
0
      itaup = itauq + *m;
3607
0
      iwork = itaup + *m;
3608
3609
/*                    Bidiagonalize L in U */
3610
/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3611
3612
0
      i__2 = *lwork - iwork + 1;
3613
0
      sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
3614
0
        work[itauq], &work[itaup], &work[iwork], &
3615
0
        i__2, &ierr);
3616
3617
/*                    Multiply right bidiagonalizing vectors in U by Q */
3618
/*                    in VT */
3619
/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3620
3621
0
      i__2 = *lwork - iwork + 1;
3622
0
      sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
3623
0
        work[itaup], &vt[vt_offset], ldvt, &work[
3624
0
        iwork], &i__2, &ierr);
3625
3626
/*                    Generate left bidiagonalizing vectors in U */
3627
/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
3628
3629
0
      i__2 = *lwork - iwork + 1;
3630
0
      sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
3631
0
         &work[iwork], &i__2, &ierr);
3632
0
      iwork = ie + *m;
3633
3634
/*                    Perform bidiagonal QR iteration, computing left */
3635
/*                    singular vectors of A in U and computing right */
3636
/*                    singular vectors of A in VT */
3637
/*                    (Workspace: need BDSPAC) */
3638
3639
0
      sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
3640
0
        vt_offset], ldvt, &u[u_offset], ldu, dum, &
3641
0
        c__1, &work[iwork], info);
3642
3643
0
        }
3644
3645
0
    }
3646
3647
0
      } else if (wntva) {
3648
3649
0
    if (wntun) {
3650
3651
/*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A') */
3652
/*                 N right singular vectors to be computed in VT and */
3653
/*                 no left singular vectors to be computed */
3654
3655
/* Computing MAX */
3656
0
        i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
3657
0
        if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
3658
3659
/*                    Sufficient workspace for a fast algorithm */
3660
3661
0
      ir = 1;
3662
0
      if (*lwork >= wrkbl + *lda * *m) {
3663
3664
/*                       WORK(IR) is LDA by M */
3665
3666
0
          ldwrkr = *lda;
3667
0
      } else {
3668
3669
/*                       WORK(IR) is M by M */
3670
3671
0
          ldwrkr = *m;
3672
0
      }
3673
0
      itau = ir + ldwrkr * *m;
3674
0
      iwork = itau + *m;
3675
3676
/*                    Compute A=L*Q, copying result to VT */
3677
/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
3678
3679
0
      i__2 = *lwork - iwork + 1;
3680
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3681
0
        iwork], &i__2, &ierr);
3682
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3683
0
        ldvt);
3684
3685
/*                    Copy L to WORK(IR), zeroing out above it */
3686
3687
0
      slacpy_("L", m, m, &a[a_offset], lda, &work[ir], &
3688
0
        ldwrkr);
3689
0
      i__2 = *m - 1;
3690
0
      i__3 = *m - 1;
3691
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[ir + 
3692
0
        ldwrkr], &ldwrkr);
3693
3694
/*                    Generate Q in VT */
3695
/*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
3696
3697
0
      i__2 = *lwork - iwork + 1;
3698
0
      sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
3699
0
        work[iwork], &i__2, &ierr);
3700
0
      ie = itau;
3701
0
      itauq = ie + *m;
3702
0
      itaup = itauq + *m;
3703
0
      iwork = itaup + *m;
3704
3705
/*                    Bidiagonalize L in WORK(IR) */
3706
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
3707
3708
0
      i__2 = *lwork - iwork + 1;
3709
0
      sgebrd_(m, m, &work[ir], &ldwrkr, &s[1], &work[ie], &
3710
0
        work[itauq], &work[itaup], &work[iwork], &
3711
0
        i__2, &ierr);
3712
3713
/*                    Generate right bidiagonalizing vectors in WORK(IR) */
3714
/*                    (Workspace: need M*M+4*M-1, */
3715
/*                                prefer M*M+3*M+(M-1)*NB) */
3716
3717
0
      i__2 = *lwork - iwork + 1;
3718
0
      sorgbr_("P", m, m, m, &work[ir], &ldwrkr, &work[itaup]
3719
0
        , &work[iwork], &i__2, &ierr);
3720
0
      iwork = ie + *m;
3721
3722
/*                    Perform bidiagonal QR iteration, computing right */
3723
/*                    singular vectors of L in WORK(IR) */
3724
/*                    (Workspace: need M*M+BDSPAC) */
3725
3726
0
      sbdsqr_("U", m, m, &c__0, &c__0, &s[1], &work[ie], &
3727
0
        work[ir], &ldwrkr, dum, &c__1, dum, &c__1, &
3728
0
        work[iwork], info);
3729
3730
/*                    Multiply right singular vectors of L in WORK(IR) by */
3731
/*                    Q in VT, storing result in A */
3732
/*                    (Workspace: need M*M) */
3733
3734
0
      sgemm_("N", "N", m, n, m, &c_b79, &work[ir], &ldwrkr, 
3735
0
        &vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
3736
0
        lda);
3737
3738
/*                    Copy right singular vectors of A from A to VT */
3739
3740
0
      slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
3741
0
        ldvt);
3742
3743
0
        } else {
3744
3745
/*                    Insufficient workspace for a fast algorithm */
3746
3747
0
      itau = 1;
3748
0
      iwork = itau + *m;
3749
3750
/*                    Compute A=L*Q, copying result to VT */
3751
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3752
3753
0
      i__2 = *lwork - iwork + 1;
3754
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3755
0
        iwork], &i__2, &ierr);
3756
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3757
0
        ldvt);
3758
3759
/*                    Generate Q in VT */
3760
/*                    (Workspace: need M+N, prefer M+N*NB) */
3761
3762
0
      i__2 = *lwork - iwork + 1;
3763
0
      sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
3764
0
        work[iwork], &i__2, &ierr);
3765
0
      ie = itau;
3766
0
      itauq = ie + *m;
3767
0
      itaup = itauq + *m;
3768
0
      iwork = itaup + *m;
3769
3770
/*                    Zero out above L in A */
3771
3772
0
      i__2 = *m - 1;
3773
0
      i__3 = *m - 1;
3774
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
3775
0
        << 1) + 1], lda);
3776
3777
/*                    Bidiagonalize L in A */
3778
/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3779
3780
0
      i__2 = *lwork - iwork + 1;
3781
0
      sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
3782
0
        work[itauq], &work[itaup], &work[iwork], &
3783
0
        i__2, &ierr);
3784
3785
/*                    Multiply right bidiagonalizing vectors in A by Q */
3786
/*                    in VT */
3787
/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3788
3789
0
      i__2 = *lwork - iwork + 1;
3790
0
      sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
3791
0
        work[itaup], &vt[vt_offset], ldvt, &work[
3792
0
        iwork], &i__2, &ierr);
3793
0
      iwork = ie + *m;
3794
3795
/*                    Perform bidiagonal QR iteration, computing right */
3796
/*                    singular vectors of A in VT */
3797
/*                    (Workspace: need BDSPAC) */
3798
3799
0
      sbdsqr_("U", m, n, &c__0, &c__0, &s[1], &work[ie], &
3800
0
        vt[vt_offset], ldvt, dum, &c__1, dum, &c__1, &
3801
0
        work[iwork], info);
3802
3803
0
        }
3804
3805
0
    } else if (wntuo) {
3806
3807
/*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A') */
3808
/*                 N right singular vectors to be computed in VT and */
3809
/*                 M left singular vectors to be overwritten on A */
3810
3811
/* Computing MAX */
3812
0
        i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
3813
0
        if (*lwork >= (*m << 1) * *m + f2cmax(i__2,bdspac)) {
3814
3815
/*                    Sufficient workspace for a fast algorithm */
3816
3817
0
      iu = 1;
3818
0
      if (*lwork >= wrkbl + (*lda << 1) * *m) {
3819
3820
/*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M */
3821
3822
0
          ldwrku = *lda;
3823
0
          ir = iu + ldwrku * *m;
3824
0
          ldwrkr = *lda;
3825
0
      } else if (*lwork >= wrkbl + (*lda + *m) * *m) {
3826
3827
/*                       WORK(IU) is LDA by M and WORK(IR) is M by M */
3828
3829
0
          ldwrku = *lda;
3830
0
          ir = iu + ldwrku * *m;
3831
0
          ldwrkr = *m;
3832
0
      } else {
3833
3834
/*                       WORK(IU) is M by M and WORK(IR) is M by M */
3835
3836
0
          ldwrku = *m;
3837
0
          ir = iu + ldwrku * *m;
3838
0
          ldwrkr = *m;
3839
0
      }
3840
0
      itau = ir + ldwrkr * *m;
3841
0
      iwork = itau + *m;
3842
3843
/*                    Compute A=L*Q, copying result to VT */
3844
/*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) */
3845
3846
0
      i__2 = *lwork - iwork + 1;
3847
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3848
0
        iwork], &i__2, &ierr);
3849
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3850
0
        ldvt);
3851
3852
/*                    Generate Q in VT */
3853
/*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) */
3854
3855
0
      i__2 = *lwork - iwork + 1;
3856
0
      sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
3857
0
        work[iwork], &i__2, &ierr);
3858
3859
/*                    Copy L to WORK(IU), zeroing out above it */
3860
3861
0
      slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
3862
0
        ldwrku);
3863
0
      i__2 = *m - 1;
3864
0
      i__3 = *m - 1;
3865
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
3866
0
        ldwrku], &ldwrku);
3867
0
      ie = itau;
3868
0
      itauq = ie + *m;
3869
0
      itaup = itauq + *m;
3870
0
      iwork = itaup + *m;
3871
3872
/*                    Bidiagonalize L in WORK(IU), copying result to */
3873
/*                    WORK(IR) */
3874
/*                    (Workspace: need 2*M*M+4*M, */
3875
/*                                prefer 2*M*M+3*M+2*M*NB) */
3876
3877
0
      i__2 = *lwork - iwork + 1;
3878
0
      sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
3879
0
        work[itauq], &work[itaup], &work[iwork], &
3880
0
        i__2, &ierr);
3881
0
      slacpy_("L", m, m, &work[iu], &ldwrku, &work[ir], &
3882
0
        ldwrkr);
3883
3884
/*                    Generate right bidiagonalizing vectors in WORK(IU) */
3885
/*                    (Workspace: need 2*M*M+4*M-1, */
3886
/*                                prefer 2*M*M+3*M+(M-1)*NB) */
3887
3888
0
      i__2 = *lwork - iwork + 1;
3889
0
      sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
3890
0
        , &work[iwork], &i__2, &ierr);
3891
3892
/*                    Generate left bidiagonalizing vectors in WORK(IR) */
3893
/*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) */
3894
3895
0
      i__2 = *lwork - iwork + 1;
3896
0
      sorgbr_("Q", m, m, m, &work[ir], &ldwrkr, &work[itauq]
3897
0
        , &work[iwork], &i__2, &ierr);
3898
0
      iwork = ie + *m;
3899
3900
/*                    Perform bidiagonal QR iteration, computing left */
3901
/*                    singular vectors of L in WORK(IR) and computing */
3902
/*                    right singular vectors of L in WORK(IU) */
3903
/*                    (Workspace: need 2*M*M+BDSPAC) */
3904
3905
0
      sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
3906
0
        iu], &ldwrku, &work[ir], &ldwrkr, dum, &c__1, 
3907
0
        &work[iwork], info);
3908
3909
/*                    Multiply right singular vectors of L in WORK(IU) by */
3910
/*                    Q in VT, storing result in A */
3911
/*                    (Workspace: need M*M) */
3912
3913
0
      sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
3914
0
        &vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
3915
0
        lda);
3916
3917
/*                    Copy right singular vectors of A from A to VT */
3918
3919
0
      slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
3920
0
        ldvt);
3921
3922
/*                    Copy left singular vectors of A from WORK(IR) to A */
3923
3924
0
      slacpy_("F", m, m, &work[ir], &ldwrkr, &a[a_offset], 
3925
0
        lda);
3926
3927
0
        } else {
3928
3929
/*                    Insufficient workspace for a fast algorithm */
3930
3931
0
      itau = 1;
3932
0
      iwork = itau + *m;
3933
3934
/*                    Compute A=L*Q, copying result to VT */
3935
/*                    (Workspace: need 2*M, prefer M+M*NB) */
3936
3937
0
      i__2 = *lwork - iwork + 1;
3938
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
3939
0
        iwork], &i__2, &ierr);
3940
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
3941
0
        ldvt);
3942
3943
/*                    Generate Q in VT */
3944
/*                    (Workspace: need M+N, prefer M+N*NB) */
3945
3946
0
      i__2 = *lwork - iwork + 1;
3947
0
      sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
3948
0
        work[iwork], &i__2, &ierr);
3949
0
      ie = itau;
3950
0
      itauq = ie + *m;
3951
0
      itaup = itauq + *m;
3952
0
      iwork = itaup + *m;
3953
3954
/*                    Zero out above L in A */
3955
3956
0
      i__2 = *m - 1;
3957
0
      i__3 = *m - 1;
3958
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &a[(a_dim1 
3959
0
        << 1) + 1], lda);
3960
3961
/*                    Bidiagonalize L in A */
3962
/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
3963
3964
0
      i__2 = *lwork - iwork + 1;
3965
0
      sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &
3966
0
        work[itauq], &work[itaup], &work[iwork], &
3967
0
        i__2, &ierr);
3968
3969
/*                    Multiply right bidiagonalizing vectors in A by Q */
3970
/*                    in VT */
3971
/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
3972
3973
0
      i__2 = *lwork - iwork + 1;
3974
0
      sormbr_("P", "L", "T", m, n, m, &a[a_offset], lda, &
3975
0
        work[itaup], &vt[vt_offset], ldvt, &work[
3976
0
        iwork], &i__2, &ierr);
3977
3978
/*                    Generate left bidiagonalizing vectors in A */
3979
/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
3980
3981
0
      i__2 = *lwork - iwork + 1;
3982
0
      sorgbr_("Q", m, m, m, &a[a_offset], lda, &work[itauq],
3983
0
         &work[iwork], &i__2, &ierr);
3984
0
      iwork = ie + *m;
3985
3986
/*                    Perform bidiagonal QR iteration, computing left */
3987
/*                    singular vectors of A in A and computing right */
3988
/*                    singular vectors of A in VT */
3989
/*                    (Workspace: need BDSPAC) */
3990
3991
0
      sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
3992
0
        vt_offset], ldvt, &a[a_offset], lda, dum, &
3993
0
        c__1, &work[iwork], info);
3994
3995
0
        }
3996
3997
0
    } else if (wntuas) {
3998
3999
/*                 Path 9t(N much larger than M, JOBU='S' or 'A', */
4000
/*                         JOBVT='A') */
4001
/*                 N right singular vectors to be computed in VT and */
4002
/*                 M left singular vectors to be computed in U */
4003
4004
/* Computing MAX */
4005
0
        i__2 = *n + *m, i__3 = *m << 2, i__2 = f2cmax(i__2,i__3);
4006
0
        if (*lwork >= *m * *m + f2cmax(i__2,bdspac)) {
4007
4008
/*                    Sufficient workspace for a fast algorithm */
4009
4010
0
      iu = 1;
4011
0
      if (*lwork >= wrkbl + *lda * *m) {
4012
4013
/*                       WORK(IU) is LDA by M */
4014
4015
0
          ldwrku = *lda;
4016
0
      } else {
4017
4018
/*                       WORK(IU) is M by M */
4019
4020
0
          ldwrku = *m;
4021
0
      }
4022
0
      itau = iu + ldwrku * *m;
4023
0
      iwork = itau + *m;
4024
4025
/*                    Compute A=L*Q, copying result to VT */
4026
/*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB) */
4027
4028
0
      i__2 = *lwork - iwork + 1;
4029
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4030
0
        iwork], &i__2, &ierr);
4031
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4032
0
        ldvt);
4033
4034
/*                    Generate Q in VT */
4035
/*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB) */
4036
4037
0
      i__2 = *lwork - iwork + 1;
4038
0
      sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4039
0
        work[iwork], &i__2, &ierr);
4040
4041
/*                    Copy L to WORK(IU), zeroing out above it */
4042
4043
0
      slacpy_("L", m, m, &a[a_offset], lda, &work[iu], &
4044
0
        ldwrku);
4045
0
      i__2 = *m - 1;
4046
0
      i__3 = *m - 1;
4047
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &work[iu + 
4048
0
        ldwrku], &ldwrku);
4049
0
      ie = itau;
4050
0
      itauq = ie + *m;
4051
0
      itaup = itauq + *m;
4052
0
      iwork = itaup + *m;
4053
4054
/*                    Bidiagonalize L in WORK(IU), copying result to U */
4055
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) */
4056
4057
0
      i__2 = *lwork - iwork + 1;
4058
0
      sgebrd_(m, m, &work[iu], &ldwrku, &s[1], &work[ie], &
4059
0
        work[itauq], &work[itaup], &work[iwork], &
4060
0
        i__2, &ierr);
4061
0
      slacpy_("L", m, m, &work[iu], &ldwrku, &u[u_offset], 
4062
0
        ldu);
4063
4064
/*                    Generate right bidiagonalizing vectors in WORK(IU) */
4065
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) */
4066
4067
0
      i__2 = *lwork - iwork + 1;
4068
0
      sorgbr_("P", m, m, m, &work[iu], &ldwrku, &work[itaup]
4069
0
        , &work[iwork], &i__2, &ierr);
4070
4071
/*                    Generate left bidiagonalizing vectors in U */
4072
/*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) */
4073
4074
0
      i__2 = *lwork - iwork + 1;
4075
0
      sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
4076
0
         &work[iwork], &i__2, &ierr);
4077
0
      iwork = ie + *m;
4078
4079
/*                    Perform bidiagonal QR iteration, computing left */
4080
/*                    singular vectors of L in U and computing right */
4081
/*                    singular vectors of L in WORK(IU) */
4082
/*                    (Workspace: need M*M+BDSPAC) */
4083
4084
0
      sbdsqr_("U", m, m, m, &c__0, &s[1], &work[ie], &work[
4085
0
        iu], &ldwrku, &u[u_offset], ldu, dum, &c__1, &
4086
0
        work[iwork], info);
4087
4088
/*                    Multiply right singular vectors of L in WORK(IU) by */
4089
/*                    Q in VT, storing result in A */
4090
/*                    (Workspace: need M*M) */
4091
4092
0
      sgemm_("N", "N", m, n, m, &c_b79, &work[iu], &ldwrku, 
4093
0
        &vt[vt_offset], ldvt, &c_b57, &a[a_offset], 
4094
0
        lda);
4095
4096
/*                    Copy right singular vectors of A from A to VT */
4097
4098
0
      slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], 
4099
0
        ldvt);
4100
4101
0
        } else {
4102
4103
/*                    Insufficient workspace for a fast algorithm */
4104
4105
0
      itau = 1;
4106
0
      iwork = itau + *m;
4107
4108
/*                    Compute A=L*Q, copying result to VT */
4109
/*                    (Workspace: need 2*M, prefer M+M*NB) */
4110
4111
0
      i__2 = *lwork - iwork + 1;
4112
0
      sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[
4113
0
        iwork], &i__2, &ierr);
4114
0
      slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], 
4115
0
        ldvt);
4116
4117
/*                    Generate Q in VT */
4118
/*                    (Workspace: need M+N, prefer M+N*NB) */
4119
4120
0
      i__2 = *lwork - iwork + 1;
4121
0
      sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &
4122
0
        work[iwork], &i__2, &ierr);
4123
4124
/*                    Copy L to U, zeroing out above it */
4125
4126
0
      slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], 
4127
0
        ldu);
4128
0
      i__2 = *m - 1;
4129
0
      i__3 = *m - 1;
4130
0
      slaset_("U", &i__2, &i__3, &c_b57, &c_b57, &u[(u_dim1 
4131
0
        << 1) + 1], ldu);
4132
0
      ie = itau;
4133
0
      itauq = ie + *m;
4134
0
      itaup = itauq + *m;
4135
0
      iwork = itaup + *m;
4136
4137
/*                    Bidiagonalize L in U */
4138
/*                    (Workspace: need 4*M, prefer 3*M+2*M*NB) */
4139
4140
0
      i__2 = *lwork - iwork + 1;
4141
0
      sgebrd_(m, m, &u[u_offset], ldu, &s[1], &work[ie], &
4142
0
        work[itauq], &work[itaup], &work[iwork], &
4143
0
        i__2, &ierr);
4144
4145
/*                    Multiply right bidiagonalizing vectors in U by Q */
4146
/*                    in VT */
4147
/*                    (Workspace: need 3*M+N, prefer 3*M+N*NB) */
4148
4149
0
      i__2 = *lwork - iwork + 1;
4150
0
      sormbr_("P", "L", "T", m, n, m, &u[u_offset], ldu, &
4151
0
        work[itaup], &vt[vt_offset], ldvt, &work[
4152
0
        iwork], &i__2, &ierr);
4153
4154
/*                    Generate left bidiagonalizing vectors in U */
4155
/*                    (Workspace: need 4*M, prefer 3*M+M*NB) */
4156
4157
0
      i__2 = *lwork - iwork + 1;
4158
0
      sorgbr_("Q", m, m, m, &u[u_offset], ldu, &work[itauq],
4159
0
         &work[iwork], &i__2, &ierr);
4160
0
      iwork = ie + *m;
4161
4162
/*                    Perform bidiagonal QR iteration, computing left */
4163
/*                    singular vectors of A in U and computing right */
4164
/*                    singular vectors of A in VT */
4165
/*                    (Workspace: need BDSPAC) */
4166
4167
0
      sbdsqr_("U", m, n, m, &c__0, &s[1], &work[ie], &vt[
4168
0
        vt_offset], ldvt, &u[u_offset], ldu, dum, &
4169
0
        c__1, &work[iwork], info);
4170
4171
0
        }
4172
4173
0
    }
4174
4175
0
      }
4176
4177
0
  } else {
4178
4179
/*           N .LT. MNTHR */
4180
4181
/*           Path 10t(N greater than M, but not much larger) */
4182
/*           Reduce to bidiagonal form without LQ decomposition */
4183
4184
0
      ie = 1;
4185
0
      itauq = ie + *m;
4186
0
      itaup = itauq + *m;
4187
0
      iwork = itaup + *m;
4188
4189
/*           Bidiagonalize A */
4190
/*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) */
4191
4192
0
      i__2 = *lwork - iwork + 1;
4193
0
      sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
4194
0
        work[itaup], &work[iwork], &i__2, &ierr);
4195
0
      if (wntuas) {
4196
4197
/*              If left singular vectors desired in U, copy result to U */
4198
/*              and generate left bidiagonalizing vectors in U */
4199
/*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
4200
4201
0
    slacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
4202
0
    i__2 = *lwork - iwork + 1;
4203
0
    sorgbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
4204
0
      iwork], &i__2, &ierr);
4205
0
      }
4206
0
      if (wntvas) {
4207
4208
/*              If right singular vectors desired in VT, copy result to */
4209
/*              VT and generate right bidiagonalizing vectors in VT */
4210
/*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) */
4211
4212
0
    slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
4213
0
    if (wntva) {
4214
0
        nrvt = *n;
4215
0
    }
4216
0
    if (wntvs) {
4217
0
        nrvt = *m;
4218
0
    }
4219
0
    i__2 = *lwork - iwork + 1;
4220
0
    sorgbr_("P", &nrvt, n, m, &vt[vt_offset], ldvt, &work[itaup], 
4221
0
      &work[iwork], &i__2, &ierr);
4222
0
      }
4223
0
      if (wntuo) {
4224
4225
/*              If left singular vectors desired in A, generate left */
4226
/*              bidiagonalizing vectors in A */
4227
/*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) */
4228
4229
0
    i__2 = *lwork - iwork + 1;
4230
0
    sorgbr_("Q", m, m, n, &a[a_offset], lda, &work[itauq], &work[
4231
0
      iwork], &i__2, &ierr);
4232
0
      }
4233
0
      if (wntvo) {
4234
4235
/*              If right singular vectors desired in A, generate right */
4236
/*              bidiagonalizing vectors in A */
4237
/*              (Workspace: need 4*M, prefer 3*M+M*NB) */
4238
4239
0
    i__2 = *lwork - iwork + 1;
4240
0
    sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
4241
0
      iwork], &i__2, &ierr);
4242
0
      }
4243
0
      iwork = ie + *m;
4244
0
      if (wntuas || wntuo) {
4245
0
    nru = *m;
4246
0
      }
4247
0
      if (wntun) {
4248
0
    nru = 0;
4249
0
      }
4250
0
      if (wntvas || wntvo) {
4251
0
    ncvt = *n;
4252
0
      }
4253
0
      if (wntvn) {
4254
0
    ncvt = 0;
4255
0
      }
4256
0
      if (! wntuo && ! wntvo) {
4257
4258
/*              Perform bidiagonal QR iteration, if desired, computing */
4259
/*              left singular vectors in U and computing right singular */
4260
/*              vectors in VT */
4261
/*              (Workspace: need BDSPAC) */
4262
4263
0
    sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
4264
0
      vt_offset], ldvt, &u[u_offset], ldu, dum, &c__1, &
4265
0
      work[iwork], info);
4266
0
      } else if (! wntuo && wntvo) {
4267
4268
/*              Perform bidiagonal QR iteration, if desired, computing */
4269
/*              left singular vectors in U and computing right singular */
4270
/*              vectors in A */
4271
/*              (Workspace: need BDSPAC) */
4272
4273
0
    sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &a[
4274
0
      a_offset], lda, &u[u_offset], ldu, dum, &c__1, &work[
4275
0
      iwork], info);
4276
0
      } else {
4277
4278
/*              Perform bidiagonal QR iteration, if desired, computing */
4279
/*              left singular vectors in A and computing right singular */
4280
/*              vectors in VT */
4281
/*              (Workspace: need BDSPAC) */
4282
4283
0
    sbdsqr_("L", m, &ncvt, &nru, &c__0, &s[1], &work[ie], &vt[
4284
0
      vt_offset], ldvt, &a[a_offset], lda, dum, &c__1, &
4285
0
      work[iwork], info);
4286
0
      }
4287
4288
0
  }
4289
4290
0
    }
4291
4292
/*     If SBDSQR failed to converge, copy unconverged superdiagonals */
4293
/*     to WORK( 2:MINMN ) */
4294
4295
0
    if (*info != 0) {
4296
0
  if (ie > 2) {
4297
0
      i__2 = minmn - 1;
4298
0
      for (i__ = 1; i__ <= i__2; ++i__) {
4299
0
    work[i__ + 1] = work[i__ + ie - 1];
4300
/* L50: */
4301
0
      }
4302
0
  }
4303
0
  if (ie < 2) {
4304
0
      for (i__ = minmn - 1; i__ >= 1; --i__) {
4305
0
    work[i__ + 1] = work[i__ + ie - 1];
4306
/* L60: */
4307
0
      }
4308
0
  }
4309
0
    }
4310
4311
/*     Undo scaling if necessary */
4312
4313
0
    if (iscl == 1) {
4314
0
  if (anrm > bignum) {
4315
0
      slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
4316
0
        minmn, &ierr);
4317
0
  }
4318
0
  if (*info != 0 && anrm > bignum) {
4319
0
      i__2 = minmn - 1;
4320
0
      slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__2, &c__1, &work[2],
4321
0
         &minmn, &ierr);
4322
0
  }
4323
0
  if (anrm < smlnum) {
4324
0
      slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
4325
0
        minmn, &ierr);
4326
0
  }
4327
0
  if (*info != 0 && anrm < smlnum) {
4328
0
      i__2 = minmn - 1;
4329
0
      slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__2, &c__1, &work[2],
4330
0
         &minmn, &ierr);
4331
0
  }
4332
0
    }
4333
4334
/*     Return optimal workspace in WORK(1) */
4335
4336
0
    work[1] = (real) maxwrk;
4337
4338
0
    return;
4339
4340
/*     End of SGESVD */
4341
4342
0
} /* sgesvd_ */
4343