/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2007 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <float.h>
	
#include "MatUtils.h"
#include "lapack_dlt.h"

/* DGESVD computes the singular value decomposition (SVD) of a real
*  M-by-N matrix A, optionally computing the left and/or right singular
*  vectors. The SVD is written
*
*       A = U * SIGMA * transpose(V)
*
*  where SIGMA is an M-by-N matrix which is zero except for its
*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
*  are the singular values of A; they are real and non-negative, and
*  are returned in descending order.  The first min(m,n) columns of
*  U and V are the left and right singular vectors of A.
*
*  Note that the routine returns V**T, not V.
*
*  Arguments
*  =========
*
*  JOBU    (input) CHARACTER*1
*          Specifies options for computing all or part of the matrix U:
*          = 'A':  all M columns of U are returned in array U:
*          = 'S':  the first min(m,n) columns of U (the left singular
*                  vectors) are returned in the array U;
*          = 'O':  the first min(m,n) columns of U (the left singular
*                  vectors) are overwritten on the array A;
*          = 'N':  no columns of U (no left singular vectors) are
*                  computed.
*
*  JOBVT   (input) CHARACTER*1
*          Specifies options for computing all or part of the matrix
*          V**T:
*          = 'A':  all N rows of V**T are returned in the array VT;
*          = 'S':  the first min(m,n) rows of V**T (the right singular
*                  vectors) are returned in the array VT;
*          = 'O':  the first min(m,n) rows of V**T (the right singular
*                  vectors) are overwritten on the array A;
*          = 'N':  no rows of V**T (no right singular vectors) are
*                  computed.
*
*          JOBVT and JOBU cannot both be 'O'.
*
*  M       (input) INTEGER
*          The number of rows of the input matrix A.  M >= 0.
*
*  N       (input) INTEGER
*          The number of columns of the input matrix A.  N >= 0.
*
*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
*          On entry, the M-by-N matrix A.
*          On exit,
*          if JOBU = 'O',  A is overwritten with the first min(m,n)
*                          columns of U (the left singular vectors,
*                          stored columnwise);
*          if JOBVT = 'O', A is overwritten with the first min(m,n)
*                          rows of V**T (the right singular vectors,
*                          stored rowwise);
*          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
*                          are destroyed.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,M).
*
*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
*          The singular values of A, sorted so that S(i) >= S(i+1).
*
*  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
*          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
*          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
*          if JOBU = 'S', U contains the first min(m,n) columns of U
*          (the left singular vectors, stored columnwise);
*          if JOBU = 'N' or 'O', U is not referenced.
*
*  LDU     (input) INTEGER
*          The leading dimension of the array U.  LDU >= 1; if
*          JOBU = 'S' or 'A', LDU >= M.
*
*  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
*          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
*          V**T;
*          if JOBVT = 'S', VT contains the first min(m,n) rows of
*          V**T (the right singular vectors, stored rowwise);
*          if JOBVT = 'N' or 'O', VT is not referenced.
*
*  LDVT    (input) INTEGER
*          The leading dimension of the array VT.  LDVT >= 1; if
*          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
*
*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
*          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
*          superdiagonal elements of an upper bidiagonal matrix B
*          whose diagonal is in S (not necessarily sorted). B
*          satisfies A = U * B * VT, so it has the same singular values
*          as A, and singular vectors related by U and VT.
*
*  LWORK   (input) INTEGER
*          The dimension of the array WORK. LWORK >= 1.
*          LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
*          For good performance, LWORK should generally be larger.
*
*          If LWORK = -1, then a workspace query is assumed; the routine
*          only calculates the optimal size of the WORK array, returns
*          this value as the first entry of the WORK array, and no error
*          message related to LWORK is issued by XERBLA.
*
*  INFO    (output) INTEGER
*          = 0:  successful exit.
*          < 0:  if INFO = -i, the i-th argument had an illegal value.
*          > 0:  if DBDSQR did not converge, INFO specifies how many
*                superdiagonals of an intermediate bidiagonal form B
*                did not converge to zero. See the description of WORK
*                above for details.
*/
int
Dgesvd(char jobu, char jobvt, long int m, long int n, 
       double **a, long int lda, double *s,
       double **u, long int ldu, 
       double **vt, long int ldvt,
       double *work, long int lwork)
{
    long int        info;

    DGESVD(&jobu, &jobvt, &m, &n, 
           &a[0][0], &lda, s,
           &u[0][0], &ldu, 
           &vt[0][0], &ldvt,
           work, &lwork, 
           &info);

    return(info);
}


int
dgesvd_opt_dest(double **a, long int m, long int n,
                double **u, double *s, double **vt)
{
    long int        info;
    char            jobu = 'A';
    char            jobvt = 'A';
    long int        lda = m, ldu = m, ldvt = n;
    long int        lwork = -1;
    double         *work;

    work = (double *) malloc(sizeof(double));

    DGESVD(&jobvt, &jobu, &n, &m, 
           &a[0][0], &lda, s,
           &vt[0][0], &ldvt, /* because FORTRAN is column-wise, u and vt must be reversed */
           &u[0][0], &ldu,
           work, &lwork, 
           &info);

    lwork = work[0];
    free(work);
    work = (double *) malloc(lwork * sizeof(double));

    DGESVD(&jobvt, &jobu, &n, &m, 
           &a[0][0], &lda, s,
           &vt[0][0], &ldvt, 
           &u[0][0], &ldu,
           work, &lwork, 
           &info);

    free(work);

    return(info);
}


int
dgesvd_opt_save(double **a, long int m, long int n,
                double **u, double *s, double **vt)
{
    long int        info;
    char            jobu = 'A';
    char            jobvt = 'A';
    long int        lda = m, ldu = m, ldvt = n;
    long int        lwork = -1;
    double         *work;
    double        **mat = MatInit(m, n);

    memcpy(&mat[0][0], &a[0][0], m * n * sizeof(double));

    work = (double *) malloc(sizeof(double));

    DGESVD(&jobu, &jobvt, &m, &n, 
           &mat[0][0], &lda, s,
           &vt[0][0], &ldvt, 
           &u[0][0], &ldu,
           work, &lwork, 
           &info);

    lwork = work[0];
    free(work);
    work = (double *) malloc(lwork * sizeof(double));

    DGESVD(&jobu, &jobvt, &m, &n, 
           &mat[0][0], &lda, s,
           &vt[0][0], &ldvt, 
           &u[0][0], &ldu,
           work, &lwork, 
           &info);

    free(work);
    MatDestroy(mat);

    return((int) info);
}


/* DSYEV computes all eigenvalues and, optionally, eigenvectors of a
*  real symmetric matrix A.
*
*  Arguments
*  =========
*
*  JOBZ    (input) CHARACTER*1
*          = 'N':  Compute eigenvalues only;
*          = 'V':  Compute eigenvalues and eigenvectors.
*
*  UPLO    (input) CHARACTER*1
*          = 'U':  Upper triangle of A is stored;
*          = 'L':  Lower triangle of A is stored.
*
*  N       (input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
*          On entry, the symmetric matrix A.  If UPLO = 'U', the
*          leading N-by-N upper triangular part of A contains the
*          upper triangular part of the matrix A.  If UPLO = 'L',
*          the leading N-by-N lower triangular part of A contains
*          the lower triangular part of the matrix A.
*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
*          orthonormal eigenvectors of the matrix A.
*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
*          or the upper triangle (if UPLO='U') of A, including the
*          diagonal, is destroyed.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,N).
*
*  W       (output) DOUBLE PRECISION array, dimension (N)
*          If INFO = 0, the eigenvalues in ascending order.
*
*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*
*  LWORK   (input) INTEGER
*          The length of the array WORK.  LWORK >= max(1,3*N-1).
*          For optimal efficiency, LWORK >= (NB+2)*N,
*          where NB is the blocksize for DSYTRD returned by ILAENV.
*
*          If LWORK = -1, then a workspace query is assumed; the routine
*          only calculates the optimal size of the WORK array, returns
*          this value as the first entry of the WORK array, and no error
*          message related to LWORK is issued by XERBLA.
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*          < 0:  if INFO = -i, the i-th argument had an illegal value
*          > 0:  if INFO = i, the algorithm failed to converge; i
*                off-diagonal elements of an intermediate tridiagonal
*                form did not converge to zero.
*/
int
Dsyev(char jobz_v, char uplo_u,
      long int n, double **amat, double *w,
      double *work, long int lwork)
{
    long int        info;

    DSYEV(&jobz_v, &uplo_u, &n, &amat[0][0], &n, w, work, &lwork, &info);

    return((int) info);
}


int
dsyev_opt_dest(double **amat, long int n, double *w)
{
    long int        info;
    char            jobz = 'V';
    char            uplo = 'U';
    long int        lda = n;
    long int        lwork = -1;
    double         *work;

    work = (double *) malloc(sizeof(double));

    DSYEV(&jobz, &uplo, &n, &amat[0][0], &lda, w, work, &lwork, &info);

    lwork = work[0];
    free(work);
    work = (double *) malloc(lwork * sizeof(double));

    DSYEV(&jobz, &uplo, &n, &amat[0][0], &lda, w, work, &lwork, &info);

    free(work);

    return((int) info);
}


int
dsyev_opt_save(double **amat, long int n, double **evecs, double *evals)
{
    long int        info;
    char            jobz = 'V';
    char            uplo = 'U';
    long int        lda = n;
    long int        lwork = -1;
    double         *work;

    memcpy(&evecs[0][0], &amat[0][0], n * n * sizeof(double));

    work = (double *) malloc(sizeof(double));

    DSYEV(&jobz, &uplo, &n, &evecs[0][0], &lda, evals, work, &lwork, &info);

    lwork = work[0];
    free(work);
    work = (double *) malloc(lwork * sizeof(double));

    DSYEV(&jobz, &uplo, &n, &evecs[0][0], &lda, evals, work, &lwork, &info);

    free(work);

    return((int) info);
}


/* DSYEVR computes selected eigenvalues and, optionally, eigenvectors
*  of a real symmetric matrix T.  Eigenvalues and eigenvectors can be
*  selected by specifying either a range of values or a range of
*  indices for the desired eigenvalues.
*
*  Whenever possible, DSYEVR calls DSTEGR to compute the
*  eigenspectrum using Relatively Robust Representations.  DSTEGR
*  computes eigenvalues by the dqds algorithm, while orthogonal
*  eigenvectors are computed from various "good" L D L^T representations
*  (also known as Relatively Robust Representations). Gram-Schmidt
*  orthogonalization is avoided as far as possible. More specifically,
*  the various steps of the algorithm are as follows. For the i-th
*  unreduced block of T,
*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
*          is a relatively robust representation,
*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
*         relative accuracy by the dqds algorithm,
*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
*         close to the cluster, and go to step (a),
*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
*         compute the corresponding eigenvector by forming a
*         rank-revealing twisted factorization.
*  The desired accuracy of the output can be specified by the input
*  parameter ABSTOL.
*
*  For more details, see "A new O(n^2) algorithm for the symmetric
*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
*  Computer Science Division Technical Report No. UCB//CSD-97-971,
*  UC Berkeley, May 1997.
*
*
*  Note 1 : DSYEVR calls DSTEGR when the full spectrum is requested
*  on machines which conform to the ieee-754 floating point standard.
*  DSYEVR calls DSTEBZ and SSTEIN on non-ieee machines and
*  when partial spectrum requests are made.
*
*  Normal execution of DSTEGR may create NaNs and infinities and
*  hence may abort due to a floating point exception in environments
*  which do not handle NaNs and infinities in the ieee standard default
*  manner.
*
*  Arguments
*  =========
*
*  JOBZ    (input) CHARACTER*1
*          = 'N':  Compute eigenvalues only;
*          = 'V':  Compute eigenvalues and eigenvectors.
*
*  RANGE   (input) CHARACTER*1
*          = 'A': all eigenvalues will be found.
*          = 'V': all eigenvalues in the half-open interval (VL,VU]
*                 will be found.
*          = 'I': the IL-th through IU-th eigenvalues will be found.
********** For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and
********** DSTEIN are called
*
*  UPLO    (input) CHARACTER*1
*          = 'U':  Upper triangle of A is stored;
*          = 'L':  Lower triangle of A is stored.
*
*  N       (input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
*          On entry, the symmetric matrix A.  If UPLO = 'U', the
*          leading N-by-N upper triangular part of A contains the
*          upper triangular part of the matrix A.  If UPLO = 'L',
*          the leading N-by-N lower triangular part of A contains
*          the lower triangular part of the matrix A.
*          On exit, the lower triangle (if UPLO='L') or the upper
*          triangle (if UPLO='U') of A, including the diagonal, is
*          destroyed.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,N).
*
*  VL      (input) DOUBLE PRECISION
*  VU      (input) DOUBLE PRECISION
*          If RANGE='V', the lower and upper bounds of the interval to
*          be searched for eigenvalues. VL < VU.
*          Not referenced if RANGE = 'A' or 'I'.
*
*  IL      (input) INTEGER
*  IU      (input) INTEGER
*          If RANGE='I', the indices (in ascending order) of the
*          smallest and largest eigenvalues to be returned.
*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
*          Not referenced if RANGE = 'A' or 'V'.
*
*  ABSTOL  (input) DOUBLE PRECISION
*          The absolute error tolerance for the eigenvalues.
*          An approximate eigenvalue is accepted as converged
*          when it is determined to lie in an interval [a,b]
*          of width less than or equal to
*
*                  ABSTOL + EPS *   max( |a|,|b| ) ,
*
*          where EPS is the machine precision.  If ABSTOL is less than
*          or equal to zero, then  EPS*|T|  will be used in its place,
*          where |T| is the 1-norm of the tridiagonal matrix obtained
*          by reducing A to tridiagonal form.
*
*          See "Computing Small Singular Values of Bidiagonal Matrices
*          with Guaranteed High Relative Accuracy," by Demmel and
*          Kahan, LAPACK Working Note #3.
*
*          If high relative accuracy is important, set ABSTOL to
*          DLAMCH( 'Safe minimum' ).  Doing so will guarantee that
*          eigenvalues are computed to high relative accuracy when
*          possible in future releases.  The current code does not
*          make any guarantees about high relative accuracy, but
*          furutre releases will. See J. Barlow and J. Demmel,
*          "Computing Accurate Eigensystems of Scaled Diagonally
*          Dominant Matrices", LAPACK Working Note #7, for a discussion
*          of which matrices define their eigenvalues to high relative
*          accuracy.
*
*  M       (output) INTEGER
*          The total number of eigenvalues found.  0 <= M <= N.
*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
*
*  W       (output) DOUBLE PRECISION array, dimension (N)
*          The first M elements contain the selected eigenvalues in
*          ascending order.
*
*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))
*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
*          contain the orthonormal eigenvectors of the matrix A
*          corresponding to the selected eigenvalues, with the i-th
*          column of Z holding the eigenvector associated with W(i).
*          If JOBZ = 'N', then Z is not referenced.
*          Note: the user must ensure that at least max(1,M) columns are
*          supplied in the array Z; if RANGE = 'V', the exact value of M
*          is not known in advance and an upper bound must be used.
*
*  LDZ     (input) INTEGER
*          The leading dimension of the array Z.  LDZ >= 1, and if
*          JOBZ = 'V', LDZ >= max(1,N).
*
*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) )
*          The support of the eigenvectors in Z, i.e., the indices
*          indicating the nonzero elements in Z. The i-th eigenvector
*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
*          ISUPPZ( 2*i ).
********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
*
*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*
*  LWORK   (input) INTEGER
*          The dimension of the array WORK.  LWORK >= max(1,26*N).
*          For optimal efficiency, LWORK >= (NB+6)*N,
*          where NB is the max of the blocksize for DSYTRD and DORMTR
*          returned by ILAENV.
*
*          If LWORK = -1, then a workspace query is assumed; the routine
*          only calculates the optimal size of the WORK array, returns
*          this value as the first entry of the WORK array, and no error
*          message related to LWORK is issued by XERBLA.
*
*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
*          On exit, if INFO = 0, IWORK(1) returns the optimal LWORK.
*
*  LIWORK  (input) INTEGER
*          The dimension of the array IWORK.  LIWORK >= max(1,10*N).
*
*          If LIWORK = -1, then a workspace query is assumed; the
*          routine only calculates the optimal size of the IWORK array,
*          returns this value as the first entry of the IWORK array, and
*          no error message related to LIWORK is issued by XERBLA.
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*          < 0:  if INFO = -i, the i-th argument had an illegal value
*          > 0:  Internal error
*/
int
Dsyevr(char jobz, char range, char uplo, long int n, 
       double **a, long int lda,
       double vl, double vu,
       long int il, long int iu,
       double abstol, long int m, double *w, 
       double **z__, long int ldz, long int *isuppz,
       double *work, long int lwork,
       long int *iwork, long int liwork)
{
    long int        info;

    DSYEVR(&jobz, &range, &uplo, &n, 
            &a[0][0], &lda, &vl, &vu,
            &il, &iu,
            &abstol, &m, w, 
            &z__[0][0], &ldz, isuppz,
            work, &lwork, iwork, &liwork, &info);

    return((int) info);
}


/* LAPACK DSYEVR() computes selected eigenvalues, and optionally, eigenvectors of a
   real symmetric matrix. 
   This fxn destroys the input 'mat' */
int
dsyevr_opt_dest(double **mat, long int n,
                long int lower, long int upper,
                double *evals, double **evecs,
                double abstol)
{
    double        **z__ = evecs;
    double         *w = evals;
    double          vl, vu;
    long int        m, lwork = -1, liwork = -1;
    long int        il = lower, iu = upper;
    long int        info, lda = n, ldz = n;
    long int       *isuppz = malloc(2 * (iu - il + 1) * sizeof(long int));
    double         *work = malloc(sizeof(double));
    long int       *iwork = malloc(sizeof(long int));
    char            jobz = 'V', range = 'I', uplo = 'U';

    DSYEVR(&jobz, &range, &uplo, &n, 
            &mat[0][0], &lda, &vl, &vu,
            &il, &iu,
            &abstol, &m, w, 
            &z__[0][0], &ldz, isuppz,
            work, &lwork, iwork, &liwork, &info);

    lwork = work[0];
    liwork = iwork[0];
    free(work);
    free(iwork);
    work = malloc(lwork * sizeof(double));
    iwork = malloc(liwork * sizeof(long int));

	if (work == NULL || iwork == NULL)
	{
		fprintf(stderr, "\n\n");
		fprintf(stderr,
				" ERROR116: could not allocate memory for work space in dsyevr_opt_dest() (%li, %li)  \n\n",
				lwork, liwork);
		fflush(NULL);
		exit(EXIT_FAILURE);
	}

    /* MatPrint(mat, n); */

    DSYEVR(&jobz, &range, &uplo, &n, 
            &mat[0][0], &lda, &vl, &vu,
            &il, &iu,
            &abstol, &m, w, 
            &z__[0][0], &ldz, isuppz,
            work, &lwork, iwork, &liwork, &info);

    if (info > 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR117: LAPACK dsyevr() complete eigenvalue/eigenvector solving choked; \n");
        fprintf(stderr, "           the algorithm failed to converge; %ld \n", info);
        fprintf(stderr, "           off-diagonal elements of an intermediate tridiagonal \n");
        fprintf(stderr, "           form did not converge to zero \n\n");
        fflush(NULL);
        exit(EXIT_FAILURE);
    }
    else if (info < 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR118: the %ld-th argument had an illegal value \n\n", info);
        fflush(NULL);
        exit(EXIT_FAILURE);
    }

    free(work);
    free(iwork);
    free(isuppz);

    return((int) info);
}


/* LAPACK DSYEVR() computes selected eigenvalues, and optionally, eigenvectors of a
   real symmetric matrix. 
   This fxn destroys the input 'mat' */
int
dsyevr_dest(double **mat, long int n,
            long int lower, long int upper,
            double *evals, double **evecs,
            double abstol)
{
    double        **z__ = evecs;
    double         *w = evals;
    double          vl, vu;
    long int        m, lwork = -1, liwork = -1;
    long int        il = lower, iu = upper;
    long int        info, lda = n, ldz = n;
    long int       *isuppz = malloc(2 * (iu - il + 1) * sizeof(long int));
    double         *work;
    long int       *iwork;
    char            jobz = 'V', range = 'I', uplo = 'U';

    lwork = 26 * n;
    liwork = 10 * n;
    work = malloc(lwork * sizeof(double));
    iwork = malloc(liwork * sizeof(long int));

	if (work == NULL || iwork == NULL)
	{
		fprintf(stderr, "\n\n");
		fprintf(stderr,
				" ERROR116: could not allocate memory for work space in dsyevr_opt_dest() (%li, %li)  \n\n",
				lwork, liwork);
		fflush(NULL);
		exit(EXIT_FAILURE);
	}

    /* MatPrint(mat, n); */

    DSYEVR(&jobz, &range, &uplo, &n, 
            &mat[0][0], &lda, &vl, &vu,
            &il, &iu,
            &abstol, &m, w, 
            &z__[0][0], &ldz, isuppz,
            work, &lwork, iwork, &liwork, &info);

    if (info > 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR117: LAPACK dsyevr() complete eigenvalue/eigenvector solving choked; \n");
        fprintf(stderr, "           the algorithm failed to converge; %ld \n", info);
        fprintf(stderr, "           off-diagonal elements of an intermediate tridiagonal \n");
        fprintf(stderr, "           form did not converge to zero \n\n");
        fflush(NULL);
        exit(EXIT_FAILURE);
    }
    else if (info < 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR118: the %ld-th argument had an illegal value \n\n", info);
        fflush(NULL);
        exit(EXIT_FAILURE);
    }

    free(work);
    free(iwork);
    free(isuppz);

    return((int) info);
}



/* LAPACK DSYEVR() computes selected eigenvalues, and optionally, eigenvectors of a
   real symmetric matrix. 
   This fxn does not destroy the input 'amat' */
int
dsyevr_opt_save(const double **amat, long int n,
                long int lower, long int upper,
                double *evals, double **evecs)
{
    double        **z__ = evecs;
    double         *w = evals;
    double          vl, vu;
    long int        lwork = -1, liwork = -1;
    long int        il = lower, iu = upper;
    long int        info, lda = n, ldz = n;
    long int       *isuppz = malloc(2 * (iu - il + 1) * sizeof(long int));
    double         *work = malloc(sizeof(double));
    long int       *iwork = malloc(sizeof(long int));
    char            jobz = 'V', range = 'I', uplo = 'U';
    double          abstol = 0.0;
    long int        m = n; 
    double        **mat = MatInit(m, n);

    memcpy(&mat[0][0], &amat[0][0], n * n * sizeof(double));

    DSYEVR(&jobz, &range, &uplo, &n, 
            &mat[0][0], &lda, &vl, &vu,
            &il, &iu,
            &abstol, &m, w, 
            &z__[0][0], &ldz, isuppz,
            work, &lwork, iwork, &liwork, &info);

    lwork = work[0];
    liwork = iwork[0];
    free(work);
    free(iwork);
    work = malloc(lwork * sizeof(double));
    iwork = malloc(liwork * sizeof(long int));

    /* MatPrint(mat, n); */

    DSYEVR(&jobz, &range, &uplo, &n, 
            &mat[0][0], &lda, &vl, &vu,
            &il, &iu,
            &abstol, &m, w, 
            &z__[0][0], &ldz, isuppz,
            work, &lwork, iwork, &liwork, &info);

    if (info > 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR117: LAPACK dsyevr() complete eigenvalue/eigenvector solving choked; \n");
        fprintf(stderr, "           the algorithm failed to converge; %ld \n", info);
        fprintf(stderr, "           off-diagonal elements of an intermediate tridiagonal \n");
        fprintf(stderr, "           form did not converge to zero \n\n");
    }
    else if (info < 0)
    {
        fprintf(stderr, "\n\n");
        fprintf(stderr, " ERROR118: the %ld-th argument had an illegal value \n\n", info);
    }

    free(work);
    free(iwork);
    free(isuppz);
    MatDestroy(mat);

    return((int) info);
}


void
dpotr_invert(double **mat, int idim)
{
    int                      i, j;
    long int                 info = 0;
    char                     upper = 'U';
    long int                 dim = (long int) idim;

    /* LAPACK dpotrf and dpotri compute the inverse using cholesky decomposition */
    DPOTRF(&upper, &dim, &mat[0][0], &dim, &info);

    if (info == 0)
        DPOTRI(&upper, &dim, &mat[0][0], &dim, &info);

    else if (info > 0)
    {
        fprintf(stderr, " \n\n ERROR113: LAPACK dpotrf Choleski decomposition choked; \n");
        fprintf(stderr, "           covariance matrix is singular; \n");
        fprintf(stderr, "           leading minor of order %ld is not positive definite \n\n", info);
        exit(EXIT_FAILURE);
    }

    /* copy lower to upper */
    for (i = 0; i < dim; ++i)
        for (j = 0; j < i; ++j)
            mat[j][i] = mat[i][j];
}



/* LAPACK dpotrf computes the cholesky decomposition */
/*
      SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
*
*  -- LAPACK routine (version 3.0) --
*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
*     Courant Institute, Argonne National Lab, and Rice University
*     March 31, 1993
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            INFO, LDA, N
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   A( LDA, * )
*     ..
*
*  Purpose
*  =======
*
*  DPOTRF computes the Cholesky factorization of a real symmetric
*  positive definite matrix A.
*
*  The factorization has the form
*     A = U**T * U,  if UPLO = 'U', or
*     A = L  * L**T,  if UPLO = 'L',
*  where U is an upper triangular matrix and L is lower triangular.
*
*  This is the block version of the algorithm, calling Level 3 BLAS.
*
*  Arguments
*  =========
*
*  UPLO    (input) CHARACTER*1
*          = 'U':  Upper triangle of A is stored;
*          = 'L':  Lower triangle of A is stored.
*
*  N       (input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
*          N-by-N upper triangular part of A contains the upper
*          triangular part of the matrix A, and the strictly lower
*          triangular part of A is not referenced.  If UPLO = 'L', the
*          leading N-by-N lower triangular part of A contains the lower
*          triangular part of the matrix A, and the strictly upper
*          triangular part of A is not referenced.
*
*          On exit, if INFO = 0, the factor U or L from the Cholesky
*          factorization A = U**T*U or A = L*L**T.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,N).
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*          < 0:  if INFO = -i, the i-th argument had an illegal value
*          > 0:  if INFO = i, the leading minor of order i is not
*                positive definite, and the factorization could not be
*                completed.
*
*  =====================================================================
*/
int
dpotrf_opt_dest(double **amat, long int ndim)
{
    long int        info = 0;
    char            uplo = 'L'; /* NOTE: this is actually the upper */
    int             i, j;

    DPOTRF(&uplo, &ndim, &amat[0][0], &ndim, &info);

    for (i = 0; i < ndim; ++i)
        for (j = 0; j < i; ++j)
            amat[i][j] = 0.0;

    return((int) info);
}


/* Calculates the Moore-Penrose pseudoinverse of a symmetric, square matrix.
   Uses DGESVD to do the singular value decomposition inmat = U S V^T .
   Then constructs the pseudoinverse by V S^-1 U^T . 
   Note that here S^-1 is the inverse of only the nonzero elements of S.
   Also note that LAPACK uses Fortran-style transposed matrices so we have
   to account for that in the matrix multiplication, since U & V are
   asymmetric in general. */
int
pseudoinv_sym(double **inmat, double **outmat, long int n, const double tol)
{
    long int        info;
    char            jobu = 'A';
    char            jobvt = 'A';
    long int        lda = n, ldu = n, ldvt = n, m = n;
    long int        lwork = -1;
    double         *work;
    double        **u = MatInit(n, n);
    double        **vt = MatInit(n, n);
    double         *s = malloc(n * sizeof(double));
    double        **a = outmat;
    int             i, j, k;

    if (inmat != outmat)
        memcpy(&a[0][0], &inmat[0][0], n * n * sizeof(double));

/*     for (i = 0; i < n; ++i) */
/*         for (j = 0; j < n; ++j) */
/*             a[i][j] = inmat[j][i]; */

    work = (double *) malloc(sizeof(double));

    DGESVD(&jobu, &jobvt, &m, &n, 
            &a[0][0], &lda, s,
            &u[0][0], &ldu, 
            &vt[0][0], &ldvt,
            work, &lwork, 
            &info);

    lwork = work[0];
    free(work);
    work = (double *) malloc(lwork * sizeof(double));

    DGESVD(&jobu, &jobvt, &m, &n, 
            &a[0][0], &lda, s,
            &u[0][0], &ldu, 
            &vt[0][0], &ldvt,
            work, &lwork, 
            &info);

/*     write_C_mat((const double **) u, n, 5, 10); */
/*     write_C_mat((const double **) vt, n, 5, 10); */

    for (i = 0; i < n; ++i)
    {
        if (s[i] > tol)
            s[i] = 1.0 / s[i];
        else
            s[i] = 0.0;
    }

    /* (i x k)(k x j) = (i x j) */
    for (i = 0; i < n; ++i)
    {
        for (j = 0; j < n; ++j)
        {
            outmat[i][j] = 0.0;
            for (k = 0; k < n; ++k)
                outmat[i][j] += (u[k][i] * s[k] * vt[j][k]);
        } /* NB: LAPACK uses Fortran-style transposed matrices */
    }

    free(work);
    free(s);
    MatDestroy(u);
    MatDestroy(vt);

    return(info);
}
