/*
**
** LINPACK.JAVA     Linpack benchmark, calculates FLOPS.
**                  (FLoating Point Operations Per Second)
**
** Translated to C by Bonnie Toy 5/88
** Translated to JAVA by Baygulov Aleksei 10/98
**
** Modified by Will Menninger, 10/93, with these features:
**  (modified on 2/25/94  to fix a problem with daxpy  for
**   unequal increments or equal increments not equal to 1.
**     Jack Dongarra)
**
** - Defaults to double precision.
** - Averages ROLLed and UNROLLed performance.
** - User selectable array sizes.
** - Automatically does enough repetitions to take at least 40 seconds
**   (this Java port measures elapsed wall-clock time, not CPU time).
** - Prints machine precision.
** - ANSI prototyping.
**
**
**
*/


import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.Integer;
import java.math.BigDecimal;
import java.math.RoundingMode;
//import java.util.Date;

class linnew
{
  final static double ONE  = 1.0e0;
  final static double ZERO = 0.0e0;
  final static byte   BASE10DIG=14;
  /*final*/  static String PREC="Double";

  /* Array size used when the user just presses Enter. */
  private static final int    DEFAULT_ARRAY_SIZE = 200;
  /* Smallest (even) array size the benchmark accepts. */
  private static final int    MIN_ARRAY_SIZE = 10;
  /* Repetitions are doubled until one measurement lasts this long. */
  private static final double TARGET_SECONDS = 40.;
  /* Returned by linpack() on allocation failure; it only has to be
     >= TARGET_SECONDS so that the doubling loop in main() stops. */
  private static final double OUT_OF_MEMORY_SECONDS = 50.;

/*
** Interactive driver.
**
** Repeatedly asks for an array size on standard input, rounds it down
** to an even number and runs the benchmark with 1, 2, 4, ... repetitions
** until a single measurement takes at least TARGET_SECONDS.
**
** The loop ends when the user enters a line starting with 'q' or 'Q',
** when standard input reaches end-of-file, or when it cannot be read.
** An empty line selects DEFAULT_ARRAY_SIZE.
*/
 public static void main(String args[])
  throws IOException
    {
    BufferedReader keyboardInput = new BufferedReader(new InputStreamReader(System.in));

    while (true)
        {
        System.out.println("Enter array size (q to quit) [200]:  ");

        String buf;
        try
            {
            buf = keyboardInput.readLine();
            }
        catch (IOException e)
            {
            /* Without a readable input the user could never quit, so stop
               here instead of benchmarking the default size forever. */
            System.err.println("Cannot read array size: " + e.getMessage());
            break;
            }
        if (buf == null)        /* end of input behaves like "q" */
            break;
        buf = buf.trim();

        int arsize;
        if (buf.length() == 0)
            arsize = DEFAULT_ARRAY_SIZE;
        else
            {
            char firstChar = buf.charAt(0);
            if (firstChar == 'q' || firstChar == 'Q')
                break;
            try
                {
                arsize = Integer.parseInt(buf, 10);
                }
            catch (NumberFormatException e)
                {
                System.out.println("Wrong format of array size or too big.");
                continue;
                }
            }

        /* round down to an even size */
        arsize /= 2;
        arsize *= 2;
        if (arsize < MIN_ARRAY_SIZE)
            {
            System.out.println("Too small.");
            continue;
            }

        System.out.print("\n\nLINPACK benchmark, "+PREC+" precision.\n");
        System.out.print("Machine precision:  "+BASE10DIG+" digits.\n");
        System.out.print("Array size "+arsize+" X "+arsize+".\n");
        System.out.print("Average rolled and unrolled performance:\n\n");
        System.out.print("    Reps Time(s)  DGEFA   DGESL  OVERHEAD    KFLOPS\n");
        System.out.print("----------------------------------------------------\n");

        int nreps = 1;
        while (linpack(nreps, arsize) < TARGET_SECONDS)
            {
            if (nreps > Integer.MAX_VALUE / 2)  /* doubling would overflow */
                break;
            nreps *= 2;
            }
        System.out.print("\n");
        }
    }


/*
** Left-pads text with blanks until it is at least length characters
** long; longer text is returned unchanged.
*/
 private static String padLeft(String text,int length)
    {
    StringBuilder padded = new StringBuilder();
    for (int i = text.length(); i < length; i++)
        padded.append(' ');
    return padded.append(text).toString();
    }

/*
** Formats k in decimal, right-aligned in a field of at least
** length characters.
*/
 private static String format(int k,int length)
    {
    return padLeft(Integer.toString(k), length);
    }

/*
** Formats d with exactly two decimals (extra digits are cut off, not
** rounded), right-aligned in a field of at least length characters.
**
** The digits are taken from the same shortest decimal representation
** that Double.toString produces, but always in plain notation, so
** large or tiny values are not mangled by an exponent suffix.
** NaN and the infinities are printed by name.
*/
 private static String format(double d,int length)
    {
    String text;
    if (Double.isNaN(d) || Double.isInfinite(d))
        text = Double.toString(d);
    else
        text = BigDecimal.valueOf(d).setScale(2, RoundingMode.DOWN).toPlainString();
    return padLeft(text, length);
    }


/*
** Runs one measurement and prints one line of the result table.
**
** The system of order n = arsize/2 is generated, factored (dgefa) and
** solved (dgesl) nreps times with the rolled BLAS routines and then
** nreps times with the unrolled ones.  Only the dgefa and dgesl calls
** count towards the KFLOPS figure; everything else is "overhead".
**
** Returns the total elapsed time in seconds, or
**   0.                     if the run was too short to be measured
**                          reliably (nothing is printed), or
**   OUT_OF_MEMORY_SECONDS  if the arrays could not be allocated.
*/
 private static double linpack(int nreps,int arsize)
    {
    double[][] a;
    double[]   b;
    int[]      ipvt;

    int    lda = arsize;
    int    n   = arsize/2;
    double ops = ((2.0*n*n*n)/3.0+2.0*n*n);

    try
        {
        a    = new double[arsize][arsize];
        b    = new double[arsize];
        ipvt = new int [arsize];
        }
    catch ( OutOfMemoryError e )
        {
        System.out.println("Not enough memory available for given array size.");
        return OUT_OF_MEMORY_SECONDS;
        }

    double tdgefa = 0;
    double tdgesl = 0;
    double norma  = 0.0;    /* passed by value; matgen cannot update it */
    int    info   = 0;      /* passed by value; dgefa cannot update it  */

    double totalt = second();
    /* roll == 1: rolled BLAS loops, roll == 0: unrolled BLAS loops */
    for (int roll = 1; roll >= 0; roll--)
        for (int i = 0; i < nreps; i++)
            {
            matgen(a,lda,n,b,norma);
            double t1 = second();
            dgefa(a,lda,n,ipvt,info,roll);
            tdgefa += second()-t1;
            t1 = second();
            dgesl(a,lda,n,ipvt,b,0,roll);
            tdgesl += second()-t1;
            }
    totalt = second()-totalt;

    /* too short to give a meaningful rate */
    if (totalt<0.5 || tdgefa+tdgesl<0.2)
        return(0.);

    double kflops    = 2.*nreps*ops/(1000.*(tdgefa+tdgesl));
    double toverhead = totalt-tdgefa-tdgesl;
    if (tdgefa<0.)
        tdgefa=0.;
    if (tdgesl<0.)
        tdgesl=0.;
    if (toverhead<0.)
        toverhead=0.;

    System.out.print(format(nreps,8)+format(totalt,7)+
                     format(100.*tdgefa/totalt,8)+
                     "%"+format(100.*tdgesl/totalt,7)+"%"
                     +format(100.*toverhead/totalt,7)
                      +"%"+format(kflops,12)+"\n");
    return(totalt);
    }


/*
** Fills the leading n x n part of a with a fixed pseudo-random
** sequence (values in [-2,2)) and sets b[i] to the sum over j of
** a[j][i].
**
** lda is not used.  norma is a by-value parameter: the maximum is
** tracked locally but is never visible to the caller.
*/
 static void matgen(double[][] a,int  lda,int  n,double[] b,double norma)
    {
    int init = 1325;

    norma = 0.0;
    for (int j = 0; j < n; j++)
        for (int i = 0; i < n; i++)
            {
            init = (int)(3125*init % 65536L);
            a[j][i] = (init - 32768.0)/16384.0;
            norma = (a[j][i] > norma) ? a[j][i] : norma;
            }
    for (int i = 0; i < n; i++)
        b[i] = 0.0;
    for (int j = 0; j < n; j++)
        for (int i = 0; i < n; i++)
            b[i] = b[i] + a[j][i];
    }


/*
**
** DGEFA benchmark
**
**
**   dgefa factors a double precision matrix by gaussian elimination.
**
**   dgefa is usually called by dgeco, but it can be called
**   directly with a saving in time if  rcond  is not needed.
**   (time for dgeco) = (1 + 9/n)*(time for dgefa) .
**
**   on entry
**
**      a       double precision[n][lda]
**              the matrix to be factored.  the pivot search and the
**              scaling run along a[k], i.e. a[k] is handled as one
**              vector of the k-th elimination step.
**
**      lda     integer
**              the leading dimension of the array  a  (not used here).
**
**      n       integer
**              the order of the matrix  a .
**
**      roll    integer
**              = 1  use the rolled blas routines (dscal_r, daxpy_r),
**              otherwise the unrolled ones (dscal_ur, daxpy_ur).
**              both branches are otherwise identical.
**
**   on return
**
**      a       an upper triangular matrix and the multipliers
**              which were used to obtain it.
**              the factorization can be written  a = l*u  where
**              l  is a product of permutation and unit lower
**              triangular matrices and  u  is upper triangular.
**
**      ipvt    integer[n]
**              an integer vector of pivot indices.
**
**      info    integer
**              = 0  normal value.
**              = k  if  u[k][k] .eq. 0.0 .
**              NOTE: in this java port  info  is a by-value parameter;
**              it is assigned inside the routine only and the caller
**              never sees the result.
**
**   linpack. this version dated 08/14/78 .
**   cleve moler, university of New Mexico, argonne national lab.
**
**   functions
**
**   blas daxpy,dscal,idamax
**
*/
 private static void dgefa(double[][] a,int lda,int n,int[] ipvt,int info,int roll)
    {
    double t;
    int    j,k,kp1,l,nm1;

    /* gaussian elimination with partial pivoting */

    if (roll==1)
        {
        info = 0;
        nm1 = n - 1;
        if (nm1 >=  0)
            for (k = 0; k < nm1; k++)
                {
                kp1 = k + 1;

                /* find l = pivot index */

                l = idamax(n-k,k,a[k],1) + k;
                ipvt[k] = l;

                /* zero pivot implies this column already
                   triangularized */

                if (a[k][l] != ZERO)
                    {

                    /* interchange if necessary */

                    if (l != k)
                        {
                        t = a[k][l];
                        a[k][l] = a[k][k];
                        a[k][k] = t;
                        }

                    /* compute multipliers */

                    t = -ONE/a[k][k];
                    dscal_r(n-(k+1),k+1,t,a[k],1);

                    /* row elimination with column indexing */

                    for (j = kp1; j < n; j++)
                        {
                        t = a[j][l];
                        if (l != k)
                            {
                            a[j][l] = a[j][k];
                            a[j][k] = t;
                            }
                        daxpy_r(n-(k+1),k+1,t,a[k],1,a[j],1);
                        }
                    }
                else
                    info = k;
                }
        ipvt[n-1] = n-1;
        if (a[n-1][n-1] == ZERO)
            info = n-1;
        }
    else
        {
        info = 0;
        nm1 = n - 1;
        if (nm1 >=  0)
            for (k = 0; k < nm1; k++)
                {
                kp1 = k + 1;

                /* find l = pivot index */

                l = idamax(n-k,k,a[k],1) + k;
                ipvt[k] = l;

                /* zero pivot implies this column already
                   triangularized */

                if (a[k][l] != ZERO)
                    {

                    /* interchange if necessary */

                    if (l != k)
                        {
                        t = a[k][l];
                        a[k][l] = a[k][k];
                        a[k][k] = t;
                        }

                    /* compute multipliers */

                    t = -ONE/a[k][k];
                    dscal_ur(n-(k+1),k+1,t,a[k],1);

                    /* row elimination with column indexing */

                    for (j = kp1; j < n; j++)
                        {
                        t = a[j][l];
                        if (l != k)
                            {
                            a[j][l] = a[j][k];
                            a[j][k] = t;
                            }
                        daxpy_ur(n-(k+1),k+1,t,a[k],1,a[j],1);
                        }
                    }
                else
                    info = k;
                }
        ipvt[n-1] = n-1;
        if (a[n-1][n-1] == ZERO)
            info = n-1;
        }
    }


/*
**
** DGESL benchmark
**
**
**   dgesl solves the double precision system
**   a * x = b  or  trans(a) * x = b
**   using the factors computed by dgeco or dgefa.
**
**   on entry
**
**      a       double precision[n][lda]
**              the output from dgeco or dgefa.
**
**      lda     integer
**              the leading dimension of the array  a  (not used here).
**
**      n       integer
**              the order of the matrix  a .
**
**      ipvt    integer[n]
**              the pivot vector from dgeco or dgefa.
**
**      b       double precision[n]
**              the right hand side vector.
**
**      job     integer
**              = 0         to solve  a*x = b ,
**              = nonzero   to solve  trans(a)*x = b  where
**                          trans(a)  is the transpose.
**
**      roll    integer
**              = 1  use the rolled blas routines (daxpy_r, ddot_r),
**              otherwise the unrolled ones (daxpy_ur, ddot_ur).
**              both branches are otherwise identical.
**
**  on return
**
**      b       the solution vector  x .
**
**   error condition
**
**      a division by zero will occur if the input factor contains a
**      zero on the diagonal.  technically this indicates singularity
**      but it is often caused by improper arguments or improper
**      setting of lda .  it will not occur if the subroutines are
**      called correctly and if dgeco has set rcond .gt. 0.0
**      or dgefa has set info .eq. 0 .
**
**   to compute  inverse(a) * c  where  c  is a matrix
**   with  p  columns
**         dgeco(a,lda,n,ipvt,rcond,z)
**         if (!rcond is too small){
**              for (j=0,j<p,j++)
**                      dgesl(a,lda,n,ipvt,c[j][0],0);
**         }
**
**   linpack. this version dated 08/14/78 .
**   cleve moler, university of new mexico, argonne national lab.
**
**   functions
**
**   blas daxpy,ddot
*/
 private static void dgesl(double[][] a,int lda,int n,int[] ipvt,double[] b,int job,int roll)
    {
    double  t;
    int     k,kb,l,nm1;

    if (roll==1)
        {
        nm1 = n - 1;
        if (job == 0)
            {

            /* job = 0 , solve  a * x = b   */
            /* first solve  l*y = b         */

            if (nm1 >= 1)
                for (k = 0; k < nm1; k++)
                    {
                    l = ipvt[k];
                    t = b[l];
                    if (l != k)
                        {
                        b[l] = b[k];
                        b[k] = t;
                        }
                    daxpy_r(n-(k+1),k+1,t,a[k],1,b,1);
                    }

            /* now solve  u*x = y */

            for (kb = 0; kb < n; kb++)
                {
                k = n - (kb + 1);
                b[k] = b[k]/a[k][k];
                t = -b[k];
                daxpy_r(k,0,t,a[k],1,b,1);
                }
            }
        else
            {

            /* job = nonzero, solve  trans(a) * x = b  */
            /* first solve  trans(u)*y = b             */

            for (k = 0; k < n; k++)
                {
                t = ddot_r(k,0,a[k],1,b,1);
                b[k] = (b[k] - t)/a[k][k];
                }

            /* now solve trans(l)*x = y     */
            /* k must run from n-2 down to 0, hence kb <= nm1 */

            if (nm1 >= 1)
                for (kb = 1; kb <= nm1; kb++)
                    {
                    k = n - (kb+1);
                    b[k] = b[k] + ddot_r(n-(k+1),k+1,a[k],1,b,1);
                    l = ipvt[k];
                    if (l != k)
                        {
                        t = b[l];
                        b[l] = b[k];
                        b[k] = t;
                        }
                    }
            }
        }
    else
        {
        nm1 = n - 1;
        if (job == 0)
            {

            /* job = 0 , solve  a * x = b   */
            /* first solve  l*y = b         */

            if (nm1 >= 1)
                for (k = 0; k < nm1; k++)
                    {
                    l = ipvt[k];
                    t = b[l];
                    if (l != k)
                        {
                        b[l] = b[k];
                        b[k] = t;
                        }
                    daxpy_ur(n-(k+1),k+1,t,a[k],1,b,1);
                    }

            /* now solve  u*x = y */

            for (kb = 0; kb < n; kb++)
                {
                k = n - (kb + 1);
                b[k] = b[k]/a[k][k];
                t = -b[k];
                daxpy_ur(k,0,t,a[k],1,b,1);
                }
            }
        else
            {

            /* job = nonzero, solve  trans(a) * x = b  */
            /* first solve  trans(u)*y = b             */

            for (k = 0; k < n; k++)
                {
                t = ddot_ur(k,0,a[k],1,b,1);
                b[k] = (b[k] - t)/a[k][k];
                }

            /* now solve trans(l)*x = y     */
            /* k must run from n-2 down to 0, hence kb <= nm1 */

            if (nm1 >= 1)
                for (kb = 1; kb <= nm1; kb++)
                    {
                    k = n - (kb+1);
                    b[k] = b[k] + ddot_ur(n-(k+1),k+1,a[k],1,b,1);
                    l = ipvt[k];
                    if (l != k)
                        {
                        t = b[l];
                        b[l] = b[k];
                        b[k] = t;
                        }
                    }
            }
        }
    }



/*
** Constant times a vector plus a vector:  dy = dy + da*dx .
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
**
** n elements are processed, starting at index  first  of both
** arrays and stepping by incx / incy.  Nothing is done when
** n <= 0 or da == 0.
*/
 private static void daxpy_r(int n,int first,double da,double[] dx,int incx,
                             double[] dy,int incy)
    {
    int i,ix,iy;

    if (n <= 0)
        return;
    if (da == ZERO)
        return;

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */
        /* offsets are 0-based, relative to  first  (as in ddot_r) */

        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
            {
            dy[iy+first] = dy[iy+first] + da*dx[ix+first];
            ix = ix + incx;
            iy = iy + incy;
            }
        return;
        }

    /* code for both increments equal to 1 */

    for (i = first+0;i < first+n; i++)
        dy[i] = dy[i] + da*dx[i];
    }


/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
**
** n elements are used, starting at index  first  of both arrays
** and stepping by incx / incy.  Returns 0 when n <= 0.
*/
 private static double ddot_r(int n,int first,double[] dx,int incx,double[] dy,int incy)
    {
    double dtemp;
    int i,ix,iy;

    dtemp = ZERO;

    if (n <= 0)
        return(ZERO);

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */

        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
            {
            dtemp = dtemp + dx[ix+first]*dy[iy+first];
            ix = ix + incx;
            iy = iy + incy;
            }
        return(dtemp);
        }

    /* code for both increments equal to 1 */

    for (i = first+0;i < first+n; i++)
        dtemp = dtemp + dx[i]*dy[i];
    return(dtemp);
    }


/*
** Scales a vector by a constant:  dx = da*dx .
** Jack Dongarra, linpack, 3/11/78.
** ROLLED version
**
** n elements are scaled, starting at index  first  and stepping
** by incx.  Nothing is done when n <= 0.
*/
 private static void dscal_r(int n,int first,double da,double[] dx,int incx)
    {
    int i,nincx;

    if (n <= 0)
        return;
    if (incx != 1)
        {

        /* code for increment not equal to 1 */

        nincx = n*incx;
        for (i = first+0; i < first+nincx; i = i + incx)
            dx[i] = da*dx[i];
        return;
        }

    /* code for increment equal to 1 */

    for (i = first+0; i < first+n; i++)
        dx[i] = da*dx[i];
    }


/*
** constant times a vector plus a vector:  dy = dy + da*dx .
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
**
** Same contract as daxpy_r; the unit-stride loop is unrolled
** four times after a clean-up loop for the first n % 4 elements.
*/
 private static void daxpy_ur(int n,int first,double da,double[] dx,int incx,
                              double[] dy,int incy)
    {
    int i,ix,iy,m;

    if (n <= 0)
        return;
    if (da == ZERO)
        return;

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */
        /* offsets are 0-based, relative to  first  (as in ddot_ur) */

        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
            {
            dy[iy+first] = dy[iy+first] + da*dx[ix+first];
            ix = ix + incx;
            iy = iy + incy;
            }
        return;
        }

    /* code for both increments equal to 1 */

    m = n % 4;
    if ( m != 0)
        {
        for (i = first+0; i < first+m; i++)
            dy[i] = dy[i] + da*dx[i];
        if (n < 4)
            return;
        }
    for (i = first+m; i < first+n; i = i + 4)
        {
        dy[i] = dy[i] + da*dx[i];
        dy[i+1] = dy[i+1] + da*dx[i+1];
        dy[i+2] = dy[i+2] + da*dx[i+2];
        dy[i+3] = dy[i+3] + da*dx[i+3];
        }
    }


/*
** Forms the dot product of two vectors.
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
**
** Same contract as ddot_r; the unit-stride loop is unrolled
** five times after a clean-up loop for the first n % 5 elements.
*/
 private static double ddot_ur(int n,int first,double[] dx,int incx,double[] dy,int incy)
    {
    double dtemp;
    int i,ix,iy,m;

    dtemp = ZERO;

    if (n <= 0)
        return(ZERO);

    if (incx != 1 || incy != 1)
        {

        /* code for unequal increments or equal increments != 1 */

        ix = 0;
        iy = 0;
        if (incx < 0) ix = (-n+1)*incx;
        if (incy < 0) iy = (-n+1)*incy;
        for (i = 0;i < n; i++)
            {
            dtemp = dtemp + dx[ix+first]*dy[iy+first];
            ix = ix + incx;
            iy = iy + incy;
            }
        return(dtemp);
        }

    /* code for both increments equal to 1 */

    m = n % 5;
    if (m != 0)
        {
        for (i = first+0; i < first+m; i++)
            dtemp = dtemp + dx[i]*dy[i];
        if (n < 5)
            return(dtemp);
        }
    for (i = first+m; i < first+n; i = i + 5)
        {
        dtemp = dtemp + dx[i]*dy[i] +
        dx[i+1]*dy[i+1] + dx[i+2]*dy[i+2] +
        dx[i+3]*dy[i+3] + dx[i+4]*dy[i+4];
        }
    return(dtemp);
    }


/*
** Scales a vector by a constant:  dx = da*dx .
** Jack Dongarra, linpack, 3/11/78.
** UNROLLED version
**
** Same contract as dscal_r; the unit-stride loop is unrolled
** five times after a clean-up loop for the first n % 5 elements.
*/
 private static void dscal_ur(int n,int first,double da,double[] dx,int incx)
    {
    int i,m,nincx;

    if (n <= 0)
        return;
    if (incx != 1)
        {

        /* code for increment not equal to 1 */

        nincx = n*incx;
        for (i = 0; i < nincx; i = i + incx)
            dx[i+first] = da*dx[i+first];
        return;
        }

    /* code for increment equal to 1 */

    m = n % 5;
    if (m != 0)
        {
        for (i = first+0; i < first+m; i++)
            dx[i] = da*dx[i];
        if (n < 5)
            return;
        }
    for (i = first+m; i < first+n; i = i + 5)
        {
        dx[i] = da*dx[i];
        dx[i+1] = da*dx[i+1];
        dx[i+2] = da*dx[i+2];
        dx[i+3] = da*dx[i+3];
        dx[i+4] = da*dx[i+4];
        }
    }


/*
** Finds the index of element having max. absolute value.
** Jack Dongarra, linpack, 3/11/78.
**
** n elements are examined, starting at index  first  and stepping
** by incx.  The result is the 0-based position within that sequence
** (not an index into dx); the first maximum wins.  Returns -1 when
** n < 1 and 0 when n == 1.
*/
 private static int idamax(int n,int first,double[] dx,int incx)
    {
    double dmax;
    int i, ix, itemp=0;

    if (n < 1)
        return(-1);
    if (n ==1 )
        return(0);
    if(incx != 1)
        {

        /* code for increment not equal to 1 */
        /* ix is the 0-based offset of element i, relative to  first  */

        dmax = Math.abs(dx[0+first]);
        ix = incx;
        for (i = 1; i < n; i++)
            {
            if(Math.abs(dx[ix+first]) > dmax)
                {
                itemp = i;
                dmax = Math.abs(dx[ix+first]);
                }
            ix = ix + incx;
            }
        }
    else
        {

        /* code for increment equal to 1 */

        itemp = 0;
        dmax = Math.abs(dx[0+first]);
        for (i = 1; i < n; i++)
            if(Math.abs(dx[i+first]) > dmax)
                {
                itemp = i;
                dmax = Math.abs(dx[i+first]);
                }
        }
    return itemp;
    }

 static double MillisecondsInSecond = 1000;

/*
** Returns a time stamp in seconds.  Only differences between two
** calls are meaningful: the value comes from the monotonic
** System.nanoTime() clock, whose origin is arbitrary.
*/
 private static double second()
    {
    double milliseconds = System.nanoTime() / 1.0e6;
    return milliseconds / MillisecondsInSecond;
    }

} //end of class linnew

