    /*        Fast GEMM routine for Alpha 21164/21264      */
    /*         on  Linux, Digital UNIX                     */
    /*        by Kazushige Goto <goto@statabo.rim.or.jp>   */

#include <ctype.h>
#include "common.h"

int GEMMC_(transa, transb, m, n, k, alpha, a, lda, b, ldb, 
	beta, c, ldc)
char *transa, *transb;
int *m, *n, *k;
FLOAT *alpha, *a;
int *lda;
FLOAT *b;
int *ldb;
FLOAT *beta, *c;
int *ldc;
{
    int a_dim1, a_offset, b_dim1, b_offset, c_dim1,
            c_offset, i__1, i__2, i__3;

    /* Local variables */
    int info;
    int nota, notb;
    FLOAT temp;
    int i, j, l, ncola;
    int nrowa, nrowb;


    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = b_dim1 + 1;
    b -= b_offset;
    c_dim1 = *ldc;
    c_offset = c_dim1 + 1;
    c -= c_offset;

    /* Function Body */
    nota = ((toupper(*transa) == 'N') || (toupper(*transa) == 'R'));
    notb = ((toupper(*transb) == 'N') || (toupper(*transb) == 'R'));
    if (nota) {
      nrowa = *m;
      ncola = *k;
    } else {
      nrowa = *k;
      ncola = *m;
    }
    if (notb) {
      nrowb = *k;
    } else {
      nrowb = *n;
    }

    info = 0;

    /*     Quick return if possible. */
    
    if ((*m == 0) || (*n == 0) || 
	((*alpha == 0. || *k == 0) && *beta == 1.)) return 0;

    /*     And if  alpha.eq.zero. */

    if (*alpha == 0.) {
      if (*beta == 0.) {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	  i__2 = *m;
	  for (i = 1; i <= i__2; ++i) {
	    c[i + j * c_dim1] = 0.;
	  }
	}
      } else {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	  i__2 = *m;
	  for (i = 1; i <= i__2; ++i) {
	    c[i + j * c_dim1] = *beta * c[i + j * c_dim1];
	  }
	}
      }
      return 0;
    }
    
    /*     Start the operations. */
    
    if (notb) {
      if (nota) {
	/*           Form  C := alpha*A*B + beta*C. */
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	  if (*beta == 0.) {
	    i__2 = *m;
	    for (i = 1; i <= i__2; ++i) {
	      c[i + j * c_dim1] = 0.;
	    }
	  } else if (*beta != 1.) {
	    i__2 = *m;
	    for (i = 1; i <= i__2; ++i) {
	      c[i + j * c_dim1] = *beta * c[i + j * c_dim1];
	    }
	  }
	  i__2 = *k;
	  for (l = 1; l <= i__2; ++l) {
	    if (b[l + j * b_dim1] != 0.) {
	      temp = *alpha * b[l + j * b_dim1];
	      i__3 = *m;
	      for (i = 1; i <= i__3; ++i) {
		c[i + j * c_dim1] += temp * a[i + l * a_dim1];
	      }
	    }
	  }
	}
      } else {
	/*           Form  C := alpha*A'*B + beta*C */
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	  i__2 = *m;
	  for (i = 1; i <= i__2; ++i) {
	    temp = 0.;
	    i__3 = *k;
	    for (l = 1; l <= i__3; ++l) {
	      temp += a[l + i * a_dim1] * b[l + j * b_dim1];
	    }
	    if (*beta == 0.) {
	      c[i + j * c_dim1] = *alpha * temp;
	    } else {
	      c[i + j * c_dim1] = *alpha * temp + *beta * c[i + j * c_dim1];
	    }
	  }
	}
      }
    } else {
      if (nota) {
	/*           Form  C := alpha*A*B' + beta*C */
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
		if (*beta == 0.) {
		  i__2 = *m;
		  for (i = 1; i <= i__2; ++i) {
		    c[i + j * c_dim1] = 0.;
		  }
		} else if (*beta != 1.) {
		  i__2 = *m;
		  for (i = 1; i <= i__2; ++i) {
		    c[i + j * c_dim1] = *beta * c[i + j * c_dim1];
		  }
		}
		i__2 = *k;
		for (l = 1; l <= i__2; ++l) {
		  if (b[j + l * b_dim1] != 0.) {
		    temp = *alpha * b[j + l * b_dim1];
		    i__3 = *m;
		    for (i = 1; i <= i__3; ++i) {
		      c[i + j * c_dim1] += temp * a[i + l * a_dim1];
		    }
		  }
		}
	      }
      } else {
	
	/*           Form  C := alpha*A'*B' + beta*C */
	
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	  i__2 = *m;
	  for (i = 1; i <= i__2; ++i) {
	    temp = 0.;
	    i__3 = *k;
	    for (l = 1; l <= i__3; ++l) {
	      temp += a[l + i * a_dim1] * b[j + l * b_dim1];
	    }
	    if (*beta == 0.) {
	      c[i + j * c_dim1] = *alpha * temp;
	    } else {
	      c[i + j * c_dim1] = *alpha * temp + *beta * c[i + j * c_dim1];
	    }
	  }
	}
      }
    }
    
    return 0;
}
