#include <objres.h>
#include <objseq.h>
#include <sequtil.h>
#include <seqport.h>
#include <seqptrn.h>

/* defines */
/* MAXPAT: size in bytes of the buffer in which ReadPattern accumulates the
   cleaned pattern text read from a pattern file */
#define MAXPAT 2048
/* SQRDBF: number of residues PatternMatch requests per sequence chunk */
#define SQRDBF 8192

/* functions - internal */

/*
 * ComPatNew
 *
 * Allocates a new, empty pattern element (min = max = 0, empty residue set)
 * and appends it to the end of the list that curcpp belongs to.
 *
 *   curcpp   any element of an existing list, or NULL to start a new list
 *
 * Returns the new element, which is the new tail of the list, or NULL when
 * the allocation fails; in that case the existing list is left untouched.
 */
static ComPatPtr ComPatNew (ComPatPtr curcpp)
{
  ComPatPtr   cpp;

  cpp = (ComPatPtr) MemNew (sizeof (ComPat));
  if (cpp == NULL)
    return NULL;

  cpp->next = NULL;
  cpp->min = 0;
  cpp->max = 0;
  cpp->res[0] = '\0';

  if (curcpp == NULL)
    return cpp;

  /* walk to the current tail so the new element is always appended last */
  while (curcpp->next != NULL)
    curcpp = curcpp->next;
  curcpp->next = cpp;

  return cpp;
}

/*
 * ComPatFree
 *
 * Releases every element of the pattern list starting at headcpp.
 * Accepts NULL.  Always returns NULL so that callers can write
 * "list = ComPatFree (list);".
 */
extern ComPatPtr ComPatFree (ComPatPtr headcpp)
{
  ComPatPtr   following;

  for (; headcpp != NULL; headcpp = following)
  {
    /* remember the successor before the current element goes away */
    following = headcpp->next;
    headcpp->next = NULL;
    MemFree (headcpp);
  }
  return headcpp;
}

/*
 * ComPatLenMax
 *
 * Returns the sum of the "max" repeat counts over all elements of the
 * pattern list, i.e. the longest stretch of sequence the compiled pattern
 * can cover.  Returns 0 for an empty (NULL) list.
 */
static Int4 ComPatLenMax (ComPatPtr cpp)
{
  Int4      total;
  ComPatPtr node;

  total = 0;
  for (node = cpp; node != NULL; node = node->next)
    total += node->max;

  return total;
}

/*
 * InvertPattern
 *
 * Turns a compiled nucleotide pattern into its reverse complement:
 * every residue code in every element is replaced by its complement
 * (A<->T, C<->G, B<->V, D<->H, K<->M, R<->Y; all other characters are left
 * as they are) and the order of the elements is reversed.
 *
 *   cpp   head of the list to invert; may be NULL
 *
 * Returns the head of the inverted list (NULL for an empty list).  The
 * list is reversed in place: the elements passed in are reused, so nothing
 * is allocated, the operation cannot fail, and min/max and the complete
 * residue set of every element are preserved.  As before, the caller must
 * continue with the returned pointer only; the pointer passed in is no
 * longer the head of the list.
 */
extern ComPatPtr InvertPattern (ComPatPtr cpp)
{
  ComPatPtr reversed = NULL, following;
  CharPtr   pat;

  while (cpp != NULL)
  {
    /* complement the residue set of this element */
    for (pat = cpp->res; *pat != '\0'; pat++)
    {
      switch (*pat)
      {
       case 'A':
        *pat = 'T';
        break;
       case 'B':
        *pat = 'V';
        break;
       case 'C':
        *pat = 'G';
        break;
       case 'D':
        *pat = 'H';
        break;
       case 'G':
        *pat = 'C';
        break;
       case 'H':
        *pat = 'D';
        break;
       case 'K':
        *pat = 'M';
        break;
       case 'M':
        *pat = 'K';
        break;
       case 'R':
        *pat = 'Y';
        break;
       case 'T':
        *pat = 'A';
        break;
       case 'V':
        *pat = 'B';
        break;
       case 'Y':
        *pat = 'R';
        break;
       default:
        break;
      }
    }

    /* unlink the element and push it onto the front of the reversed list */
    following = cpp->next;
    cpp->next = reversed;
    reversed = cpp;
    cpp = following;
  }

  return reversed;
}

/*
 * AddResidues
 *
 * Expands one pattern character into the concrete residues it stands for
 * and merges them into the residue set "res", skipping residues that are
 * already present.
 *
 *   res       NUL-terminated residue set, extended in place; no size is
 *             passed in, so the caller must provide enough room
 *   pat       pattern character to expand
 *   moltype   0 selects the nucleotide table, 1 the amino acid table;
 *             any other value makes the call a no-op
 *
 * Each table entry is a string whose first character is the pattern code
 * and whose remaining characters are the residues it expands to.  A
 * character that is not in the selected table leaves res unchanged.
 */
static void AddResidues (CharPtr res, Char pat, Int2 moltype)
{
  Int4    i, n;
  CharPtr slot;
  CharPtr entry;
  CharPtr *table;

  static CharPtr ambiresNA[] =
  {
    "AA",
    "BCGT",
    "CC",
    "DAGT",
    "GG",
    "HACT",
    "KGT",
    "MAC",
    "NACGT",
    "RAG",
    "SCG",
    "TT",
    "VACG",
    "WAT",
    "YCT",
    ""
  };

  static CharPtr ambiresAA[] =
  {
    "AA",
    "BDN",
    "CC",
    "DD",
    "EE",
    "FF",
    "GG",
    "HH",
    "II",
    "KK",
    "LL",
    "MM",
    "NN",
    "PP",
    "QQ",
    "RR",
    "SS",
    "TT",
    "UU", /* selenocysteine */
    "VV",
    "WW",
    "XABCDEFGHIKLMNPQRSTUVWXYZ*", /* any valid residue including stop codon */
    "YY",
    "ZEQ",
    "**", /* stop codon */
    ""
  };

  if (moltype == 0)
    table = ambiresNA;
  else if (moltype == 1)
    table = ambiresAA;
  else
    return;

  /* the empty string marks the end of either table */
  for (i = 0; table[i] != NULL && table[i][0] != '\0'; i++)
  {
    entry = table[i];
    if (entry[0] != pat)
      continue;

    for (n = 1; entry[n] != '\0'; n++)
    {
      /* look for the residue; stop on it or on the terminator */
      slot = res;
      while (*slot != '\0' && *slot != entry[n])
        slot++;
      if (*slot == '\0')
      {
        /* not present yet: append and re-terminate */
        slot[0] = entry[n];
        slot[1] = '\0';
      }
    }
    return;
  }
  return;
}

/*
 * CompilePattern
 *
 * Translates a textual pattern into a linked list of pattern elements.
 *
 * Syntax handled here:
 *   (n)  (min:max)  (min-max)   repeat count for the element that follows
 *   [xyz]                       set of alternative residue codes
 *   any other character         a single residue code
 * Residue codes are expanded through AddResidues according to moltype.
 *
 *   pattern   NUL-terminated pattern text.  It is modified temporarily
 *             while repeat counts are parsed (and restored afterwards),
 *             so it must point to writable memory.
 *   moltype   passed through to AddResidues
 *
 * Returns the head of the compiled list; the caller releases it with
 * ComPatFree.  Returns NULL when pattern is NULL or empty, when a '(' or
 * '[' is not closed, when a stray ')' or ']' is found, when any element
 * ends up with an empty residue set, or when memory runs out.
 *
 * Note: inside "( )" a number is stored in min as long as min is still 0
 * and in max otherwise, so an explicit leading 0 is not kept as minimum.
 */
extern ComPatPtr CompilePattern (CharPtr pattern, Int2 moltype)
{
  ComPatPtr    cpp = NULL, headcpp = NULL;
  CharPtr      head;
  Char         temp;
  Boolean      flagCount;

  if (pattern == NULL || *pattern == '\0')
    return headcpp;

  /* TRUE while a "( )" count is waiting for the residues it applies to */
  flagCount = FALSE;
  while (*pattern != '\0')
  {
    switch (*pattern)
    {
     case '(':
      cpp = ComPatNew (cpp);
      if (cpp == NULL)
      {
        headcpp = ComPatFree (headcpp);
        return headcpp;
      }
      if (headcpp == NULL)
        headcpp = cpp;
      pattern++;
      head = pattern;
      while (*pattern != '\0' && *pattern != ')')
      {
        /* advance to the end of the current number */
        while (*pattern != '\0' && *pattern != ':' &&
               *pattern != '-' && *pattern != ')')
        {
          pattern++;
        }
        if (*pattern != '\0')
        {
          /* terminate the number in place, convert it, then restore */
          temp = *pattern;
          *pattern = '\0';
          if (cpp->min == 0)
            cpp->min = atoi (head);
          else
            cpp->max = atoi (head);
          *pattern = temp;
          if (*pattern == ')')
            continue;
          pattern++;
          head = pattern;
        }
      }
      if (*pattern == '\0')
      {
        /* unterminated "(" */
        headcpp = ComPatFree (headcpp);
        return headcpp;
      }
      if (cpp->min < 1)
        cpp->min = 1;
      if (cpp->max < cpp->min)
        cpp->max = cpp->min;
      flagCount = TRUE;
      break;
     case '[':
      if (!flagCount)
      {
        cpp = ComPatNew (cpp);
        if (cpp == NULL)
        {
          headcpp = ComPatFree (headcpp);
          return headcpp;
        }
        cpp->min = 1;
        cpp->max = 1;
        if (headcpp == NULL)
          headcpp = cpp;
      }
      flagCount = FALSE;
      pattern++;
      while (*pattern != '\0' && *pattern != ']')
      {
        AddResidues (cpp->res, *pattern, moltype);
        pattern++;
      }
      if (*pattern == '\0')
      {
        /* unterminated "[" */
        headcpp = ComPatFree (headcpp);
        return headcpp;
      }
      break;
     case ')':
     case ']':
      /* closing bracket without a matching opener */
      headcpp = ComPatFree (headcpp);
      return headcpp;
     default:
      if (!flagCount)
      {
        cpp = ComPatNew (cpp);
        if (cpp == NULL)
        {
          headcpp = ComPatFree (headcpp);
          return headcpp;
        }
        cpp->min = 1;
        cpp->max = 1;
        if (headcpp == NULL)
          headcpp = cpp;
      }
      flagCount = FALSE;
      AddResidues (cpp->res, *pattern, moltype);
      break;
    }
    pattern++;
  }

  /* an element without residues can never match: reject the pattern */
  cpp = headcpp;
  while (cpp != NULL)
  {
    if (*(cpp->res) == '\0')
    {
      headcpp = ComPatFree (headcpp);
      return headcpp;
    }
    cpp = cpp->next;
  }

  return headcpp;
}

/*
 * ReadPattern
 *
 * Reads a pattern file and returns its contents as one upper-cased string
 * with comments and white space removed.  A comment starts at '#' or ';'
 * and runs to the end of the line, also when the line is longer than the
 * read buffer or the last line has no trailing newline.
 *
 *   filename   name of the pattern file
 *
 * Returns a newly allocated string that the caller must release with
 * MemFree, or NULL when the file cannot be opened, memory runs out, or the
 * cleaned pattern does not fit into MAXPAT bytes including its terminator.
 */
static CharPtr ReadPattern  (CharPtr filename)
{
  FILE    *fin;
  CharPtr buff1, buff2;
  CharPtr inbuff, outbuff;
  CharPtr bptr, tptr;
  Int4    blen = 128;
  Boolean incomment, toolong;

  if ((fin = FileOpen (filename, "r")) == NULL)
    return NULL;

  tptr = MemNew (sizeof (Char) * MAXPAT);
  buff1 = MemNew (sizeof (Char) * blen);
  buff2 = MemNew (sizeof (Char) * blen);
  if (tptr == NULL || buff1 == NULL || buff2 == NULL)
  {
    MemFree (tptr);
    MemFree (buff1);
    MemFree (buff2);
    FileClose (fin);
    return NULL;
  }
  tptr[0] = '\0';

  bptr = NULL;
  toolong = FALSE;
  /* comment state is kept across reads so that a comment on a line longer
     than the read buffer is skipped completely */
  incomment = FALSE;
  while ((FileGets (buff1, blen, fin)) != NULL)
  {
    outbuff = buff2;
    for (inbuff = buff1; *inbuff != '\0'; inbuff++)
    {
      if (*inbuff == '\n')
      {
        incomment = FALSE;
      }
      else if (incomment)
      {
        continue;
      }
      else if (*inbuff == '#' || *inbuff == ';')
      {
        incomment = TRUE;
        continue;
      }
      if (!IS_WHITESP (*inbuff))
      {
        *outbuff = *inbuff;
        outbuff++;
      }
    }
    *outbuff = '\0';

    /* ">=" leaves room for the terminating NUL in the MAXPAT byte buffer */
    if ((StrLen (buff2) + StrLen (tptr)) >= MAXPAT)
    {
      toolong = TRUE;
      break;
    }
    StrCat (tptr, buff2);
  }

  if (!toolong)
  {
    /* hand back a copy that is only as large as needed */
    bptr = MemNew (StrLen (tptr) + 1);
    if (bptr != NULL)
    {
      StrCpy (bptr, tptr);
      StrUpper (bptr);
    }
  }

  MemFree (tptr);
  MemFree (buff1);
  MemFree (buff2);
  FileClose (fin);
  return bptr;
}

/*
 * CleanPattern
 *
 * Normalises a pattern string in place: removes comments (from '#' or ';'
 * to the end of the line, or to the end of the string when no newline
 * follows), drops every character for which IS_WHITESP is true, and
 * converts the result to upper case.
 *
 *   inbuff   NUL-terminated, writable pattern text; NULL is accepted and
 *            ignored.  The cleaned text is never longer than the input.
 */
extern void CleanPattern  (CharPtr inbuff)
{
  CharPtr headptr, outbuff;
  Boolean incomment;

  if (inbuff == NULL)
    return;
  headptr = outbuff = inbuff;
  incomment = FALSE;
  for (; *inbuff != '\0'; inbuff++)
  {
    if (*inbuff == '\n')
    {
      /* a newline ends a comment */
      incomment = FALSE;
    }
    else if (incomment)
    {
      continue;
    }
    else if (*inbuff == '#' || *inbuff == ';')
    {
      incomment = TRUE;
      continue;
    }
    if (!IS_WHITESP (*inbuff))
    {
      *outbuff = *inbuff;
      outbuff++;
    }
  }
  *outbuff = '\0';
  StrUpper (headptr);
  return;
}

/*
 * ResidueMatch
 *
 * Tests whether the compiled pattern list cpp matches the text at the very
 * start of seq (the match is anchored; no scanning is done here).
 *
 *   seq       NUL-terminated residue string
 *   cpp       pattern element to match first; the rest of the list is
 *             matched by recursion
 *   moltype   not used here except for being passed to the recursive call
 *   patlen    receives the number of residues covered by the match
 *
 * Returns TRUE on a match, FALSE otherwise (also when cpp is NULL or seq
 * is empty, in which case *patlen is not touched).
 */
extern Boolean ResidueMatch (CharPtr seq, ComPatPtr cpp, Int2 moltype,
                             Int4Ptr patlen)
{
  CharPtr res;
  Int4    rescount;

  if (cpp == NULL || *seq == '\0')
    return FALSE;

  /* reset on every call, including recursive ones; each level that
     succeeds adds its own count on the way back up */
  *patlen = 0;
  /* consume the first min-1 mandatory residues; rescount ends up as the
     1-based index of the residue examined next */
  for (rescount = 1; rescount < cpp->min; rescount++)
  {
    res = cpp->res;
    while (*res != '\0')
    {
      if (*seq == *res)
      {
        seq++;
        break;
      }
      res++;
    }
    /* ran through the whole residue set without a hit */
    if (*res == '\0')
      return FALSE;
  }

  /* match residue number rescount, then try the remainder of the list;
     if that fails, extend this element by one residue up to max */
  res = cpp->res;
  while (*res != '\0')
  {
    if (*seq == *res)
    {
      if (cpp->next == NULL)
      {
        /* last element: succeeds as soon as min residues are matched,
           it is not extended towards max */
        *patlen += rescount;
        return TRUE;
      }
      else
      {
        seq++;
        if (ResidueMatch (seq, cpp->next, moltype, patlen))
        {
          *patlen += rescount;
          return TRUE;
        }
        else
        {
          /* remainder did not match here: try one more repeat */
          rescount++;
          if (rescount > cpp->max)
            return FALSE;
          else
          {
            /* position res on the entry matching the next residue so the
               enclosing loop re-enters its match branch */
            res = cpp->res;
            while (*res != '\0')
            {
              if (*seq == *res)
                break;
              res++;
            }
            if (*res == '\0')
              return FALSE;
            else
              continue;
          }
        }
      }
    }
    res++;
  }
  return FALSE;
}

/*
 * GetSeqChunk
 *
 * Reads a stretch of residues from a sequence port into a newly allocated,
 * NUL-terminated string.
 *
 *   spp     sequence port to read from
 *   start   position the port is moved to before reading
 *   chunk   requested number of residues
 *   len     total sequence length; the read is shortened to len - start
 *           when start + chunk would go beyond it
 *
 * Returns the string, to be released with MemFree by the caller, or NULL
 * when the allocation fails.
 */
static CharPtr GetSeqChunk (SeqPortPtr spp, Int4 start, Int4 chunk, Int4 len)
{
  CharPtr buffer;
  Int4    pos, count;

  count = ((start + chunk) > len) ? (len - start) : chunk;

  buffer = MemNew (sizeof (Char) * (size_t) (count+1));
  if (buffer == NULL)
    return buffer;

  SeqPortSeek (spp, start, SEEK_SET);
  pos = 0;
  while (pos < count)
  {
    buffer[pos] = SeqPortGetResidue (spp);
    pos++;
  }
  buffer[pos] = '\0';

  return buffer;
}

/*
 * PatternMatch
 *
 * Scans a sequence for the compiled pattern and records the result per
 * position: fptr[p] is set to 80.0 when the pattern matches starting at
 * position p and to 0.0 otherwise.
 *
 *   spp       sequence port to read from
 *   len       sequence length; fptr must have room for len values
 *   cpp       compiled pattern
 *   plen      maximum length of a match (see ComPatLenMax)
 *   fptr      output array
 *   moltype   passed through to ResidueMatch
 *
 * The sequence is read in chunks of SQRDBF residues.  Consecutive chunks
 * overlap by plen residues; positions in the overlap are evaluated again
 * with the following chunk, where the full match window is available.
 * When plen leaves no room for a positive step the whole sequence is read
 * as a single chunk instead, so the loop always advances.  Scanning stops
 * early if a chunk cannot be allocated.
 */
static void PatternMatch (SeqPortPtr spp, Int4 len, ComPatPtr cpp,
                          Int4 plen, FloatHiPtr fptr, Int2 moltype)
{
  Int4       i, step, chunk = SQRDBF;
  Int4       patlen;
  CharPtr    seqhead, sequence;
  FloatHiPtr fcur;

  if (spp == NULL || fptr == NULL)
    return;

  step = chunk - plen;
  if (step < 1)
  {
    /* pattern window is at least as long as a chunk */
    chunk = len;
    step = len;
  }

  for (i = 0; i < len; i += step)
  {
    seqhead = sequence = GetSeqChunk (spp, i, chunk, len);
    if (seqhead == NULL)
      break;

    fcur = fptr + i;
    while (*sequence != '\0')
    {
      *fcur = 0.0;
      if (ResidueMatch (sequence, cpp, moltype, &patlen))
        *fcur = 80.0;
      fcur++;
      sequence++;
    }
    MemFree (seqhead);
  }
  return;
}

/*
 * BioPattern
 *
 * Obtains a pattern, compiles it and scans the bioseq with it, writing one
 * value per sequence position into fptr (see PatternMatch).
 *
 *   bsp          bioseq to scan
 *   fptr         output array with room for bsp->length values
 *   length       not used by this function
 *   pwindow      if not NULL, receives the maximum match length of the
 *                compiled pattern
 *   patterntype  selects a predefined pattern file when pattern is NULL
 *   pattern      pattern text, or NULL to load the file selected by
 *                patterntype.  A caller supplied string is cleaned in
 *                place by CleanPattern and remains owned by the caller.
 *   flagInvert   when TRUE the compiled pattern is passed through
 *                InvertPattern before scanning
 *   moltype      0 reads the sequence as IUPAC nucleotide, any other value
 *                as IUPAC amino acid code
 *
 * Returns without touching fptr when no usable pattern is available, when
 * compilation fails, or when the sequence port cannot be opened.
 */
static void BioPattern (BioseqPtr bsp, FloatHiPtr fptr, Int4 length,
                        Int4Ptr pwindow, Uint1 patterntype,
                        CharPtr pattern, Boolean flagInvert, Int2 moltype)
{
  Int4             i;
  CharPtr          ptrnp;
  SeqPortPtr       spp;
  ComPatPtr        cpp;

  if (bsp == NULL || fptr == NULL)
    return;

  /* ptrnp == NULL later means "pattern was allocated here, free it" */
  ptrnp = pattern;
  if (pattern == NULL)
  {
    switch (patterntype)
    {
     case NA_PATTERN_HR:
      pattern = ReadPattern ("KSachr.pat");
      break;
     case NA_PATTERN_ATAAG:
      pattern = ReadPattern ("KSataag.pat");
      break;
     case NA_PATTERN_P_ATG:
      pattern = ReadPattern ("KSpatg.pat");
      break;
     case NA_PATTERN_P_K_ATG:
      pattern = ReadPattern ("KSpkatg.pat");
      break;
     default:
      break;
    }
  }
  else
  {
    CleanPattern (pattern);
  }
  if (pattern == NULL)
    return;
  if (*pattern == '\0')
  {
    /* an empty buffer read from a file still has to be released */
    if (ptrnp == NULL)
      MemFree (pattern);
    return;
  }

  cpp = CompilePattern (pattern, moltype);
  if (flagInvert)
    cpp = InvertPattern (cpp);
  if (ptrnp == NULL)
    MemFree (pattern);
  if (cpp == NULL)
    return;

  i = ComPatLenMax (cpp);
  if (pwindow != NULL)
    *pwindow = i;
  if (moltype == 0)
    spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacna);
  else
    spp = SeqPortNew (bsp, 0, bsp->length-1, 0, Seq_code_iupacaa);
  if (spp != NULL)
  {
    PatternMatch (spp, bsp->length, cpp, i, fptr, moltype);
    SeqPortFree (spp);
  }
  ComPatFree (cpp);

  return;
}

/* functions - external */

/*
 * PatternSeq
 *
 * Builds a real-valued sequence graph with one value per residue of bsp
 * that marks the positions where the pattern matches.
 *
 *   bsp          bioseq to scan; NULL makes the call a no-op
 *   window       receives the maximum match length of the pattern (set by
 *                BioPattern when a pattern could be compiled)
 *   patterntype  NA_PATTERN* for moltype 0, AA_PATTERN for moltype 1
 *   sgptr        existing graph or NULL.  When not NULL and moltype is 0
 *                the pattern is inverted before scanning, and the new
 *                graph is stored in sgptr->next.
 *   pattern      pattern text, or NULL to use the file selected by
 *                patterntype
 *   moltype      0 for nucleotide, 1 for protein; must agree with bsp->mol
 *
 * Returns sgptr when it was not NULL, otherwise the new graph.  sgptr is
 * returned unchanged when bsp is NULL, when moltype does not fit the
 * bioseq or is neither 0 nor 1, or when the graph or its value array
 * cannot be allocated.  For an unsupported patterntype the new graph is
 * freed and NULL is linked/returned in its place.
 */
extern SeqGraphPtr PatternSeq (BioseqPtr bsp, Int4Ptr window,
                               Uint1 patterntype, SeqGraphPtr sgptr,
                               CharPtr pattern, Int2 moltype)
{
  FloatHiPtr    fptr;
  Int4          gwidth = 500;
  Boolean       flagInvert;
  SeqGraphPtr   sgp;

  if (bsp != NULL)
  {
    if (ISA_na (bsp->mol) && moltype != 0)
      return sgptr;
    if (ISA_aa (bsp->mol) && moltype != 1)
      return sgptr;

    flagInvert = TRUE;
    if (sgptr == NULL || moltype == 1)
      flagInvert = FALSE;

    if ((sgp = SeqGraphNew ()) != NULL)
    {
/* type and  number of values and compression */
      sgp->numval = bsp->length;
      sgp->compl = (Int4) (bsp->length / gwidth);
      if ((bsp->length%gwidth) != 0)
        sgp->compl += 1;
/* graph type */
      sgp->flags[2] = 1;
      sgp->values = (Pointer) MemNew ((sizeof (FloatHi)) * sgp->numval);
/* without a value array there is nothing to write the results to */
      if (sgp->values == NULL && sgp->numval > 0)
      {
        sgp = SeqGraphFree (sgp);
        return sgptr;
      }
/* min/max */
      sgp->max.realvalue = 100.0;
      sgp->min.realvalue = 0.0;
      sgp->axis.realvalue = 0.0;
/* scaling */
      sgp->flags[1] = 1;
      sgp->a = 2;
      sgp->b = 0;
    }
    else
    {
      return sgptr;
    }
/* do it */
    fptr = (FloatHiPtr) sgp->values;
    switch (moltype)
    {
     case 0:
      switch (patterntype)
      {
       case NA_PATTERN:
       case NA_PATTERN_HR:
       case NA_PATTERN_ATAAG:
       case NA_PATTERN_P_ATG:
       case NA_PATTERN_P_K_ATG:
        BioPattern (bsp, fptr, sgp->numval, window, patterntype, pattern,
                    flagInvert, moltype);
        break;
       default:
        sgp = SeqGraphFree (sgp);
        break;
      }
      break;
     case 1:
      switch (patterntype)
      {
       case AA_PATTERN:
        BioPattern (bsp, fptr, sgp->numval, window, patterntype, pattern,
                    flagInvert, moltype);
        break;
       default:
        sgp = SeqGraphFree (sgp);
        break;
      }
      break;
     default:
/* unknown molecule type: release the graph allocated above */
      sgp = SeqGraphFree (sgp);
      return sgptr;
    }
    if (sgptr != NULL)
      sgptr->next = sgp;
    else
      sgptr = sgp;
  }
  return sgptr;
}
