#include <ncbi.h>
#include <accentr.h>
#include <gather.h>
#include <tofasta.h>
#include <urkptpf.h>

/* Value passed as the second argument of every ErrPostEx call in this file. */
#define TOP_ERROR 1
/* Module name and source file name, published through THIS_MODULE and
   THIS_FILE.  NOTE(review): presumably consumed by the toolkit's
   error-posting macros -- confirm against the NCBI error headers. */
static char _this_module[] = "srchnt";
#undef  THIS_MODULE
#define THIS_MODULE _this_module
static char _this_file[] = __FILE__;
#undef  THIS_FILE
#define THIS_FILE _this_file

/*
 * State shared between Main and the GetBioseq gather callback.
 * Main fills in gi, passes the structure as the gather user data, and
 * reads bsp back after GatherSeqEntry returns.
 */
typedef struct gather_Nuc_Bioseq
{
  Int4      gi;   /* GI to look for; GetBioseq takes the first Bioseq when gi <= 0 */
  BioseqPtr bsp;  /* Bioseq selected by GetBioseq; NULL until one is selected */
} Gather_NBS, PNTR Gather_NBSPtr;

/*
 * Command-line argument table handed to GetArgs by Main.
 * Main reads the parsed values by array index, so the order of the
 * entries below must not change.
 * NOTE(review): the initializer layout appears to be
 * { prompt, default, min, max, optional, flag char, type, float value,
 *   int value, string value } -- confirm against the toolkit's Args
 * declaration.
 */
Args myargs[] =
{
  /* [0] -g : GI to fetch; Main uses Entrez when the value is > 0 */
  { "nucleotide GI", "0", "0", "4000000", TRUE,
    'g', ARG_INT, 0.0, 0, NULL},
  /* [1] -f : FastA input file, read when no GI is given */
  { "FastA file", NULL, NULL, NULL, TRUE,
    'f', ARG_STRING, 0.0, 0, NULL },
  /* [2] -r : pattern file passed to ReadRENPattern instead of "ncbiren.dat" */
  { "local REN file", NULL, NULL, NULL, TRUE,
    'r', ARG_STRING, 0.0, 0, NULL },
  /* [3] -s : when set, Main prints matches via the fragment-length sorted path */
  { "sort by fragment length", "FALSE", "FALSE", "TRUE", TRUE,
    's', ARG_BOOLEAN, 0.0, 0, NULL},
  /* [4] -m : value forwarded as the third argument of PatternMatchBioseq */
  { "mismatch", "0", "0", "2", TRUE,
    'm', ARG_INT, 0.0, 0, NULL},
  /* [5] -c : value forwarded as the second argument of ReadRENPattern */
  { "enzyme with known cutsite only", "TRUE", "FALSE", "TRUE", TRUE,
    'c', ARG_BOOLEAN, 0.0, 0, NULL},
  /* [6] -n : when set, "ncbirnam.dat" is given to ReadPatternNames */
  { "use ncbirnam.dat names file", "FALSE", "FALSE", "TRUE", TRUE,
    'n', ARG_BOOLEAN, 0.0, 0, NULL},
  /* [7] -N : names file given to ReadPatternNames; takes precedence over -n */
  { "user names file", NULL, NULL, NULL, TRUE,
    'N', ARG_STRING, 0.0, 0, NULL }
};

/*
 * GetBioseq - gather callback that records the Bioseq to be searched.
 *
 * gcp->userdata must point to a Gather_NBS.  The first OBJ_BIOSEQ item
 * seen is stored in its bsp field when either no GI was requested
 * (gi <= 0) or GetGIForSeqId on the item's id equals the requested GI.
 * Once bsp is set, later items are left untouched.
 *
 * Returns FALSE when gcp or its user data is NULL, TRUE otherwise.
 */
static Boolean GetBioseq (GatherContextPtr gcp)
{
  Gather_NBSPtr  state;
  BioseqPtr      candidate;

  if (gcp == NULL)
    return FALSE;

  state = (Gather_NBSPtr) gcp->userdata;
  if (state == NULL)
    return FALSE;

  /* Already have a Bioseq, or this item is not a Bioseq: nothing to do. */
  if (state->bsp != NULL || gcp->thistype != OBJ_BIOSEQ)
    return TRUE;

  candidate = (BioseqPtr) (gcp->thisitem);
  if (candidate == NULL)
    return TRUE;

  /* The GI lookup is only performed when a specific GI was requested. */
  if (state->gi <= 0 || state->gi == GetGIForSeqId (candidate->id))
    state->bsp = candidate;

  return TRUE;
}

/*
 * srchnt_match_start - start coordinate of one pattern-match location,
 * as reported by SeqLocStart.  For a SEQLOC_MIX the first component
 * location (slp->data.ptrvalue) is used; otherwise slp itself.
 */
static Int4 srchnt_match_start (SeqLocPtr slp)
{
  if (slp->choice == SEQLOC_MIX)
    return SeqLocStart ((SeqLocPtr) slp->data.ptrvalue);
  return SeqLocStart (slp);
}

/*
 * Main - program entry point.
 *
 * Parses the command line (see myargs), obtains a SeqEntry either from
 * Entrez (-g) or from a FastA file (-f), and for every nucleotide
 * Bioseq found prints, per restriction-enzyme pattern, the 1-based
 * start of each match.  With -s the matches go through
 * EmbedFragLengthInfo / SeqAlignSortByLength and the first score value
 * of each segment is printed next to the start.
 *
 * Returns 1 when GetArgs fails and 0 on success; every other failure
 * posts an error and calls exit (1).
 */
Int2 Main (void)
{
  Int2        argcount;
  Boolean     flagHaveNet;

  Int4        gi;
  SeqEntryPtr sep;
  ComPatPtr   cpp, cpph = NULL;
  SeqAlignPtr sap, sapn;
  SeqLocPtr   slp, slpn;
  StdSegPtr   ssp;

  FILE        *fiop = NULL;
  Char        noname[1];
  CharPtr     fastafile, namesfile, patfile;
  ValNodePtr  namelist;

  static CharPtr pattern_file = "ncbiren.dat";
  static CharPtr names_file = "ncbirnam.dat";

  static GatherScope  gs;
  GatherScopePtr      gsp;
  static Gather_NBS   gpbs;
  Gather_NBSPtr       gnbsp;

  argcount = sizeof (myargs) / sizeof (Args);
  if (!GetArgs ("REN_Search", argcount, myargs))
    return 1;

  if (myargs[0].intvalue == 0 && myargs[1].strvalue == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 100,
               "No gi or FastA file given :: for help :   srchnt -");
    ErrShow ();
    exit (1);
  }

  gsp = &gs;
  gnbsp = &gpbs;

  /* Ignore every object type during gathering except Bioseqs. */
  MemSet ((Pointer) gsp, 0, sizeof (GatherScope));
  MemSet ((Pointer) gsp->ignore, (int) (TRUE),
          (size_t) (OBJ_MAX * sizeof (Boolean)));

  gsp->ignore[OBJ_SEQDESC] = TRUE;
  gsp->ignore[OBJ_BIOSEQ] = FALSE;

  /*
   * File names are used in place rather than copied: the command-line
   * strings have no length limit, so copying them into fixed-size
   * buffers could overflow.  noname stands in for "no file given".
   */
  noname[0] = '\0';

  gi = myargs[0].intvalue;
  fastafile = (myargs[1].strvalue != NULL) ? myargs[1].strvalue : noname;

  if (gi > 0)
  {
    if (!EntrezInit ("srchnt", FALSE, &flagHaveNet))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 102,
                 "Entrez init failed");
      ErrShow ();
      exit (1);
    }
    sep = EntrezSeqEntryGet (gi, SEQENTRY_READ_BIOSEQ);
  }
  else
  {
    if ((fiop = FileOpen (fastafile, "r")) == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 103,
                 "Failed to open FastA file: %s", fastafile);
      ErrShow ();
      exit (1);
    }
    sep = FastaToSeqEntry (fiop, TRUE);
  }

  if (sep == NULL)
  {
    ErrPostEx (SEV_ERROR, TOP_ERROR, 104,
               "No seqentry found");
    ErrShow ();
    exit (1);
  }

  /* -N (explicit names file) takes precedence over -n (default file). */
  namesfile = noname;
  if (myargs[6].intvalue)
    namesfile = names_file;
  if (myargs[7].strvalue != NULL)
    namesfile = myargs[7].strvalue;

  namelist = ReadPatternNames (namesfile);

  while (sep != NULL)
  {
    /*
     * Reset the callback state for every SeqEntry.  GetBioseq returns
     * early once bsp is set, so a stale value would leave bsp pointing
     * into the SeqEntry freed at the end of the previous iteration.
     */
    gnbsp->gi = gi;
    gnbsp->bsp = NULL;
    GatherSeqEntry (sep, (Pointer) gnbsp, GetBioseq, (Pointer) gsp);

    if (gnbsp->bsp == NULL)
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 105,
                 "No bioseq found");
      ErrShow ();
      exit (1);
    }
    if (!ISA_na (gnbsp->bsp->mol))
    {
      ErrPostEx (SEV_ERROR, TOP_ERROR, 106,
                 "Not a nucleic bioseq");
      ErrShow ();
      exit (1);
    }

    /* The pattern list is loaded once, on the first nucleotide Bioseq. */
    if (cpph == NULL)
    {
      patfile = (myargs[2].strvalue != NULL) ? myargs[2].strvalue
                                             : pattern_file;
      cpph = ReadRENPattern (patfile, (Boolean) myargs[5].intvalue,
                             namelist);
      PalindromeCheck (cpph);
    }

    for (cpp = cpph; cpp != NULL; cpp = cpp->nextpattern)
    {
      printf (">%s\n", cpp->name);
      /* NOTE(review): the -m value is an integer in 0..2 but is cast to
         Boolean here -- confirm against PatternMatchBioseq's prototype. */
      sap = PatternMatchBioseq (gnbsp->bsp, cpp,
                                (Boolean) myargs[4].intvalue);
      if (sap != NULL)
        printf ("   Start\n");
      SeqAlignSortByStart (&sap);
      if (myargs[3].intvalue)
      {
        EmbedFragLengthInfo (sap, gnbsp->bsp->length);
        SeqAlignSortByLength (&sap);
        while (sap != NULL)
        {
          ssp = (StdSegPtr) sap->segs;
          slp = ssp->loc;
          printf ("%8ld  %8ld\n", (long) srchnt_match_start (slp)+1,
                                  (long) ssp->scores->value.intvalue);
          sapn = sap->next;
          SeqAlignFree (sap);
          sap = sapn;
        }
      }
      else
      {
        slp = MatchSa2Sl (&sap);
        while (slp != NULL)
        {
          printf ("%8ld\n", (long) srchnt_match_start (slp)+1);
          slpn = slp->next;
          SeqLocFree (slp);
          slp = slpn;
        }
      }
    }

    SeqEntryFree (sep);
    sep = NULL;
    /* Only the FastA path can deliver further entries. */
    if (fiop != NULL)
      sep = FastaToSeqEntry (fiop, TRUE);
  }

  urkComPatFree (cpph);
  ValNodeFreeData (namelist);
  /* fiop stays NULL on the Entrez path; there is nothing to close then. */
  if (fiop != NULL)
    FileClose (fiop);
  if (gi > 0)
    EntrezFini ();

  return 0;
}
