// ----------------- buildlex.cc ----------------
// Build lexicons 
// Take an input dictionary, and create two lexicon libraries.
//   one for lisp BU parser, and another for parse.cc
// the input dictionary must be formatted in the same way as
//   the  esp-angla-vortoj.txt file from ftp.stack.urc.tue.nl/pub/esperanto/
// ';' lines are comment lines
// The filter.list file contains a list of desirable word category/subcategories
// ';' is also comment in the filter file
// Jui-Yuan Fred Hsu.  May 1995 
//  
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2, or (at your option)
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.


// ------------------ customization ------------------------------------

#define  LISP_PACKAGE    "\n(in-package :user)\n\n"
#define  LISP_BEGIN_LEX  "(setq *esp-lexicon* '( \n\n"
#define  LISP_END_LEX    "\n))\n"


// ----------------------------------------------------------------------

#include  "Debug.h"
#undef    LOCAL_DEBUG
#define   LOCAL_DEBUG  0 

#include "common.h"          // common definitions
#include "Collect.h"         // Collection of object (dynamic)
#include "strtok.h"          // String Token parser

#include <fstream.h>         // file stream processing

typedef  Collect<StrTok>     StrTokArray;         // Array of String tokens


// ---------------- read_filter ---------------------------------------
// ----------------------------------------------------------------------
// Read a filter (or exclusion) list file and append its entries to `arr`.
//
//   filter_name - path of the list file; opened read-only
//   arr         - collection receiving one heap-allocated StrTok copy per
//                 accepted line (NOTE(review): presumably Collect takes
//                 ownership of the pointer - confirm in Collect.h)
//   n           - minimum number of tokens a line needs to be accepted
//                 (default 2: category + subcategory; main() passes 3 for
//                 exclusion lists, whose lines also carry an Esperanto word)
//
// Lines whose first token begins with ';' are comments and are skipped.
// Lines with fewer than `n` tokens produce a warning on cout and are skipped.
// Terminates the program with exit(1) when the file cannot be opened.
void read_filter(char *filter_name, StrTokArray & arr, int n=2)
{
  StrTok buf;
  fstream input (filter_name, ios::in);  
  if (!input) { cout <<"Error, can't open "<<filter_name<<endl; exit(1); }

  MESG("\n---- reading from file "<<filter_name<<"\n");

  // Skip leading whitespace first so that trailing blank space at the end
  // of the file reaches eof() before another read is attempted.
  while ( input>>ws, !input.eof())             
   {
     input >>buf;
     
     if (buf.num()!=0 && buf[0][0]==';') continue;    // skip comment lines
     
     if (buf.num()<n)                                 // too few tokens
      {
        cout <<"Warning, incorrect line in "<<filter_name<<endl; 
        continue;
      }
     arr.Add( new StrTok(buf) );
     
     // Debug trace of the accepted entry (MESG/MESGx come from Debug.h).
     MESGx(buf[FIL_CAT] <<' ' <<buf[FIL_SUB]<<' ');
     if (n>2) { MESG(buf[FIL_ESP]); }
     else     { MESG(" "); }
   }
  input.close();                                      // as read_dictionary does
} 



// ----------------------- read_dictionary -----------------------------
// ----------------------------------------------------------------------
// Read a dictionary file and sort the accepted entries into `categs`.
//
//   dict_name - path of the dictionary file; opened read-only
//   categs    - array of N_CATEGS collections; an entry whose main-category
//               character equals CATEGS_STR[i] is appended to categs[i]
//   filter    - list of wanted (category, subcategory) pairs; an entry is
//               kept only if its pair appears here
//   ex        - exclusion list; an entry is dropped if its category,
//               subcategory and Esperanto word all match one line of `ex`
//
// Empty lines and lines whose first token begins with ';' are skipped.
// Lines with fewer than 4 tokens, and entries whose main category is not in
// CATEGS_STR, produce a warning on cout and are not stored.
// Terminates the program with exit(1) when the file cannot be opened.
void read_dictionary(char* dict_name, StrTokArray categs[], 
                     StrTokArray& filter, StrTokArray& ex)
{
  int           i;
  StrTok        buf;
  const char *  categ_c = CATEGS_STR;            // one char per main category

  MESG("\n---- reading from file "<<dict_name<<"\n");
    
  fstream input (dict_name, ios::in);
  if (!input) { cout <<"Error, can't open "<<dict_name<<endl; exit(1); }
  
  while ( input>>ws, !input.eof())               // read input dictionary
   {
     input >> buf;
     // NOTE(review): if StrTok's operator>> sets eof while successfully
     // reading a final line that lacks a trailing newline, this break
     // discards that line - confirm against strtok.h.
     if (input.eof()) break;
     
     if (buf.num()==0) continue;                 // skip blank lines
     if (buf[0][0]==';') continue;               // skip comments
     
     if (buf.num() <4)                           // erroneous entries
      {
        cout <<"Warning, line starting with <"<<buf[0]<<"> formatted "
             <<"incorrectly - it's ignored"<<endl;
        continue;
      }
     
     char  cat = buf[COL_CAT][0];               // retrieve main category
     char  sub = buf[COL_SUB][0];               // retrieve sub category
     char* esp = buf[COL_ESP];                  // Esperanto word

     MESGx( "\n"<<cat<<"  "<<sub<<"  "<<buf[0] ); 
     
     int found=0;
     for (i=0; i<filter.Number(); i++)         // check with filter list
       if (cat==filter[i][FIL_CAT][0] &&
           sub==filter[i][FIL_SUB][0] ) 
         { found=1; break; }
     
     if (!found) continue;                     // not wanted: skip this line

     found=0;
     for (i=0; i<ex.Number(); i++)             // check with exclusion list
       if (cat==ex[i][FIL_CAT][0] &&
           sub==ex[i][FIL_SUB][0] &&
           strcmp(esp, ex[i][FIL_ESP])==0)
         { found=1; break; }
               
     if (found) continue;                      // excluded: skip this line
               
     for (i=0; i<N_CATEGS; i++)                // add to proper category
       if (cat==categ_c[i])
        {
          categs[i].Add( new StrTok(buf) ); 
          MESGx(" -> Added"); 
          break;
        }
     
     if (i==N_CATEGS)                          // loop ran out: no category matched
       cout <<"Warning, Main Category <"<<cat<<"> is wrong"<<endl;
   }
  input.close();
}


// --------------------- output_cc ----------------------------------------
// ----------------------------------------------------------------------
// Write the lexicon for the CC parser to the file CC_VORTARO.
//
//   categs - array of N_CATEGS collections; each one is streamed in index
//            order using the collection's own operator<<, preceded and
//            followed by a line break
//
// Terminates the program with exit(1) when the output file cannot be opened.
void output_cc (StrTokArray categs[])
{
  MESG("\n\n---- writing to file "<<CC_VORTARO<<"\n");
    
  fstream output(CC_VORTARO, ios::out);
  // The file is being created for writing, so a failure here means it could
  // not be opened - same wording as the reader functions use.
  if (!output) { cout <<"Error, can't open "<<CC_VORTARO<<endl; exit(1); }

  for (int i=0; i<N_CATEGS; i++)
    output <<endl <<categs[i] <<endl;

  output.close();     
}


// ------------------- output_lisp ----------------------------------------
// ----------------------------------------------------------------------
// Write the lexicon for the Lisp parser to the file LISP_VORTARO.
//
//   categs - array of N_CATEGS collections, indexed by the CAT_* constants
//
// The file starts with LISP_PACKAGE and LISP_BEGIN_LEX and ends with
// LISP_END_LEX.  In between, every entry becomes one line of the form
//     ( <word> (<category-name> (sub <subcategory-name>))) 
// where <word> is the Esperanto word, written as "word+" for prefixes and
// "+word" for suffixes and grammatical suffixes.
//
// If CatName() or SubName() returns NULL for an entry, an error is printed on
// cerr and the rest of that category is skipped (output then continues with
// the next category).
// Terminates the program with exit(1) when the output file cannot be opened.
void output_lisp (StrTokArray categs[])
{
  MESG("\n\n---- writing to file "<<LISP_VORTARO<<"\n");
  
  fstream output(LISP_VORTARO, ios::out);
  // The file is being created for writing, so a failure here means it could
  // not be opened - same wording as the reader functions use.
  if (!output) { cout <<"Error, can't open "<<LISP_VORTARO<<endl; exit(1); }
  
  output << LISP_PACKAGE;
  output << LISP_BEGIN_LEX;

  for (int i=0; i<N_CATEGS; i++)
   {
     StrTokArray & arr = categs[i];
     
     for (int j=0; j<arr.Number(); j++) 
      {
        // Translate the one-character category codes into printable names.
        char* ptr = CatName( arr[j][COL_CAT][0] );
        char* pt2 = SubName( arr[j][COL_SUB][0] );
        
        if (ptr==NULL) {cerr <<"Internal error, CatName() incorrect\n"; break;}
        if (pt2==NULL) {cerr <<"Internal error, SubName() incorrect\n"; break;}

        output <<"( ";
        
        // '+' marks the side on which an affix attaches to a root.
        switch(i)
         {
           case CAT_PREF:  output <<arr[j][COL_ESP] <<"+";  break;

           case CAT_SUFF:                                   // both kinds of
           case CAT_GSUF:  output <<"+" <<arr[j][COL_ESP];  break;  // suffix
        
           default: output <<arr[j][COL_ESP]; break;
         }
        
        output <<" (" <<ptr <<" (sub "<<pt2 <<"))) \n";
      }
   }

  output << LISP_END_LEX;
  output.close();     
}


// ---------------------- main -------------------------------------
// ----------------------------------------------------------------------
int main()
{
  common_init();                                   // initialize common library

  // ----- categories: self-sust, correlatives, root, prefix, suffix, gramm suf
  
  StrTokArray categs[N_CATEGS];
  StrTokArray fil;                                  // filter list
  StrTokArray ex;
  read_filter(CC_FILTER, fil);                      // read the filter list
  read_filter(CC_EXCLUDE, ex, 3);                   // exclusion list
  read_dictionary(INPUT_VORTARO, categs, fil, ex);  // read main dictionary
  read_dictionary(SUPPL_VORTARO, categs, fil, ex);  // read supplimentary one
  output_cc(categs);                                // output for CC program

  StrTokArray categs2[N_CATEGS];
  StrTokArray fil2;                                   // filter list
  StrTokArray ex2;
  read_filter(LISP_FILTER, fil2);                     // read the fileter list
  read_filter(LISP_EXCLUDE, ex2, 3);                  // exclusion list
  read_dictionary(INPUT_VORTARO, categs2, fil2, ex2); // read main dictionary
  read_dictionary(SUPPL_VORTARO, categs2, fil2, ex2); // read supplimentary one
  output_lisp(categs2);                               // output for LISP program
           
  return 0;
}


// ----------------------  -------------------------------------
// ----------------------------------------------------------------------
/*


*/
