(in-package :user)
;;;======================================================================
;;; NLP code for use with Natural Language Understanding, 2nd ed.
;;; Copyright (C) 1994 James F. Allen
;;;
;;; This program is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 2, or (at your option)
;;; any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program; if not, write to the Free Software
;;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
;;;======================================================================

;;  THE GRAMMAR AND LEXICON
;;  This file contains functions that manage the grammar and the lexicon
;;  These are mostly I/O routines to provide for a more user-friendly system.



;;*******************************************************************************
;;  THE LEXICON DATA STRUCTURE
;; 
;; The lexicon is a hash table indexed by word, with an entry being a
;;   list of constituents.


(defstruct lex-entry constit id)

;; These functions convert from the user specified format into the more cumbersome
;;   internal format, and compute a rule id. 

(let ((lexicon (make-hash-table :size 1))
      (lex-ids nil))

  (defun init-lexicon (size)
    (setq lex-ids nil)
    (setq lexicon
      (make-hash-table :size size)))

  ;; Add constit to lexicon entry for word
  (defun add-to-lex (word constit &optional id)
    (push (make-lex-entry :constit constit
			  :id id)
	  (gethash word lexicon)))

  ;; retrieve the list of constituents associated with word
  (defun retrieve-from-lex (word)
    (gethash word lexicon))

;;  MAKE-LEXICON deletes the old active lexicon and creates a new one
  
  (defun make-lexicon (ls)
    (init-lexicon (length ls))
    (augment-lexicon ls))
  
  ;;  AUGMENT-LEXICON extends the current active lexicon

  (defun augment-lexicon (ls)
    (mapc #'make-lex ls))
  
  (defun get-lexicon nil
    lexicon)

  (defun gen-lex-id (id name)
    (if id 
      (cond ((member id lex-ids)
             (Format t "~%Warning: Duplicate lexical id used: ~S in word ~S" id name)
             id)
            (t (setq lex-ids (cons id lex-ids))
               id))
      (let ((id (gen-symbol name)))
        (setq lex-ids (cons id lex-ids))
        id)))
  
  )  ;; end of scope for LEXICON


;;   GET-LIST-OF-LEXICON-ENTRIES
;;   This returns a list of all the lex-entries for all words

(defun get-list-of-lexicon-entries nil
  (let ((l nil))
    (maphash #'(lambda (word entries)
                 (setq l (append l entries)))
             (get-lexicon))
    l))
    
;; UNKNOWN-WORD checks a sentence of unknown words to provide a warning message

(defun unknown-word (sentence)
  (if sentence
      (if (retrieve-from-lex (car sentence))
	  (unknown-word (cdr sentence))
	(car sentence)
	)))

;;  DEFINED-WORDS returns a list of all defined words

(defun defined-words nil
    (let ((words nil))
      (maphash #'(lambda (word entrylist)
                 (setq words (cons word words)))
             (get-lexicon))
      words))


;; MAKE-LEX creates a lexical entry from the user-input format

(defun make-lex (entry)
  (init-var-table)
  (let ((l (length entry)))
    (if (not (or (eq l 2) (eq l 3))) 
      (Format t "~%WARNING: Bad lexical entry: ~S~%" entry)))
  (let* ((name (car entry))
         (def (cadr entry))
         (cat (car def))
         (feats (cons (list 'LEX name) (cdr def)))
         (id (third entry)))
    (mapcar #'check-feat  feats)
    (if (or (noSemEnabled) (get-fvalue feats 'VAR))
      (add-to-lex name 
		  (build-constit cat(mapcar #'(lambda (x)
						(read-fv-pair x nil))
					    feats) nil)
		  id)
      (add-to-lex name 
		  (build-constit cat (cons (list 'VAR (make-var :name 'V))
					   (mapcar #'(lambda (x)
						       (read-fv-pair x nil))
						   feats))
				 nil)
		  id))))

;;    CHECK-FEAT verifies that a feature-value pair is in the right format

(defun check-feat (fv)
  (if (not (and (listp fv) (eq (length fv) 2)))
    (Format t "~%Warning: bad feature specification: more than one value in ~S~%" fv)
))

;;  ****************

;; LOOKUPWORD finds the entry in the lexicon for a specific word and creates
;;    chart entries for each interpretatiom. To do this, is also must know the starting position of
;;    the word. It sets ENTRY-PROB to the default value of 1.

(defun lookupword (word n)
  (let ((entries nil))
    (mapcar #'(lambda (lex-entry) 
                  (let ((id (lex-entry-id lex-entry)))
                  (setq entries (cons (build-entry 
                                       (instantiateVAR (lex-entry-constit lex-entry))
                                       n (+ n 1) nil 
                                       id
                                       1)
                                      entries))))
            (retrieve-from-lex word))
    entries))



;;*******************
;;  LEXICAL CATEGORIES

(let ((lexicalCats '(n v adj art p aux pro qdet pp-wrd name to)))
  
  (defun defineLexicalCats (cs)
    (if (listp cs) 
      (setq lexicalCats cs)
      (Format t "Bad Format in ~S~%  you must pass in a list of lexical categories" cs)))

  (defun addLexicalCat (c)
    (if (symbolp c)
	(if (not (member c lexicalCats))
	    (setq lexicalCats (cons c lexicalCats)))
      (Format t "Lexical categories must be atoms. ~S is ignored" c)))
  
  (defun getLexicalCats nil
    lexicalCats)
  
  (defun lexicalConstit (c)
    (and (constit-p c) (member (constit-cat c) lexicalCats)))
  
  (defun nonLexicalConstit (c)
    (and (constit-p c) (not (member (constit-cat c) lexicalCats))))

)  ;; end scope of LEXICALCATS


;;**************************************************************************
;;  THE GRAMMAR DATA STRUCTURE

;;     Grammar rules are of the form
;;        (<constit-pattern>  ->  <constit-pattern> ... <constit-pattern>)
;; e.g.,  ((S (INV -) (AGR ?a)) -> (NP (AGR ?a)) (VP (AGR ?a)))

(defstruct (rule
            (:print-function (lambda (p s k)
                               (Format s "~%<~S~%   ~S ~S>" (rule-lhs p) (rule-id p) (rule-rhs p)))))
  lhs id rhs)

;; this copies all the variables in a rule, making sure that identical variables
;;  in different places are replaced by the identical copy
;;  This allows the same rule to be used multiple times in a parse without
;;   running into any variable conflict problems
;;   Any variables specified in the binding list will be replaced by their value
;;    just as they would using the subst-in function

; --------- Fred, Mar 9, 95 ----       
; This is where the action of binding takes place
; rule -> complete rule
; bndgs -> which variables to instantiate

(defun copy-vars-in-rule (rule bndgs)
  (set-var-table bndgs)
  (make-rule :lhs (copy-constit-in-rule (rule-lhs rule))
             :id (rule-id rule)
             :rhs (mapcar #'copy-constit-in-rule (rule-rhs rule))))

(defun copy-constit-in-rule (constit)
  (make-constit :cat (constit-cat constit)
                :feats (copy-vars-in-feats (constit-feats constit))
                :head (constit-head constit)))

(defun copy-vars-in-feats (x)
  (cond ((symbolp x) x)
        ((var-p x)
         (let ((newvar (get-var x)))
           (if newvar newvar
               (add-var x (make-var :name (var-name x) :values (var-values x))))))

; --------- Fred, Mar 9, 95 ----       
; it's a simple list
; this is called each time the rule is PARTIALLY matched
;
; check if it is our 'lisp execution' expression

        ((listp x)

	(setq x (mapcar #'copy-vars-in-feats x) )          ; bind all variables

;	(write '(this is a list -- x before execution ))        
;	(write x)

;	(when (eq (car x) '^) (setq x (eval (cadr x))) )

;	(write '(this is a list -- x after execution ))        
;	(write x)

	x
        )

; it's a contituent

        ((constit-p x)

;(write '(this is a constituent))
;(write x)

         (make-constit :cat (constit-cat x)
                       :feats (copy-vars-in-feats (constit-feats x))
                       :head (constit-head x))
        )
        
        (t x)))
         

(let ((grammar nil)
      (rule-ids nil))
  
  ;; MAKE-GRAMMAR removes the old active grammar and creates a new one
  
  (defun make-grammar (g)
    (setq rule-ids nil)
    (setq grammar (convert-grammar g)))
  
  ;;AUGMENT-GRAMMAR adds a new grammar onto the existing active grammar 
  
  (defun augment-grammar (g)
    (setq grammar (append grammar (convert-grammar g))))
  
  (defun getGrammar nil
    grammar)

  (defun verify-rule-id (id)
    (if (not (member id rule-ids))
       (setq rule-ids (cons id rule-ids))))
  
  ) ;; end scope of variable GRAMMAR

;;  CONSTRUCTION OF GRAMMAR FROM INPUT FORMAT

;;   These functions convert a grammar specified in CAT or headfeature
;;   format into internal grammar format

;; CONVERT-GRAMMAR does the actual conversion from the input formats

(defun convert-grammar (g)
  (let ((format (car g))
        (rules (cdr g)))
    (Cond ((eq format 'CAT)
           (merge-lists (mapcar #'build-rule rules)))
          ((eq (car format) 'Headfeatures)
             (mapcar #'(lambda (x)
                         (insertHeadFeatures x (cdr format)))
                      (merge-lists (mapcar #'build-rule rules))))
          (t (Warn "***WARNING*** Bad grammar format") g))))

;;  MERGE-LISTS collapses a list of lists into one list (using append)
   
(defun merge-lists (g)
  (cond ((null g) nil)
        (t (append (car g) (merge-lists (cdr g))))))
                                                                     
;;  BUILD-RULE
;;   inserts the CAT feature for each constituent and builds all the variables.
;;   It also checks the format of the rule.
;;   It returns a list of transformed rules. If gaps are disabled, 
;;   there will only be one element in the list. Otherwise, multiple rules
;;   may be generated using the algorithm in Chapter 5.  


(defun build-rule (r)
  (init-var-table)
  (if (not (verify-rule-id (cadr r)))
    (Format t "~% WARNING: Duplicate rule id, ~S, used in rule~%   ~S~%    Grammar will not work with probabilistic  options"
            (cadr r) r))
  (let ((newrule
         (make-rule :lhs (Verify-and-build-constit (car r) r nil)
                          :id (cadr r) 
                          :rhs (mapcar #'(lambda (x)
                                           (cond ((eq (car x) 'head)
                                                  (if (caddr x) (Format t "~%***WARNING*** Bad head specification format in rule ~%~S~%"
                                                                        r))
                                                  (verify-and-build-constit (cadr x) r t))
                                                 (t (verify-and-build-constit x r nil))))
                                       (cddr r)))))
    (if (GapsDisabled) (list newrule)
        (generate-gap-features-in-rule newrule))))


;;  This checks the form of a constituent in a rule, and converts it to
;;   the internal format. The rule arg is just passe in for the error message

(defun verify-and-build-constit (constit rule head)
  (if (not (atom (car constit)))
    (Format t "~%***WARNING*** Constituent category must be an atom. Bad constituent ~s~%     in rule ~s~%"
            constit rule))
  (let ((feats (mapcar #'(lambda (x) (read-fv-pair x rule))
                       (cdr constit))))
    (build-constit (car constit) feats head)))

;;  READ-FV-PAIR reads a single feature-value pair and returns its internal format

(defun read-fv-pair (fv-pair rule)
    (if (not (and (listp fv-pair) 
                  (eql (list-length fv-pair) 2)))
      (Format t "~%***WARNING*** Bad feature-value specification ~s in rule ~s~%"
              fv-pair rule)
      (list (car fv-pair) (read-value (cadr fv-pair) rule))))

;;  READ-VALUE checks the value to see if it is a variable, or an embedded
;;   constituent.

(defun read-value (val rule)
  (cond 
        ; --------------- value is a variable
        
        ((isvar val)
         (let* ((var-name (if (atom val)
			      (string-left-trim "?" val)
			    (symbol-name (cadr val))))
                (var (get-var var-name)))
           (cond (var
		  (if (and (listp val)
			   (cddr val))
		      (if (and (var-values var)
			       (not (equal (var-values var)
					   (cddr val))))
			  (format t "~&Warning: conflicting values given ~
                                  to the same variable ~s in rule ~s~%"
				  var
				  rule)
			(setf (var-values var) (cddr val))))
		  var)
		 (t
		  (if (atom val)
		      (add-var var-name (build-var (gentemp var-name)
						   nil))
		    (add-var var-name (build-var (gentemp var-name)
						 (cddr val))))))))

        ; ------------- value is an embedeed constituent

        ((isembeddedconstit val)
         (if (and (or (atom (second val))
		      (isvar (second val)))
                  (listp (third val)))
           (make-constit :cat (read-value (cadr val) rule)
			 :feats (mapcar #'(lambda (x) (read-fv-pair x rule))
					(cddr val)))
           (Format t "~%Warning: bad embedded constituent specification found: ~S~%" val)))

        ; ------ value is an atomic unit        
      
        ((atom val) val)
      
        ; ------- value is a list 
        
        ((listp val) (mapcar #'(lambda (x) (read-value x rule)) val))))

;;  This allows variables to be specified in three different forms 
;;           ?X,  (? X), or (? X Val1 ... Valn)

(defun isvar (expr)
  (or (and (symbolp expr)
           (equal (char (symbol-name expr) 0) #\?))
      (and (listp expr)
           (equal (car expr) '?))))

;;  Embedded constituents are of form (% cat feat-val-list)
(defun isembeddedconstit (expr)
  (and (listp expr)
       (equal (car expr) '%)))


;;   VAR-TABLE MAINTENANCE
;;  These functions maintain a binding list of variables. This is used to make sure
;;   that variables in the input get interpret as the same variable structure.

(let ((var-table nil))

  (defun init-var-table nil
    (setq var-table nil))

  (defun set-var-table (bndgs)
    (setq var-table bndgs))

  (defun get-var (x)
    (cadr (assoc x var-table :test #'equal)))
  
  (defun add-var (name var)
    (setq var-table (cons (list name var)
                          var-table))
    var)

) ;; end scope of VAR-TABLE


;;  INSERTHEADFEATURES

;; inserts head features into a rule

(defun insertHeadFeatures (rule headfeatList)
  (let* ((mother (rule-lhs rule))
         (headfeats (cdr (assoc (constit-cat mother) headfeatList)))
         (rhs (rule-rhs rule))
         (head (findfirsthead rhs)))
    (cond 
     ;;  If there are no head features, just return the old rule
     ((null headfeats) rule)
     ;;  Otherwise, construct the feature-value pairs for the headfeats and insert them
     (t
      (if (null head) (Format t "~%****WARNING: No head specified in rule ~s" rule))
      (Insertfeatures rule 
                      (mapcar #'(lambda (hf)
                                  (BuildHeadFeat hf mother head rule))
                              headfeats))))))

             
;;  BUILDHEADFEAT builds a feature/value pair to insert in the mother and head
;;  We must check both the mother and head to see if these features already are
;;  defined
(defun BuildHeadFeat (headfeat mother head rule)
  (let ((mval (get-value mother headfeat))
        (hval (get-value head headfeat))
        (varname (gen-symbol headfeat)))
    (cond ((and (null mval) (null hval))
           (list headfeat (make-var :name varname)))
          ((null mval)
           (list headfeat
		 (if (var-p hval)
		     hval
		   (make-var :name varname :values (list hval)))))
          ((or (null hval)
	       (equal mval hval))
           (list headfeat
		 (if (var-p mval)
		     mval
		   (make-var :name varname :values (list mval)))))
          (t (Format t "~%***WARNING*** Head feature ~s incompatible ~%   in rule ~s"
                     headfeat rule)))))

(defun findFirstHead (rhs)
  (cond ((null rhs) nil)
        ((constit-head (car rhs)) (car rhs))
        (t (findFirstHead (cdr rhs)))))

;;INSERTFEATURES builds the rule, inserts the feature-value pairs (values)
;;   into the mother and any consituent on the rhs marked as a head.

(defun insertfeatures (rule values)
  (let ((mother (rule-lhs rule)))
    (make-rule :lhs (build-constit (constit-cat mother)
                                   (mergefeatures (constit-feats mother) values) nil)
               :id (rule-id rule)
               :rhs (mapcar #'(lambda (c)
                                (if (constit-head c)
                                  (build-constit (constit-cat c)
                                                 (mergefeatures (constit-feats c) values)
                                                 t)
                                  c))
                            (rule-rhs rule)))))

;; MERGEFEATURES adds the feature-value pairs in feats to the constit,
;; It assumes that the feature value in newfeats is the one desired if
;; both present

(defun mergefeatures (oldfeats newfeats &optional results)
  (if (null oldfeats)
      (append results newfeats)
    (let* ((oldpair (car oldfeats))
	   (feat (car oldpair))
	   (newpair (assoc feat newfeats))
	   (pair (if newpair newpair (car oldfeats))))
      (mergefeatures (cdr oldfeats)
		     (removefeature feat newfeats)
		     (cons pair results)))))


;; REMOVEFEATURE returns a copy of a feature list with the feature named fname removed

(defun removefeature (fname flist)
 (remove-if #'(lambda (y) (eq fname (car y))) flist))

;;  GEN-SYMBOL generates a unique identifier to identify a constituent

(defun gen-symbol (name)
  (gentemp (string name)))

;;********************************************************************************
;;
;;   USER ACCESS TO THE GRAMMAR

(defun get-grammar ()
  (getGrammar))

(defun show-grammar ()
  (mapcar #'%print (getGrammar))
  t)

(defun %print (obj)
  (Format t "~%~S" obj)
)
         
;; ********************************************************************************
;;
;;   PRINTING OUT THE CHART
;;
;;  Printing the entire chart

(defun show-chart nil
  (Format t "~%~%  T H E   C H A R T ~%")
  (mapcar #'Show-named-entry (get-constits-by-name))
  (Format t "~%"))

(defun show-named-entry (e)
  (let* ((entry (cadr e))
         (name (car e)))
    (show-entry-with-name name entry)))

(defun show-entry-with-name (name entry)
    (Format t "~s:~S from ~S to ~S" name (entry-constit entry)
            (entry-start entry) (entry-end entry))
    (if (is-prob-parse)
      (Format t ", Prob = ~s" (entry-prob entry)))
    (Format t "~%")
)

;;  Printing out every S structure that spans the sentence

(defun show-answers nil
  (Format t "~%~% THE COMPLETE PARSES FOUND~%")
  (mapcar #'(lambda (x)
              (Print-solution x (get-sentence-length)))
                                 (get-constits-by-name))
  t)

(defun print-solution (ce length)
  (let* ((entry (cadr ce))
        (c (entry-constit entry))
        (g (get-value c 'gap)))
    (if (and (eq (get-value c 'cat) 's)
             (eq (entry-start entry) 0) 
             (eq (entry-end entry) length)
             (or (eq g '-) (null g))) 
      (print-tree 0 (car ce) entry nil))))

; -------- Fred, mar 10, 1995
; also may 95
;  
; if global var *partial-ok* is set to true, then simply find partial parsing

(setq *partial-ok* t)

(defun partial-ok ()      (setq *partial-ok* t))
(defun partial-not-ok ()  (setq *partial-ok* nil))

(defun show-sem ()
  (Format t "~%~%Semantic Interpretation~%~%")
  
  (write *sentence*)                   ; print original sentence
  (Format t "~%~%")

  (setq *completed-parse*
      (make-constit :cat 'self                         ; default value
                    :feats '((lex ne) (sub s_adv)) ))

  (when (not *partial-ok*)
    (mapcar #'(lambda (x)                              ; search each parsing
                (Print-sem x (get-sentence-length)))   ; find final 'sentence'
                             (get-constits-by-name))
  )
  
  (when *partial-ok* 
    (setq continue t)
    (mapcar #'(lambda (x)                              ; search each parsing
               (setq continue 
                (find-best-parse x 
                                 (get-sentence-length)
                                 continue)))
                             (get-constits-by-name))
  )
  
t)

(setq *completed-parse* 0)


; ---- trim off the parsed sented.  remove redundant features. save result
; the resulting sentence can be fed to (realize) to do sentence generation

(defun save-parse (c)
 
   (setq feat-l (constit-feats c))
   
   (mapcar #'(lambda (x)

         (setq feat-l 
             (removefeature x feat-l))
      )

      '( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 var)
   )
   
   (setq *completed-parse* 
      (make-constit :cat (get-value c 'cat)
                    :feats feat-l ))
)

; ---------- find-best-parse ------------
;
; find the best parse: the first full parse found
; this thing is used to replace (print-sem). 

(defun find-best-parse (ce length continue)         
  (let* ((entry (cadr ce))            
         (c (entry-constit entry))
         (sem (get-value c 'sem))
         (g (get-value c 'gap)))
         
    (when (and (eq (entry-start entry) 0) 
               (eq (entry-end entry) length)   ; sentence fully completed?
               (or (eq g '-) (null g))
               continue ) 

          (save-parse c)                     ; save the completed parsing

          (write *completed-parse*)          ; print parsed sentence
          (Format t "~%~%")

          (setq Xdepth 0)
          (mapcar #'print-sem2 sem)          ; print semantics 
                 
          (Format t "~%~%")
          
          (setq continue nil)                ; stop searching
     )
     
    continue
   )
)


; ----------- print-sem -----------
;
; check if this is a finished sentence
; ce: parsing result
; length: length of original sentence

(defun print-sem (ce length)         
  (let* ((entry (cadr ce))            
         (c (entry-constit entry))
         (sem (get-value c 'sem))
         (g (get-value c 'gap)))
         
    (when (and (eq (get-value c 'cat) 's)    ; category "s" ? (sentence)
             (eq (entry-start entry) 0) 
             (eq (entry-end entry) length)   ; sentence fully completed?
             (or (eq g '-) (null g))) 

          (save-parse c)                     ; save the completed parsing

          (write *completed-parse*)          ; print parsed sentence
          (Format t "~%~%")

          (setq Xdepth 0)
          (mapcar #'print-sem2 sem)          ; print semantics 
                 
         (Format t "~%~%")
     )))

(setq Xdepth 0)

; --------- print semantics entry 

(defun print-sem2 ( x )
    
   (cond 
  
     ( (or (not (listp x))      
           (not (listp (car x))))
           
       (print-blanks Xdepth) (write x) (format t "~%") )

     ( t  (print-blanks Xdepth) (format t "[~%")
          (setq Xdepth (+ Xdepth 1)) 
          (mapcar #'print-sem2 x) 
          (setq Xdepth (- Xdepth 1))
          (print-blanks Xdepth) (format t "]~%")
     )
   )
)

     
;; Prints out a constituent, instantiating the variables in its subconstituents 
;;  and prints them with appropriate indentation

(defun print-tree (prefix name entry bindings)
  (let* ((subconstitnames (getsubconstitnames 1 (entry-constit entry)))
         (subentries (mapcar #'get-entry-by-name
                              subconstitnames))
         (subconstits (mapcar #'entry-constit subentries))
         (bndgs (merge-lists 
                 (cons bindings (mapcar #'constit-match (entry-rhs entry) subconstits)))))
  (print-blanks prefix)
  (show-entry-with-name name entry)
  (mapcar #'(lambda (n e) (print-tree (1+ prefix) n e bndgs))
             subconstitnames (mapcar #'(lambda (e) (subst-in e bndgs)) subentries))))
         
(defun getsubconstitnames (n constit)
  (let ((sub (get-value constit n)))
    (if sub (cons sub (getsubconstitnames (1+ n) constit))
        nil)))

(defun print-blanks (n)
 (dotimes (i n)
   (format t "  ")))
                          
              




