:- module(default_rule_dcg, [transfer_term/3, var_char/2]).

:- use_module(library(lists)).

:- op(1050, xfx, ==>).
:- op(1050, xfx, ?=>).
:- op(1055, xfy, &&).
:- op(1160, xfx, ::).
:- op(1060, xfx, :=).
:- op(1060, fx, /-).


% Declare that variables must start with "%"
var_char('%', 0'%).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% This is a direct clause grammar (DCG) defining the transfer rule
%%% notation.   You can use this grammar as a model if you want to define
%%% your own notation for transfer rules.
%%%
%%% In case you do want define an alternative notation, you need to take
%%% the following steps:
%%% (a) create your own grammar file, whose first non-comment should be
%%%
%%%    :- module(alt_rule_dcg, [transfer_term/3, var_char/2]).
%%%
%%%     where transfer_term is the start symbol of the grammar, and
%%%     var_char declares the single character starting variables.
%%%     The latter has to be defined (preferably at the top of the
%%%     file), e.g.
%%%
%%%       var_char('?', 0'?).
%%%
%%% (b) in any rule set that is to use the alternative notation, the very
%%% first line should be
%%%
%%%    "alt_rule_dcg <File>"
%%%
%%% where <File> gives the file name of the dcg (either absolute, or
%%% relative to the rule file).   So for example the first line might be
%%%
%%% "alt_rule_format $XLE/bin/alt_transfer_dcg.pl"
%%%
%%% (c) You may also find the following notes helpful in constructing your
%%% own grammar
%%%
%%% About DCGs (From the prolog manual):
%%% ====================================
%%%
%%% Definite clause grammars are an extension of the well-known
%%% context-free grammars. A grammar rule in Prolog takes the general form
%%%
%%%   head --> body.
%%%
%%% meaning "a possible form for head is body". Both body and head are
%%% sequences of one or more items linked by the standard Prolog
%%% conjunction operator ','.
%%%
%%% Definite clause grammars extend context-free grammars in the following ways:%%%
%%%
%%% 1. A non-terminal symbol may be any Prolog term (other than a variable
%%% or number).
%%%
%%% 2. A terminal symbol may be any Prolog term. To distinguish terminals
%%% from nonterminals, a sequence of one or more terminal symbols is
%%% written within a grammar rule  as a Prolog list. An empty sequence is
%%% written as the empty list '[]'. If the terminal symbols are character
%%% codes, such code-lists can be written (as elsewhere) as strings.
%%% An empty sequence is written as the empty list, '[]' or '""'.
%%%
%%% 3. Extra conditions, in the form of Prolog procedure calls, may be
%%% included in the righthand side of a grammar rule. Such procedure calls
%%% are written enclosed in '{}' brackets.
%%%
%%% 4. The left-hand side of a grammar rule consists of a non-terminal,
%%% optionally followed by a sequence of terminals (again written as a
%%% Prolog list).
%%%
%%% 5. Disjunction, if-then, if-then-else, and not-provable may be stated
%%% explicitly in the righthand  side of a grammar rule, using the
%%% operators ';' ('|'), '->', and '\+' as in a Prolog clause.
%%%
%%% 6. The cut symbol may be included in the right-hand side of a grammar
%%% rule, as in a Prolog clause. The cut symbol does not need to be
%%% enclosed in '{}' brackets.
%%%
%%% As an example, here is a simple grammar that parses an arithmetic
%%% expression (made up of digits and operators) and computes its value.
%%%
%%%   expr(Z) --> term(X), "+", expr(Y), {Z is X + Y}.
%%%   expr(Z) --> term(X), "-", expr(Y), {Z is X - Y}.
%%%   expr(X) --> term(X).
%%%   term(Z) --> number(X), "*", term(Y), {Z is X * Y}.
%%%   term(Z) --> number(X), "/", term(Y), {Z is X / Y}.
%%%   term(Z) --> number(Z).
%%%   number(C) --> "+", number(C).
%%%   number(C) --> "-", number(X), {C is -X}.
%%%   number(X) --> [C], {"0"=<C, C=<"9", X is C - "0"}.
%%%
%%% In the last rule, C is the character code of some digit.
%%%
%%%
%%% ====================================
%%%
%%% Some other notes:
%%%  * DCGs do not handle left recursion, so you need to be careful to
%%%    avoid this
%%%
%%%  * The arguments to non-terminal symbols are generally used to hold
%%%    the prolog terms that result from parsing input strings to those
%%%    categories.
%%%
%%%  * ws is a non-terminal used to consume whitespace
%%%
%%%  * In the rules below, both cut (!) and disjunction (;) are used to
%%%    make the rules more deterministic.  For example, and alternative
%%%    way of defining xfr_rule might be
%%%   
%%%    xfr_rule(LHS ==> RHS) -->
%%%        prolog_tuple(LHS), ws, "==>", !, ws, prolog_tuple(RHS).
%%%    xfr_rule(LHS ?=> RHS) -->
%%%        prolog_tuple(LHS), ws, "?=>", !, ws, prolog_tuple(RHS).
%%%
%%%    However, this way of doing it means that the left hand side of an
%%%    optional rule gets parsed twice: once on the assumption that it is
%%%    the lhs of an obligatory rule (which gets thrown away because we
%%%    don't then encounter a ==>), and then again as the lhs of an
%%%    optional rule.   The actual rule we use,
%%%   
%%%    xfr_rule(Rule) -->
%%%     prolog_tuple(LHS), ws,
%%%     (
%%%       ("==>", !, ws, prolog_tuple(RHS), {Rule = (LHS ==> RHS)})
%%%     ;
%%%       ("?=>", !, ws, prolog_tuple(RHS), {Rule = (LHS ?=> RHS)})
%%%     ).
%%%
%%%    ensures that the lhs is parsed only once.
%%%
%%%  * Procedural attachments to transfer rules (enclosed between braces
%%%    in the standard transfer notation defined here, just as DCGs enclose
%%%    procedural attachments between braces), need to be parse according
%%%    to full prolog syntax.
%%%
%%%  * The notation 0'<c>  is used by prolog to represent the ascii
%%%    code correspdoning to some character, e.g. 0'a = X.   Strings
%%%    (enclosed between double quotes) are just lists of these ascii
%%%    codes.



%-----------------------------------
% Top level transfer rule expression:


transfer_term((grammar=Name)) -->
    "grammar", !, ws, "=", ws, xfr_atom(Name), ws, ".".
transfer_term(procedural_attachments=Name) -->
    "procedural_attachments", !, ws, "=", ws, xfr_atom(Name), ws, ".".
transfer_term((:- Call)) -->
    ":-", !, ws, xfr_tuple(Call), ws, ".".
transfer_term(include(File)) -->
    "include(", !, ws, xfr_atom(File), ws, ")", ws, ".".
transfer_term((/- Fact)) -->
    "/-", !, ws, xfr_term(Fact), ws, ".".
transfer_term(MacroOrTemplate) -->
    xfr_term(Head), ws,
    (
      (":=", !, ws, macro_body(Body), ws, ".",
       {MacroOrTemplate = (Head := Body)})
    ;
      ("::", !, ws, template_body(Body),
       {MacroOrTemplate = (Head :: Body)})
    ).
transfer_term(Rule) -->
    xfr_rule(R1), ws,
    !,
    (
      ("&&", !, ws, rule_union(R2), ws, ".", {Rule = (R1 && R2)})
    ;
      (".", !, {Rule = R1})
    ).
transfer_term(RuleUnion) -->
    "(",  ws, rule_union(R1), ws, ")", ws,
    "&&", ws, rule_union(R2), ws, ".",
    {RuleUnion = (R1 && R2)}.


%-----------------------------------
% Rules

xfr_rule(Rule) -->
    xfr_tuple(LHS), ws,
    (
      ("==>", !, ws, xfr_tuple(RHS), {Rule = (LHS ==> RHS)})
    ;
      ("?=>", !, ws, xfr_tuple(RHS), {Rule = (LHS ?=> RHS)})
    ).
xfr_rule(TemplateCall) -->
    xfr_term(TemplateCall).



%-----------------------------------
% Rule union
rule_union((First && Rest)) -->
    "(", !, ws, rule_union(First), ws, ")", ws, "&&",
    ws, rule_union(Rest).
rule_union((First && Rest)) -->
    xfr_rule(First), ws, "&&", !, ws,
    rule_union(Rest).
rule_union(Last) -->
    xfr_rule(Last).



%-----------------------------------
% Whitespace:
ws --> [C], {member(C, " \t\n\v\f")}, !, ws.
ws --> [].


%-----------------------------------
% Transfer tuples:
%   --- rather a nuisance having to avoid left recursion

xfr_tuple(Tuples) -->
    "(", !, ws, xfr_tuple(T1), ws, ")",
    xfr_tuples1(T1, Tuples).
xfr_tuple(Tuples) -->
    "{", !, ws, prolog_call(T1), ws, "}",
    xfr_tuples1({T1}, Tuples).
xfr_tuple(Tuples) -->
    % Negated tuple
    "-(", !, ws, xfr_tuple(T1), ws, ")",
    xfr_tuples1(-(T1), Tuples).
xfr_tuple(Tuples) -->
    xfr_fact(T1),
    xfr_tuples1(T1, Tuples).
    


xfr_tuples1(T1, Tuples) -->    
    (
      (ws, ",", !, ws, xfr_tuple(T2), {Tuples = (T1,T2)})
    ;
      (ws, "|", !, ws, xfr_tuple(T2), {Tuples = (T1;T2)})
    ;
      {Tuples = T1}
    ).
      


%-----------------------------------
% Macro body

macro_body(Body) -->
    xfr_tuple(LHS),
    (
      (ws, "*", !, ws, xfr_tuple(RHS), {Body = (LHS * RHS)})
    ;
      {Body = LHS}
    ).


%-----------------------------------
% Template body

template_body(Body) -->
    ws, xfr_rule(B), ws,
    (
      (";", !, ws, template_body(Bs), {Body = (B;Bs)})
    ;
      (".", {Body = B})
    ).


%-----------------------------------
% Arbitrary xfr terms:
% Note that '%<VName>' is used to indicate a named variable

xfr_fact(-(T)) -->
    % Eugh: + and - can be used both to apply resource operators
    % to terms, and also to be atomic terms themselves as '-',
    % '-_', '+', '+_'
    
    "-", !,
    xfr_fact(T).
xfr_fact(+(T)) -->
    "+", !,
    xfr_fact(T).
xfr_fact(T) -->
    % Use @ as an (optional) marker of macro or templates calls
    "@", !,
    xfr_fact(T).
xfr_fact({T}) -->
    "{", !, ws, prolog_call(T), ws, "}".
xfr_fact(List) -->
    "[", !, ws, xfr_facts(List), ws, "]".
xfr_fact(T) -->
    xfr_term(T).


xfr_facts([T|Ts]) -->
    xfr_fact(T), ws, ",", !, ws, xfr_facts(Ts).
xfr_facts([T]) -->
    xfr_fact(T).



xfr_term('%var%'(Atom)) -->
    % Variables:
    {var_char(_,C)},
    [C],  !, xfr_atom(Atom).
xfr_term(T) -->
      xfr_atom(P),
    (
      ("(", !, ws, xfr_terms(Args), ws, ")", {T =.. [P|Args]})
    ;
      (":", !, xfr_term(T2), {T = P:T2})
    ;  
      {T = P}
    ).

xfr_terms([T|Ts]) -->
    xfr_term(T), ws, ",", !, ws, xfr_terms(Ts).
xfr_terms([T]) -->
    xfr_term(T).

%-------
% Atoms:

xfr_atom(Atom) --> xfr_atom_chars(AChars), {name(Atom,AChars)}.

xfr_atom_chars([C|Cs]) --> [0'\\, C], !, xfr_atom_chars(Cs).
xfr_atom_chars([C|Cs]) -->
    [C],
    {\+ member(C, " ([{}]):,;.|*=&\n\t\v\r\f")},
    !, xfr_atom_chars(Cs).
xfr_atom_chars([]) --> [].



%===================================================================


%-----------------------------------
% Prolog call
% We need to recapitulate the full range of standard prolog operators
% here, while being sensitive to the fact that variables and atoms may
% have a non-standard prolog syntax


prolog_call(\+(Call)) -->
    "\\+", !, ws, prolog_call(Call).
prolog_call(Call) -->
    "(", !,
    ws, prolog_call(Call1), ws, ")", ws,
    (
      ";" ->  ws, prolog_call(Call2), {Call = (Call1;Call2)}
    ;
      "," ->  ws, prolog_call(Call2), {Call = (Call1,Call2)}
    ;
      {Call = Call1}
    ).
prolog_call(Call) -->
    prolog_goal(Call1), ws,
    (
      ";" ->  ws, prolog_call(Call2), {Call = (Call1;Call2)}
    ;
      "," ->  ws, prolog_call(Call2), {Call = (Call1,Call2)}
    ;
      {Call = Call1}
    ).




prolog_goal(true) --> "true".
prolog_goal(fail) --> "fail".
prolog_goal(\+(Goal)) -->
    "\\+", !, ws, prolog_goal(Goal).
prolog_goal(Goal) -->
    {var_char(_,V)}, [V], !,
    prolog_constant(C), ws,
    (
      prolog_infix_pred(Op) ->
      ws, prolog_term(Arg), {Goal =..[Op,'%var%'(C),Arg]}
    ;
      {Goal = '%var%'(C)}
    ).
prolog_goal(Goal) -->
    prolog_constant(C),
    (
      "(" ->
      ws, prolog_terms(Args), ws, ")", {Goal =..[C|Args]}
    ;
      ws, prolog_infix_pred(Op) ->
      ws, prolog_term(Arg), {Goal =..[Op,C,Arg]}
    ;
      {Goal = C}
    ).




prolog_term('%var%'(Var)) -->
    {var_char(_,C)}, [C],
    !, prolog_constant(Var).
prolog_term(List) -->
    "[", !, ws, prolog_terms(Ts), ws,
    (
      "|" ->
      ws, prolog_term(T), ws, "]",
      {List = '$list_tail'(Ts,T)}
    ;
      "]" -> {List = Ts}
    ).
prolog_term(Term) -->
    "(", !, ws, prolog_term(T1), ws, ")",
    ws, prolog_infix_op(Op),
    ws, prolog_term(T2), {Term =..[Op,T1,T2]}.
prolog_term(Term) -->
    prolog_constant(C),
    (
      "(" ->
      ws, prolog_terms(Args), ws, ")", {Term =..[C|Args]}
    ;
      ws, prolog_infix_op(Op) ->
      ws, prolog_term(Arg), {Term =..[Op,C,Arg]}
    ;
      {Term = C}
    ).

prolog_terms([T|Ts]) -->
    prolog_term(T), ws,
    (
      (",", !, ws, prolog_terms(Ts))
    ;
      {Ts = []}
    ).

prolog_constant(Atom) --> prolog_atom_chars(AChars), {name(Atom,AChars)}.

prolog_atom_chars([C|Cs]) --> [0'\\, C], !, prolog_atom_chars(Cs).
prolog_atom_chars([C|Cs]) -->
    [C],
    {\+ member(C, " ([{}]):,;.|*+-/><@=&\n\t\v\r\f")},
    !, prolog_atom_chars(Cs).
prolog_atom_chars([]) --> [].


        
    
prolog_infix_pred('=') --> "=".
prolog_infix_pred('==') --> "==".
prolog_infix_pred('\==') --> "\==".
prolog_infix_pred('=\=') --> "=\=".
prolog_infix_pred('>') --> ">".
prolog_infix_pred('<') --> "<".
prolog_infix_pred('=<') --> "=<".
prolog_infix_pred('>=') --> ">=".
prolog_infix_pred('@>') --> "@>".
prolog_infix_pred('@<') --> "@<".
prolog_infix_pred('@=<') --> "@=<".
prolog_infix_pred('@>=') --> "@>=".
prolog_infix_pred('is') --> "is".


prolog_infix_op('*') --> "*".
prolog_infix_op('/') --> "/".
prolog_infix_op('//') --> "//".
prolog_infix_op('+') --> "+".
prolog_infix_op('-') --> "-".
prolog_infix_op('^') --> "^".