149 lines
6.1 KiB
Prolog
149 lines
6.1 KiB
Prolog
:- module(parser,
|
|
[ parse/2 % parse(-CodeString, -AST)
|
|
]).
|
|
|
|
:- use_module(library(dcg/basics)).
|
|
:- use_module(log).
|
|
|
|
% --- Entry point ---
|
|
parse(CodeString, AST) :-
|
|
string_codes(CodeString, Codes),
|
|
phrase(s_expression(AST), Codes),
|
|
log(parser, ast(AST)).
|
|
|
|
% --- S-expression parsing ---
|
|
s_expression(Ast) -->
|
|
ws, "(", ws, s_expression_items(Items), ws, ")", ws,
|
|
{ build_ast(Items, Ast) }.
|
|
s_expression(int(N)) --> integer(N), ws.
|
|
s_expression(string_val(S)) --> string_literal(S), ws.
|
|
s_expression(bool(true)) --> "true", ws. % Must be before id(Atom)
|
|
s_expression(bool(false)) --> "false", ws. % Must be before id(Atom)
|
|
s_expression(id(Atom)) --> identifier(Atom), ws.
|
|
s_expression(list_nil) --> ws, "(", ws, ")", ws. % Special case for empty list '()'
|
|
|
|
s_expression_items([Item | Rest]) -->
|
|
s_expression(Item), ws,
|
|
( s_expression_items(Rest)
|
|
| {Rest = []} ).
|
|
s_expression_items([]) --> []. % For items inside '(...)'
|
|
|
|
% --- AST Construction from S-expression items ---
|
|
% build_ast(ItemsList, AST)
|
|
% ItemsList is a Prolog list of ASTs from inside the parens.
|
|
% E.g., for (if c t e), ItemsList = [id(if), AST_c, AST_t, AST_e]
|
|
|
|
% Keywords
|
|
build_ast([id(if), Cond, Then, Else], if(Cond, Then, Else)) :- !.
|
|
build_ast([id(let), id(Var), Value, Body], let(Var, Value, Body)) :- !.
|
|
|
|
% Lambda: (lambda (params...) body) -> (lambda (p1 p2) body)
|
|
% ParamsSExpr is the AST for (p1 p2 ...), e.g. generic_list([id(p1), id(p2)])
|
|
build_ast([id(lambda), ParamsSExpr, Body], lambda(Params, Body)) :-
|
|
extract_lambda_params(ParamsSExpr, Params), !.
|
|
|
|
% Match: (match Expr ((Pat1 Body1) (Pat2 Body2) ...))
|
|
% ClausesSExpr is the AST for ((Pat1 Body1) (Pat2 Body2) ...),
|
|
% e.g. generic_list([generic_list([Pat1AST, Body1AST]), ...])
|
|
build_ast([id(match), Expr, ClausesSExpr], match(Expr, Clauses)) :-
|
|
extract_match_clauses(ClausesSExpr, Clauses), !.
|
|
|
|
% Data constructors
|
|
build_ast([id(tuple) | Elements], tuple(Elements)) :- !.
|
|
build_ast([id(list) | Elements], list_val(Elements)) :- !.
|
|
|
|
% Function application (must be last among id-starting rules for simple names)
|
|
build_ast([id(FunctorName) | Args], Application) :-
|
|
atom(FunctorName), % Ensure FunctorName is an atom, not a complex term
|
|
Application =.. [FunctorName | Args], !.
|
|
|
|
% Higher-order function application: ((lambda (x) x) 10) or (VarHoldingLambda 10)
|
|
% Head of ItemsList is a complex AST (e.g., lambda(...), id(Var))
|
|
build_ast([FunctorSExpr | Args], apply(FunctorSExpr, Args)) :- Args \= [], !. % Ensure there are arguments
|
|
|
|
% Generic list structure if not a keyword or application, e.g. for parameters or clause pairs
|
|
% Also handles (X) where X is a complex term, parsing to generic_list([X])
|
|
build_ast(Items, generic_list(Items)) :- Items \= [], !.
|
|
build_ast([], list_nil) :- !. % Should have been caught by s_expression(list_nil) if it's top-level ()
|
|
|
|
% --- Helpers for AST construction ---
|
|
|
|
% extract_lambda_params(SExpr_representing_param_list, PrologListOfParamNames)
|
|
% SExpr for (p1 p2 ...): generic_list([id(p1), id(p2), ...])
|
|
extract_lambda_params(generic_list(IdASTs), ParamNames) :-
|
|
maplist(get_id_name_from_ast, IdASTs, ParamNames), !.
|
|
extract_lambda_params(list_nil, []) :- !. % (lambda () body)
|
|
extract_lambda_params(id(ParamAST_single_param), [ParamName]) :- % (lambda x body)
|
|
get_id_name_from_ast(id(ParamAST_single_param), ParamName), !.
|
|
|
|
get_id_name_from_ast(id(Name), Name).
|
|
|
|
% extract_match_clauses(SExpr_representing_list_of_clauses, PrologListOfClauseASTs)
|
|
% SExpr for ((p1 b1) (p2 b2)): generic_list([ ClauseSExpr1, ClauseSExpr2, ... ])
|
|
% ClauseSExpr1 is generic_list([Pat1AST, Body1AST])
|
|
extract_match_clauses(generic_list(ClauseSExprs), ClauseASTs) :-
|
|
maplist(parse_one_match_clause, ClauseSExprs, ClauseASTs), !.
|
|
extract_match_clauses(list_nil, []) :- !. % (match expr ()) - no clauses
|
|
|
|
% parse_one_match_clause(SExpr_for_one_clause, clause(PatternAST, true, BodyAST))
|
|
% SExpr for (pat body): generic_list([RawPatternAST, BodyAST])
|
|
parse_one_match_clause(generic_list([RawPatternAST, BodyAST]), clause(Pattern, true, BodyAST)) :-
|
|
ast_to_pattern(RawPatternAST, Pattern).
|
|
|
|
ast_to_pattern(id(Name), pvar(Name)) :- Name \= '_', !.
|
|
ast_to_pattern(id('_'), pwild) :- !.
|
|
ast_to_pattern(int(N), pint(N)) :- !.
|
|
ast_to_pattern(string_val(S), pstring(S)) :- !.
|
|
ast_to_pattern(bool(B), pbool(B)) :- !. % Pattern for true/false
|
|
ast_to_pattern(list_nil, plist([])) :- !. % Pattern for ()
|
|
ast_to_pattern(tuple(ElementASTs), ptuple(PatternElements)) :- % (tuple p1 p2)
|
|
maplist(ast_to_pattern, ElementASTs, PatternElements), !.
|
|
ast_to_pattern(list_val(ElementASTs), plist(PatternElements)) :- % (list p1 p2)
|
|
maplist(ast_to_pattern, ElementASTs, PatternElements), !.
|
|
% For more complex list patterns like (cons head tail) or (list-star p1 p2 ... rest)
|
|
% ast_to_pattern(generic_list([id(cons), HAST, TAST]), pcons(HPatt, TPatt)) :- !,
|
|
% ast_to_pattern(HAST, HPatt), ast_to_pattern(TAST, TPatt).
|
|
% This requires 'cons' to be parsed into generic_list([id(cons),...]) in pattern context.
|
|
|
|
% --- Low-level parsers (mostly unchanged) ---
|
|
identifier(Atom) -->
|
|
[C], { code_type(C, alpha) }, % Simplified: starts with alpha
|
|
string_without(" ()", Codes), % Simplified: no spaces or parens in identifier
|
|
{ atom_codes(Atom, [C|Codes]) }.
|
|
|
|
string_literal(String) -->
|
|
"\"", string_without("\"", Codes), "\"",
|
|
{ atom_codes(String, Codes) }.
|
|
|
|
ws --> white, ws.
|
|
ws --> [].
|
|
|
|
|
|
% --- Example Usage (for testing in REPL) ---
|
|
% ?- parse("(if (is_number x) x 0)", AST).
|
|
% AST = if(is_number(id(x)), id(x), int(0))
|
|
%
|
|
% ?- parse("(let y 10 y)", AST).
|
|
% AST = let(y, int(10), id(y))
|
|
%
|
|
% ?- parse("(match input ((list x y) x))", AST).
|
|
% AST = match(id(input), [clause(plist([pvar(x), pvar(y)]), true, id(x))])
|
|
%
|
|
% ?- parse("(match data ((tuple a _) a))", AST).
|
|
% AST = match(id(data), [clause(ptuple([pvar(a), pwild]), true, id(a))])
|
|
%
|
|
% ?- parse("(lambda (x y) (add x y))", AST).
|
|
% AST = lambda([x,y], add(id(x),id(y)))
|
|
%
|
|
% ?- parse("((lambda (x) x) 10)", AST).
|
|
% AST = apply(lambda([x],id(x)),[int(10)])
|
|
%
|
|
% ?- parse("(list 1 2 3)", AST).
|
|
% AST = list_val([int(1), int(2), int(3)])
|
|
%
|
|
% ?- parse("(tuple \"a\" true)", AST).
|
|
% AST = tuple([string_val("a"), bool(true)])
|
|
%
|
|
% ?- parse("()", AST).
|
|
% AST = list_nil
|