:- module(parser, [ parse/2 % parse(-CodeString, -AST) ]). :- use_module(library(dcg/basics)). :- use_module(log). % --- Entry point --- parse(CodeString, AST) :- string_codes(CodeString, Codes), phrase(s_expression(AST), Codes), log(parser, ast(AST)). % --- S-expression parsing --- s_expression(Ast) --> ws, "(", ws, s_expression_items(Items), ws, ")", ws, { build_ast(Items, Ast) }. s_expression(int(N)) --> integer(N), ws. s_expression(string_val(S)) --> string_literal(S), ws. s_expression(bool(true)) --> "true", ws. % Must be before id(Atom) s_expression(bool(false)) --> "false", ws. % Must be before id(Atom) s_expression(id(Atom)) --> identifier(Atom), ws. s_expression(list_nil) --> ws, "(", ws, ")", ws. % Special case for empty list '()' s_expression_items([Item | Rest]) --> s_expression(Item), ws, ( s_expression_items(Rest) | {Rest = []} ). s_expression_items([]) --> []. % For items inside '(...)' % --- AST Construction from S-expression items --- % build_ast(ItemsList, AST) % ItemsList is a Prolog list of ASTs from inside the parens. % E.g., for (if c t e), ItemsList = [id(if), AST_c, AST_t, AST_e] % Keywords build_ast([id(if), Cond, Then, Else], if(Cond, Then, Else)) :- !. build_ast([id(let), id(Var), Value, Body], let(Var, Value, Body)) :- !. % Lambda: (lambda (params...) body) -> (lambda (p1 p2) body) % ParamsSExpr is the AST for (p1 p2 ...), e.g. generic_list([id(p1), id(p2)]) build_ast([id(lambda), ParamsSExpr, Body], lambda(Params, Body)) :- extract_lambda_params(ParamsSExpr, Params), !. % Match: (match Expr ((Pat1 Body1) (Pat2 Body2) ...)) % ClausesSExpr is the AST for ((Pat1 Body1) (Pat2 Body2) ...), % e.g. generic_list([generic_list([Pat1AST, Body1AST]), ...]) build_ast([id(match), Expr, ClausesSExpr], match(Expr, Clauses)) :- extract_match_clauses(ClausesSExpr, Clauses), !. % Data constructors build_ast([id(tuple) | Elements], tuple(Elements)) :- !. build_ast([id(list) | Elements], list_val(Elements)) :- !. % Function application (must be last among id-starting rules for simple names) build_ast([id(FunctorName) | Args], Application) :- atom(FunctorName), % Ensure FunctorName is an atom, not a complex term Application =.. [FunctorName | Args], !. % Higher-order function application: ((lambda (x) x) 10) or (VarHoldingLambda 10) % Head of ItemsList is a complex AST (e.g., lambda(...), id(Var)) build_ast([FunctorSExpr | Args], apply(FunctorSExpr, Args)) :- Args \= [], !. % Ensure there are arguments % Generic list structure if not a keyword or application, e.g. for parameters or clause pairs % Also handles (X) where X is a complex term, parsing to generic_list([X]) build_ast(Items, generic_list(Items)) :- Items \= [], !. build_ast([], list_nil) :- !. % Should have been caught by s_expression(list_nil) if it's top-level () % --- Helpers for AST construction --- % extract_lambda_params(SExpr_representing_param_list, PrologListOfParamNames) % SExpr for (p1 p2 ...): generic_list([id(p1), id(p2), ...]) extract_lambda_params(generic_list(IdASTs), ParamNames) :- maplist(get_id_name_from_ast, IdASTs, ParamNames), !. extract_lambda_params(list_nil, []) :- !. % (lambda () body) extract_lambda_params(id(ParamAST_single_param), [ParamName]) :- % (lambda x body) get_id_name_from_ast(id(ParamAST_single_param), ParamName), !. get_id_name_from_ast(id(Name), Name). % extract_match_clauses(SExpr_representing_list_of_clauses, PrologListOfClauseASTs) % SExpr for ((p1 b1) (p2 b2)): generic_list([ ClauseSExpr1, ClauseSExpr2, ... ]) % ClauseSExpr1 is generic_list([Pat1AST, Body1AST]) extract_match_clauses(generic_list(ClauseSExprs), ClauseASTs) :- maplist(parse_one_match_clause, ClauseSExprs, ClauseASTs), !. extract_match_clauses(list_nil, []) :- !. % (match expr ()) - no clauses % parse_one_match_clause(SExpr_for_one_clause, clause(PatternAST, true, BodyAST)) % SExpr for (pat body): generic_list([RawPatternAST, BodyAST]) parse_one_match_clause(generic_list([RawPatternAST, BodyAST]), clause(Pattern, true, BodyAST)) :- ast_to_pattern(RawPatternAST, Pattern). ast_to_pattern(id(Name), pvar(Name)) :- Name \= '_', !. ast_to_pattern(id('_'), pwild) :- !. ast_to_pattern(int(N), pint(N)) :- !. ast_to_pattern(string_val(S), pstring(S)) :- !. ast_to_pattern(bool(B), pbool(B)) :- !. % Pattern for true/false ast_to_pattern(list_nil, plist([])) :- !. % Pattern for () ast_to_pattern(tuple(ElementASTs), ptuple(PatternElements)) :- % (tuple p1 p2) maplist(ast_to_pattern, ElementASTs, PatternElements), !. ast_to_pattern(list_val(ElementASTs), plist(PatternElements)) :- % (list p1 p2) maplist(ast_to_pattern, ElementASTs, PatternElements), !. % For more complex list patterns like (cons head tail) or (list-star p1 p2 ... rest) % ast_to_pattern(generic_list([id(cons), HAST, TAST]), pcons(HPatt, TPatt)) :- !, % ast_to_pattern(HAST, HPatt), ast_to_pattern(TAST, TPatt). % This requires 'cons' to be parsed into generic_list([id(cons),...]) in pattern context. % --- Low-level parsers (mostly unchanged) --- identifier(Atom) --> [C], { code_type(C, alpha) }, % Simplified: starts with alpha string_without(" ()", Codes), % Simplified: no spaces or parens in identifier { atom_codes(Atom, [C|Codes]) }. string_literal(String) --> "\"", string_without("\"", Codes), "\"", { atom_codes(String, Codes) }. ws --> white, ws. ws --> []. % --- Example Usage (for testing in REPL) --- % ?- parse("(if (is_number x) x 0)", AST). % AST = if(is_number(id(x)), id(x), int(0)) % % ?- parse("(let y 10 y)", AST). % AST = let(y, int(10), id(y)) % % ?- parse("(match input ((list x y) x))", AST). % AST = match(id(input), [clause(plist([pvar(x), pvar(y)]), true, id(x))]) % % ?- parse("(match data ((tuple a _) a))", AST). % AST = match(id(data), [clause(ptuple([pvar(a), pwild]), true, id(a))]) % % ?- parse("(lambda (x y) (add x y))", AST). % AST = lambda([x,y], add(id(x),id(y))) % % ?- parse("((lambda (x) x) 10)", AST). % AST = apply(lambda([x],id(x)),[int(10)]) % % ?- parse("(list 1 2 3)", AST). % AST = list_val([int(1), int(2), int(3)]) % % ?- parse("(tuple \"a\" true)", AST). % AST = tuple([string_val("a"), bool(true)]) % % ?- parse("()", AST). % AST = list_nil