:- module(parser, [ parse/2 % parse(-CodeString, -AST) ]). :- use_module(library(dcg/basics)). :- use_module(log). % --- Entry point --- parse(CodeString, AST) :- string_codes(CodeString, Codes), phrase(s_expression(AST), Codes), log(parser, ast(AST)). % --- S-expression parsing --- s_expression(Ast) --> ws, "(", ws, s_expression_items(Items), ws, ")", ws, { build_ast(Items, Ast) }. s_expression(int(N)) --> integer(N), ws. s_expression(string_val(S)) --> string_literal(S), ws. s_expression(bool(true)) --> "true", ws. % Must be before id(Atom) s_expression(bool(false)) --> "false", ws. % Must be before id(Atom) s_expression(id(Atom)) --> identifier(Atom), ws. s_expression(list_nil) --> ws, "(", ws, ")", ws. % Special case for empty list '()' s_expression_items([Item | Rest]) --> s_expression(Item), ws, ( s_expression_items(Rest) | {Rest = []} ). s_expression_items([]) --> []. % For items inside '(...)' % --- AST Construction from S-expression items --- % build_ast(ItemsList, AST) % ItemsList is a Prolog list of ASTs from inside the parens. % E.g., for (if c t e), ItemsList = [id(if), AST_c, AST_t, AST_e] % Keywords build_ast([id(if), Cond, Then, Else], if(Cond, Then, Else)) :- !. build_ast([id(let), id(Var), Value, Body], let(Var, Value, Body)) :- !. % Lambda: (lambda (params...) body) -> (lambda (p1 p2) body) % ParamsSExpr is the AST for (p1 p2 ...), e.g. generic_list([id(p1), id(p2)]) build_ast([id(lambda), ParamsSExpr, Body], lambda(Params, Body)) :- extract_lambda_params(ParamsSExpr, Params), !. % Match: (match Expr ((Pat1 Body1) (Pat2 Body2) ...)) % ClausesSExpr is the AST for ((Pat1 Body1) (Pat2 Body2) ...), % e.g. generic_list([generic_list([Pat1AST, Body1AST]), ...]) build_ast([id(match), Expr, ClausesSExpr], match(Expr, Clauses)) :- extract_match_clauses(ClausesSExpr, Clauses), !. % Data constructors build_ast([id(tuple) | Elements], tuple(Elements)) :- !. build_ast([id(list) | Elements], list_val(Elements)) :- !. % Function application (id as functor) build_ast([id(FunctorName) | Args], apply(id(FunctorName), Args)) :- atom(FunctorName), % Ensure FunctorName is an atom !. % Higher-order function application: ((lambda (x) x) 10) or (VarHoldingLambda 10) % Head of ItemsList is a complex AST (e.g., lambda(...), id(Var)) % This rule now also handles ((lambda () body)) correctly as Args can be []. build_ast([FunctorSExpr | Args], apply(FunctorSExpr, Args)) :- % FunctorSExpr is not an id, or the previous rule for id functor would have caught it. % Or, FunctorSExpr is an id, but it's the *only* thing in the list, e.g. (my_var) -> id(my_var) not apply(id(my_var),[]) % The s_expression(id(Atom)) rule handles single identifiers like (my_var) -> id(my_var). % So, if we are here with [id(X)], it means it's (X) which should be id(X). % This rule is for (F Args...) where F is complex, or (F) where F is complex. % If Items is [SomeComplexAST], it should become apply(SomeComplexAST, []) if it's meant to be a call. % Or generic_list([SomeComplexAST]) if it's just a list with one complex item. % The current s_expression_items structure means Args will be a list. % If (F) is parsed, Items = [FunctorSExpr], so Args = []. !. % Generic list structure if not a keyword or application, e.g. for parameters or clause pairs % Also handles (X) where X is a complex term, parsing to generic_list([X]) build_ast(Items, generic_list(Items)) :- Items \= [], !. build_ast([], list_nil) :- !. % Should have been caught by s_expression(list_nil) if it's top-level () % --- Helpers for AST construction --- % extract_lambda_params(SExpr_representing_param_list, PrologListOfParamNames) extract_lambda_params(list_nil, []) :- !. % Case: (lambda () body) -> ParamsSExpr = list_nil extract_lambda_params(id(AtomParam), [AtomParam]) :- % Case: (lambda x body) -> ParamsSExpr = id(x) atom(AtomParam), get_id_name_from_ast(id(AtomParam), AtomParam), !. % Ensures AtomParam is indeed the name. % Case: (lambda (p1 p2 ...) body) % ParamsSExpr will be apply(id(p1), [id(p2), ...]) due to updated build_ast rules. extract_lambda_params(apply(id(PHead), PArgASTs), [PHead | PArgNames]) :- atom(PHead), is_list(PArgASTs), maplist(get_id_name_from_ast, PArgASTs, PArgNames), !. % Fallback for safety, or if (lambda (generic_list_form_params) body) is ever valid. % This clause might be dead code for typical lambda parameter lists now. extract_lambda_params(generic_list(IdASTs), ParamNames) :- log(parser_warning, 'Lambda parameters parsed as generic_list, check usage: ~w', [generic_list(IdASTs)]), maplist(get_id_name_from_ast, IdASTs, ParamNames), !. get_id_name_from_ast(id(Name), Name). % extract_match_clauses(SExpr_representing_list_of_clauses, PrologListOfClauseASTs) % SExpr for ((p1 b1) (p2 b2)): generic_list([ ClauseSExpr1, ClauseSExpr2, ... ]) % ClauseSExpr1 is generic_list([Pat1AST, Body1AST]) extract_match_clauses(generic_list(ClauseSExprs), ClauseASTs) :- maplist(parse_one_match_clause, ClauseSExprs, ClauseASTs), !. extract_match_clauses(list_nil, []) :- !. % (match expr ()) - no clauses % parse_one_match_clause(SExpr_for_one_clause, clause(PatternAST, true, BodyAST)) % SExpr for (pat body) will be parsed as apply(RawPatternAST, [BodyAST]) parse_one_match_clause(apply(RawPatternAST, [BodyAST]), clause(Pattern, true, BodyAST)) :- ast_to_pattern(RawPatternAST, Pattern), !. % Handle case where a clause might be a single item, e.g. (match x (DefaultCaseBody)) - not standard LISP match parse_one_match_clause(BodyAST, clause(pwild, true, BodyAST)) :- log(parser_warning, 'Match clause parsed as single body term, assuming wildcard pattern: ~w', [BodyAST]). % This assumes that if a clause is not apply(Pat, [Body]), it's just a Body for a wildcard. % This might need refinement based on desired match syntax for single-element clauses. ast_to_pattern(id(Name), pvar(Name)) :- Name \= '_', !. ast_to_pattern(id('_'), pwild) :- !. ast_to_pattern(int(N), pint(N)) :- !. ast_to_pattern(string_val(S), pstring(S)) :- !. ast_to_pattern(bool(B), pbool(B)) :- !. % Pattern for true/false ast_to_pattern(list_nil, plist([])) :- !. % Pattern for () ast_to_pattern(tuple(ElementASTs), ptuple(PatternElements)) :- % (tuple p1 p2) maplist(ast_to_pattern, ElementASTs, PatternElements), !. ast_to_pattern(list_val(ElementASTs), plist(PatternElements)) :- % (list p1 p2) maplist(ast_to_pattern, ElementASTs, PatternElements), !. % For more complex list patterns like (cons head tail) or (list-star p1 p2 ... rest) % ast_to_pattern(generic_list([id(cons), HAST, TAST]), pcons(HPatt, TPatt)) :- !, % ast_to_pattern(HAST, HPatt), ast_to_pattern(TAST, TPatt). % This requires 'cons' to be parsed into generic_list([id(cons),...]) in pattern context. % --- Low-level parsers (mostly unchanged) --- identifier(Atom) --> [C], { code_type(C, alpha) }, % Simplified: starts with alpha string_without(" ()", Codes), % Simplified: no spaces or parens in identifier { atom_codes(Atom, [C|Codes]) }. string_literal(String) --> "\"", string_without("\"", Codes), "\"", { atom_codes(String, Codes) }. ws --> white, ws. ws --> []. % --- Example Usage (for testing in REPL) --- % ?- parse("(if (is_number x) x 0)", AST). % AST = if(is_number(id(x)), id(x), int(0)) % % ?- parse("(let y 10 y)", AST). % AST = let(y, int(10), id(y)) % % ?- parse("(match input ((list x y) x))", AST). % AST = match(id(input), [clause(plist([pvar(x), pvar(y)]), true, id(x))]) % % ?- parse("(match data ((tuple a _) a))", AST). % AST = match(id(data), [clause(ptuple([pvar(a), pwild]), true, id(a))]) % % ?- parse("(lambda (x y) (add x y))", AST). % AST = lambda([x,y], add(id(x),id(y))) % % ?- parse("((lambda (x) x) 10)", AST). % AST = apply(lambda([x],id(x)),[int(10)]) % % ?- parse("(list 1 2 3)", AST). % AST = list_val([int(1), int(2), int(3)]) % % ?- parse("(tuple \"a\" true)", AST). % AST = tuple([string_val("a"), bool(true)]) % % ?- parse("()", AST). % AST = list_nil