defmodule Til.Parser do
  @moduledoc """
  Parser for the Tilly Lisp dialect. It transforms source code into a collection of Node Maps.
  """

  # Represents the current parsing position: a 0-indexed grapheme offset plus
  # 1-indexed line/col, the file name, and the accumulated node map (id => node).
  defstruct offset: 0, line: 1, col: 1, file_name: "unknown", nodes: %{}

  @doc """
  Parses a source string into a map of AST nodes.

  Returns `{:ok, nodes}` where `nodes` maps node id to node map. The root
  `:file` node has `parent_id: nil`. Parse errors never abort the parse;
  they are recorded on individual nodes via their `:parsing_error` field.
  """
  def parse(source_string, file_name \\ "unknown") do
    file_node_id = System.unique_integer([:monotonic, :positive])

    # Initial location for the file node (starts at the beginning)
    file_start_offset = 0
    file_start_line = 1
    file_start_col = 1

    # End location and raw_string will be finalized after parsing all content
    prelim_file_node = %{
      id: file_node_id,
      type_id: nil,
      # File node is the root
      parent_id: nil,
      file: file_name,
      # End location TBD
      location: [file_start_offset, file_start_line, file_start_col, 0, 0, 0],
      # TBD
      raw_string: "",
      ast_node_type: :file,
      # TBD
      children: [],
      parsing_error: nil
    }

    initial_state = %__MODULE__{
      file_name: file_name,
      nodes: %{file_node_id => prelim_file_node},
      offset: 0,
      line: 1,
      col: 1
    }

    # Pass the original source for raw_string extraction, and file_node_id as
    # the parent for top-level expressions.
    final_state_after_expressions =
      parse_all_expressions(source_string, source_string, initial_state, file_node_id)

    # Finalize the file node: compute the end position of the entire source.
    {file_end_line, file_end_col} = calculate_new_line_col(source_string, 1, 1)
    # Offset is 0-indexed and counted in graphemes, so end_offset == length.
    file_end_offset = String.length(source_string)

    # Collect direct children of the file node, sorted by start offset so they
    # appear in source order.
    file_children_ids =
      final_state_after_expressions.nodes
      |> Map.values()
      |> Enum.filter(&(&1.parent_id == file_node_id))
      |> Enum.sort_by(fn node -> hd(node.location) end)
      |> Enum.map(& &1.id)

    updated_file_node =
      final_state_after_expressions.nodes
      |> Map.get(file_node_id)
      |> Map.merge(%{
        location: [
          file_start_offset,
          file_start_line,
          file_start_col,
          file_end_offset,
          file_end_line,
          file_end_col
        ],
        # The entire source is the raw string of the file node
        raw_string: source_string,
        children: file_children_ids
      })

    final_nodes = Map.put(final_state_after_expressions.nodes, file_node_id, updated_file_node)
    {:ok, final_nodes}
  end

  # --- Main Parsing Logic ---

  # original_source_string is the complete initial source; source_string is the
  # current remainder. parent_id_for_top_level_expressions is the node that
  # top-level expressions are parented to (e.g. the :file node).
  defp parse_all_expressions(
         original_source_string,
         source_string,
         state,
         parent_id_for_top_level_expressions
       ) do
    case skip_whitespace(source_string, state) do
      {:eos, final_state} ->
        final_state

      {:ok, remaining_source, current_state} ->
        if remaining_source == "" do
          # All content parsed; nothing left after skipping whitespace.
          current_state
        else
          # There's actual content to parse.
          case parse_datum(
                 original_source_string,
                 remaining_source,
                 current_state,
                 parent_id_for_top_level_expressions
               ) do
            {:ok, _node_id, next_source, next_state} ->
              parse_all_expressions(
                original_source_string,
                next_source,
                next_state,
                parent_id_for_top_level_expressions
              )

            {:error_node, _node_id, _reason, next_source, next_state} ->
              # An error node was created by parse_datum and input was consumed;
              # continue parsing from next_source.
              parse_all_expressions(
                original_source_string,
                next_source,
                next_state,
                parent_id_for_top_level_expressions
              )

              # NOTE: termination relies on parse_datum (and
              # create_error_node_and_advance) always consuming at least one
              # character when the source is non-empty; otherwise this loop
              # could spin forever.
          end
        end
    end
  end

  # Parses a single datum: an atom, integer, symbol, string, or a collection.
  # Returns {:ok, node_id, rest, state} or {:error_node, node_id, reason, rest,
  # state}; in both cases at least one character of input has been consumed.
  defp parse_datum(original_source_string, source, state, parent_id) do
    # Peek for multi-character tokens first
    cond do
      String.starts_with?(source, "m{") ->
        parse_map_expression(original_source_string, source, state, parent_id)

      # Fallback to single character dispatch
      true ->
        char = String.first(source)

        cond do
          char == "(" ->
            parse_s_expression(original_source_string, source, state, parent_id)

          char == ")" ->
            # Unexpected closing parenthesis: consume 1 char for the error token.
            create_error_node_and_advance(source, state, parent_id, 1, "Unexpected ')'")

          char == "[" ->
            parse_list_expression(original_source_string, source, state, parent_id)

          char == "]" ->
            # Unexpected closing square bracket
            create_error_node_and_advance(source, state, parent_id, 1, "Unexpected ']'")

          # For tuples
          char == "{" ->
            parse_tuple_expression(original_source_string, source, state, parent_id)

          char == "}" ->
            # Unexpected closing curly brace
            create_error_node_and_advance(source, state, parent_id, 1, "Unexpected '}'")

          char == "'" ->
            parse_string_datum(original_source_string, source, state, parent_id)

          char == ":" ->
            # If the first char is ':', try to parse as an atom like :foo
            case parse_atom_datum(source, state, parent_id) do
              {:ok, node_id, rest, new_state} ->
                {:ok, node_id, rest, new_state}

              {:error, :not_atom} ->
                # Not a valid atom (e.g. a bare ":"); fall back to general
                # symbol parsing. Integer parsing cannot match a leading ':'.
                case parse_symbol_datum(source, state, parent_id) do
                  {:ok, node_id, rest, new_state} ->
                    {:ok, node_id, rest, new_state}

                  {:error, :not_symbol} ->
                    # Started with ':' but is neither a valid atom nor symbol.
                    create_error_node_and_advance(
                      source,
                      state,
                      parent_id,
                      1,
                      "Unknown token starting with ':'"
                    )
                end
            end

          true ->
            # Default: try integer first, then symbol, else emit an error node.
            case parse_integer_datum(source, state, parent_id) do
              {:ok, node_id, rest, new_state} ->
                {:ok, node_id, rest, new_state}

              {:error, :not_integer} ->
                case parse_symbol_datum(source, state, parent_id) do
                  {:ok, node_id, rest, new_state} ->
                    {:ok, node_id, rest, new_state}

                  {:error, :not_symbol} ->
                    # Not a symbol either; consume 1 char for the unknown token.
                    create_error_node_and_advance(source, state, parent_id, 1, "Unknown token")
                end
            end
        end
    end
  end

  # --- Datum Parsing Helpers ---

  # Parses a single-quote delimited string literal. The column of the opening
  # quote determines how much leading indentation is stripped from continuation
  # lines (see process_string_content/2).
  defp parse_string_datum(_original_source_string, source, state, parent_id) do
    # state is before consuming the opening "'"
    initial_state_for_token = state
    strip_indent = initial_state_for_token.col - 1

    # Consume opening "'"
    {opening_tick, source_after_opening_tick} = String.split_at(source, 1)

    case :binary.match(source_after_opening_tick, "'") do
      :nomatch ->
        # Unclosed string: everything to end-of-input becomes the content.
        content_segment = source_after_opening_tick
        raw_token = opening_tick <> content_segment
        state_at_node_end = advance_pos(initial_state_for_token, raw_token)

        location = [
          initial_state_for_token.offset,
          initial_state_for_token.line,
          initial_state_for_token.col,
          state_at_node_end.offset,
          state_at_node_end.line,
          state_at_node_end.col
        ]

        processed_value = process_string_content(content_segment, strip_indent)

        {node_id, state_with_error_node} =
          add_node(
            initial_state_for_token,
            parent_id,
            location,
            raw_token,
            :literal_string,
            %{value: processed_value, parsing_error: "Unclosed string literal"}
          )

        final_state = %{
          state_with_error_node
          | offset: state_at_node_end.offset,
            line: state_at_node_end.line,
            col: state_at_node_end.col
        }

        {:error_node, node_id, "Unclosed string literal", "", final_state}

      # :binary.match returns a BYTE index; _tick_length will be 1 for "'".
      {idx_closing_tick_in_segment, _tick_length} ->
        # BUGFIX: the match index is a byte offset, so slice with byte-based
        # binary_part/3 — grapheme-based String.slice mis-split any string
        # containing multi-byte UTF-8 characters. The index sits on the "'"
        # character, which is always a codepoint boundary.
        content_segment = binary_part(source_after_opening_tick, 0, idx_closing_tick_in_segment)
        closing_tick = "'"
        raw_token = opening_tick <> content_segment <> closing_tick

        rest_of_source =
          binary_part(
            source_after_opening_tick,
            idx_closing_tick_in_segment + 1,
            byte_size(source_after_opening_tick) - idx_closing_tick_in_segment - 1
          )

        state_at_node_end = advance_pos(initial_state_for_token, raw_token)

        location = [
          initial_state_for_token.offset,
          initial_state_for_token.line,
          initial_state_for_token.col,
          state_at_node_end.offset,
          state_at_node_end.line,
          state_at_node_end.col
        ]

        processed_value = process_string_content(content_segment, strip_indent)

        {new_node_id, state_with_node} =
          add_node(
            initial_state_for_token,
            parent_id,
            location,
            raw_token,
            :literal_string,
            %{value: processed_value}
          )

        final_state = %{
          state_with_node
          | offset: state_at_node_end.offset,
            line: state_at_node_end.line,
            col: state_at_node_end.col
        }

        {:ok, new_node_id, rest_of_source, final_state}
    end
  end

  # Strips up to `strip_indent` leading whitespace characters from every line
  # after the first, so multi-line string literals can be indented with the code.
  defp process_string_content(content_str, strip_indent) when strip_indent >= 0 do
    # split/3 with trim: false always yields at least one element ("" for "").
    [first_line | rest_lines] = String.split(content_str, "\n", trim: false)

    processed_rest_lines =
      Enum.map(rest_lines, fn line ->
        current_leading_spaces_count =
          Regex.run(~r/^(\s*)/, line) |> List.first() |> String.length()

        spaces_to_remove = min(current_leading_spaces_count, strip_indent)
        String.slice(line, spaces_to_remove..-1//1)
      end)

    Enum.join([first_line | processed_rest_lines], "\n")
  end

  # An atom is a colon followed by one or more non-delimiter characters
  # (delimiters: whitespace, (, ), [, ], {, }). The colon is part of the atom's
  # raw string; `atom_name_part` is what follows it.
  defp parse_atom_datum(source, state, parent_id) do
    case Regex.run(~r/^:([^\s\(\)\[\]\{\}]+)/, source) do
      # raw_atom_str is like ":foo", atom_name_part is "foo"
      [raw_atom_str, atom_name_part] ->
        # The regex [^...]+ ensures atom_name_part is not empty.
        rest_after_atom = String.slice(source, String.length(raw_atom_str)..-1//1)

        start_offset = state.offset
        start_line = state.line
        start_col = state.col
        state_after_token = advance_pos(state, raw_atom_str)
        end_offset = state_after_token.offset
        end_line = state_after_token.line
        end_col = state_after_token.col
        location = [start_offset, start_line, start_col, end_offset, end_line, end_col]

        # SECURITY NOTE: String.to_atom/1 creates atoms from arbitrary source
        # text; atoms are never garbage-collected, so parsing untrusted input
        # can exhaust the atom table. If this parser ever runs on untrusted
        # sources, consider keeping the name as a string instead.
        atom_value = String.to_atom(atom_name_part)

        {new_node_id, state_with_node} =
          add_node(
            state,
            parent_id,
            location,
            raw_atom_str,
            :literal_atom,
            %{value: atom_value}
          )

        final_state = %{
          state_with_node
          | offset: end_offset,
            line: end_line,
            col: end_col
        }

        {:ok, new_node_id, rest_after_atom, final_state}

      # No match (e.g. just ":" or ":" followed by a delimiter)
      _ ->
        {:error, :not_atom}
    end
  end

  # Parses a leading integer via Integer.parse/1; {:error, :not_integer} leaves
  # source and state untouched so the caller can try other datum kinds.
  defp parse_integer_datum(source, state, parent_id) do
    case Integer.parse(source) do
      {int_val, rest_after_int} ->
        raw_int = String.slice(source, 0, String.length(source) - String.length(rest_after_int))

        start_offset = state.offset
        start_line = state.line
        start_col = state.col
        state_after_token = advance_pos(state, raw_int)
        end_offset = state_after_token.offset
        end_line = state_after_token.line
        end_col = state_after_token.col
        location = [start_offset, start_line, start_col, end_offset, end_line, end_col]

        {new_node_id, state_with_node} =
          add_node(state, parent_id, location, raw_int, :literal_integer, %{value: int_val})

        # Update state to reflect the consumed token
        final_state = %{state_with_node | offset: end_offset, line: end_line, col: end_col}
        {:ok, new_node_id, rest_after_int, final_state}

      :error ->
        # Failure: source and state are unchanged by this attempt
        {:error, :not_integer}
    end
  end

  # A symbol is any run of non-delimiter characters. `m{` is handled before
  # symbol parsing ever sees it.
  defp parse_symbol_datum(source, state, parent_id) do
    case Regex.run(~r/^([^\s\(\)\[\]\{\}]+)/, source) do
      [raw_symbol | _] ->
        rest_after_symbol = String.slice(source, String.length(raw_symbol)..-1//1)

        start_offset = state.offset
        start_line = state.line
        start_col = state.col
        state_after_token = advance_pos(state, raw_symbol)
        end_offset = state_after_token.offset
        end_line = state_after_token.line
        end_col = state_after_token.col
        location = [start_offset, start_line, start_col, end_offset, end_line, end_col]

        {new_node_id, state_with_node} =
          add_node(state, parent_id, location, raw_symbol, :symbol, %{name: raw_symbol})

        # Update state to reflect the consumed token
        final_state = %{
          state_with_node
          | offset: end_offset,
            line: end_line,
            col: end_col
        }

        {:ok, new_node_id, rest_after_symbol, final_state}

      nil ->
        # Failure: source and state are unchanged by this attempt
        {:error, :not_symbol}
    end
  end

  # Consumes num_chars_for_token characters as an :unknown node carrying
  # error_message, guaranteeing forward progress for the parse loop.
  defp create_error_node_and_advance(
         source_for_token,
         state_before_token,
         parent_id,
         num_chars_for_token,
         error_message
       ) do
    {raw_token, rest_of_source} = String.split_at(source_for_token, num_chars_for_token)

    start_offset = state_before_token.offset
    start_line = state_before_token.line
    start_col = state_before_token.col
    state_after_token_consumed = advance_pos(state_before_token, raw_token)
    end_offset = state_after_token_consumed.offset
    end_line = state_after_token_consumed.line
    end_col = state_after_token_consumed.col
    location = [start_offset, start_line, start_col, end_offset, end_line, end_col]

    {error_node_id, state_with_error_node} =
      add_node(state_before_token, parent_id, location, raw_token, :unknown, %{
        parsing_error: error_message
      })

    # Further parsing must reflect the consumed token's position and include
    # the new error node.
    final_error_state = %{
      state_with_error_node
      | offset: end_offset,
        line: end_line,
        col: end_col
    }

    {:error_node, error_node_id, error_message, rest_of_source, final_error_state}
  end

  # Standard S-expression parsing via parse_collection, followed by a rewrite
  # into a :lambda_expression node when the form is (fn ...).
  defp parse_s_expression(original_source_string, source, state, parent_id) do
    result =
      parse_collection(
        original_source_string,
        source,
        state,
        parent_id,
        "(",
        ")",
        :s_expression,
        "Unclosed S-expression",
        "Error parsing element in S-expression. Content might be incomplete."
      )

    # After parsing, check if it's an 'fn' expression
    case result do
      {:ok, collection_node_id, rest_after_collection, state_after_collection} ->
        collection_node = Map.get(state_after_collection.nodes, collection_node_id)

        if fn_expression?(collection_node, state_after_collection.nodes) do
          transformed_node =
            transform_to_lambda_expression(collection_node, state_after_collection.nodes)

          final_state = %{
            state_after_collection
            | nodes:
                Map.put(state_after_collection.nodes, transformed_node.id, transformed_node)
          }

          {:ok, transformed_node.id, rest_after_collection, final_state}
        else
          # Not an fn expression, return as is
          result
        end

      _error_or_other ->
        # Propagate errors or other results from parse_collection
        result
    end
  end

  # True when an S-expression node's first child is the symbol "fn".
  # (No is_ prefix: that convention is reserved for guard-safe macros.)
  defp fn_expression?(s_expr_node, nodes_map) do
    if s_expr_node.ast_node_type == :s_expression && !Enum.empty?(s_expr_node.children) do
      first_child_id = hd(s_expr_node.children)
      first_child_node = Map.get(nodes_map, first_child_id)

      first_child_node && first_child_node.ast_node_type == :symbol &&
        first_child_node.name == "fn"
    else
      false
    end
  end

  # Transforms an S-expression node known to be an 'fn' form into a
  # :lambda_expression node, or annotates it with :parsing_error when the form
  # is malformed. children = [fn_symbol_id, params_s_expr_id, body_form1_id, ...]
  defp transform_to_lambda_expression(s_expr_node, nodes_map) do
    # First child is the 'fn' symbol; already verified by fn_expression?/2.
    _fn_symbol_id = Enum.at(s_expr_node.children, 0)

    if length(s_expr_node.children) < 2 do
      %{s_expr_node | parsing_error: "Malformed 'fn' expression: missing parameters list."}
    else
      params_s_expr_id = Enum.at(s_expr_node.children, 1)
      params_s_expr_node = Map.get(nodes_map, params_s_expr_id)

      if !(params_s_expr_node && params_s_expr_node.ast_node_type == :s_expression) do
        Map.put(
          s_expr_node,
          :parsing_error,
          "Malformed 'fn' expression: parameters list is not an S-expression."
        )
      else
        # Children of the parameters S-expression; e.g. for
        # (fn ((a integer) (b atom) atom) ...) these are the IDs of
        # [(a integer), (b atom), atom].
        all_param_children_ids = Map.get(params_s_expr_node, :children, [])

        {arg_spec_node_ids, return_type_spec_node_id} =
          if Enum.empty?(all_param_children_ids) do
            # (fn () body) -> no args, nil (inferred) return type spec
            {[], nil}
          else
            # (fn (arg1 type1 ... ret_type) body): the last element is the
            # return type spec, the rest are arg specs.
            args = Enum.take(all_param_children_ids, length(all_param_children_ids) - 1)
            ret_type_id = List.last(all_param_children_ids)
            {args, ret_type_id}
          end

        # Each arg spec must be a symbol or an S-expr (param_symbol type_spec).
        all_arg_specs_valid =
          Enum.all?(arg_spec_node_ids, fn arg_id ->
            arg_node = Map.get(nodes_map, arg_id)

            case arg_node do
              # e.g. x
              %{ast_node_type: :symbol} ->
                true

              # e.g. (x integer)
              %{ast_node_type: :s_expression, children: s_children} ->
                if length(s_children) == 2 do
                  param_sym_node = Map.get(nodes_map, hd(s_children))
                  type_spec_node = Map.get(nodes_map, hd(tl(s_children)))

                  param_sym_node && param_sym_node.ast_node_type == :symbol &&
                    type_spec_node &&
                    (type_spec_node.ast_node_type == :symbol ||
                       type_spec_node.ast_node_type == :s_expression)
                else
                  # Not a valid (param_symbol type_spec) structure
                  false
                end

              # Not a symbol or valid S-expression for an arg spec
              _ ->
                false
            end
          end)

        # The return type spec must be nil (inferred) or a valid type node.
        return_type_spec_valid =
          if is_nil(return_type_spec_node_id) do
            true
          else
            ret_type_node = Map.get(nodes_map, return_type_spec_node_id)

            ret_type_node &&
              (ret_type_node.ast_node_type == :symbol ||
                 ret_type_node.ast_node_type == :s_expression)
          end

        if all_arg_specs_valid && return_type_spec_valid do
          # Body starts after 'fn' and the params S-expression.
          body_node_ids = Enum.drop(s_expr_node.children, 2)

          Map.merge(s_expr_node, %{
            :ast_node_type => :lambda_expression,
            :params_s_expr_id => params_s_expr_id,
            :arg_spec_node_ids => arg_spec_node_ids,
            :return_type_spec_node_id => return_type_spec_node_id,
            :body_node_ids => body_node_ids
          })
        else
          # Determine a more specific error message
          error_message =
            cond do
              !all_arg_specs_valid ->
                "Malformed 'fn' expression: invalid argument specification(s)."

              !return_type_spec_valid ->
                "Malformed 'fn' expression: invalid return type specification."

              # Generic fallback
              true ->
                "Malformed 'fn' expression."
            end

          Map.put(s_expr_node, :parsing_error, error_message)
        end
      end
    end
  end

  defp parse_list_expression(original_source_string, source, state, parent_id) do
    parse_collection(
      original_source_string,
      source,
      state,
      parent_id,
      "[",
      "]",
      :list_expression,
      "Unclosed list",
      "Error parsing element in list. Content might be incomplete."
    )
  end

  defp parse_map_expression(original_source_string, source, state, parent_id) do
    parse_collection(
      original_source_string,
      source,
      state,
      parent_id,
      # Opening token
      "m{",
      # Closing token
      "}",
      :map_expression,
      "Unclosed map",
      "Error parsing element in map. Content might be incomplete."
    )
  end

  defp parse_tuple_expression(original_source_string, source, state, parent_id) do
    parse_collection(
      original_source_string,
      source,
      state,
      parent_id,
      "{",
      "}",
      :tuple_expression,
      "Unclosed tuple",
      "Error parsing element in tuple. Content might be incomplete."
    )
  end

  # Shared implementation for all bracketed collections: consumes the opening
  # token, registers a preliminary node, parses elements until close_char_str,
  # and adapts the element-loop result into {:ok, ...} | {:error_node, ...}.
  defp parse_collection(
         original_source_string,
         source,
         state,
         parent_id,
         open_char_str,
         close_char_str,
         ast_node_type,
         unclosed_error_msg,
         element_error_msg
       ) do
    # Consume the opening token (e.g. '(', '[', 'm{')
    collection_start_offset = state.offset
    collection_start_line = state.line
    collection_start_col = state.col

    open_char_len = String.length(open_char_str)
    {_opening_token, rest_after_opening_token} = String.split_at(source, open_char_len)
    current_state = advance_pos(state, open_char_str)

    collection_node_id = System.unique_integer([:monotonic, :positive])

    prelim_collection_node = %{
      id: collection_node_id,
      type_id: nil,
      parent_id: parent_id,
      file: current_state.file_name,
      # End location TBD
      location: [collection_start_offset, collection_start_line, collection_start_col, 0, 0, 0],
      # TBD
      raw_string: "",
      ast_node_type: ast_node_type,
      children: [],
      parsing_error: nil
    }

    current_state_with_prelim_node = %{
      current_state
      | nodes: Map.put(current_state.nodes, collection_node_id, prelim_collection_node)
    }

    collection_start_pos_for_children =
      {collection_start_offset, collection_start_line, collection_start_col}

    result =
      parse_collection_elements(
        original_source_string,
        rest_after_opening_token,
        current_state_with_prelim_node,
        collection_node_id,
        [],
        collection_start_pos_for_children,
        close_char_str,
        unclosed_error_msg,
        element_error_msg
      )

    # Adapt the result to {:ok, node_id, ...} or {:error_node, node_id, ...}
    case result do
      {:ok, returned_collection_node_id, rest, state_after_elements} ->
        {:ok, returned_collection_node_id, rest, state_after_elements}

      {:error, reason, rest, state_after_elements} ->
        # collection_node_id is the node carrying the error (typically an
        # unclosed collection).
        {:error_node, collection_node_id, reason, rest, state_after_elements}
    end
  end

  # Generalized element loop shared by all collection types. Accumulates child
  # ids (in reverse) until the closing token or end-of-input.
  defp parse_collection_elements(
         original_source_string,
         source,
         state,
         collection_node_id,
         children_ids_acc,
         collection_start_pos_tuple,
         # e.g. ")" or "]"
         closing_char_str,
         # e.g. "Unclosed S-expression"
         unclosed_error_message,
         # Currently only threaded through the recursion
         element_error_message
       ) do
    case skip_whitespace(source, state) do
      {:eos, current_state_at_eos} ->
        # Unclosed collection: finalize with an error and end-of-input location.
        collection_node = Map.get(current_state_at_eos.nodes, collection_node_id)
        start_offset = elem(collection_start_pos_tuple, 0)
        end_offset = current_state_at_eos.offset

        actual_raw_string =
          String.slice(original_source_string, start_offset, end_offset - start_offset)

        updated_collection_node = %{
          collection_node
          | parsing_error: unclosed_error_message,
            children: Enum.reverse(children_ids_acc),
            location: [
              start_offset,
              elem(collection_start_pos_tuple, 1),
              elem(collection_start_pos_tuple, 2),
              end_offset,
              current_state_at_eos.line,
              current_state_at_eos.col
            ],
            raw_string: actual_raw_string
        }

        final_state = %{
          current_state_at_eos
          | nodes:
              Map.put(current_state_at_eos.nodes, collection_node_id, updated_collection_node)
        }

        # The collection node itself is the error node here.
        {:error, unclosed_error_message, "", final_state}

      {:ok, remaining_source, current_state} ->
        if String.starts_with?(remaining_source, closing_char_str) do
          # End of collection: consume the closing token and finalize.
          closing_char_len = String.length(closing_char_str)

          {_closing_token, rest_after_closing_token} =
            String.split_at(remaining_source, closing_char_len)

          final_collection_state = advance_pos(current_state, closing_char_str)
          collection_node = Map.get(final_collection_state.nodes, collection_node_id)

          coll_final_start_offset = elem(collection_start_pos_tuple, 0)
          coll_final_start_line = elem(collection_start_pos_tuple, 1)
          coll_final_start_col = elem(collection_start_pos_tuple, 2)
          coll_final_end_offset = final_collection_state.offset
          coll_final_end_line = final_collection_state.line
          coll_final_end_col = final_collection_state.col

          actual_raw_string =
            String.slice(
              original_source_string,
              coll_final_start_offset,
              coll_final_end_offset - coll_final_start_offset
            )

          updated_collection_node = %{
            collection_node
            | children: Enum.reverse(children_ids_acc),
              location: [
                coll_final_start_offset,
                coll_final_start_line,
                coll_final_start_col,
                coll_final_end_offset,
                coll_final_end_line,
                coll_final_end_col
              ],
              raw_string: actual_raw_string
          }

          final_state_with_collection = %{
            final_collection_state
            | nodes:
                Map.put(
                  final_collection_state.nodes,
                  collection_node_id,
                  updated_collection_node
                )
          }

          {:ok, collection_node_id, rest_after_closing_token, final_state_with_collection}
        else
          # Parse an element, parented to this collection.
          case parse_datum(
                 original_source_string,
                 remaining_source,
                 current_state,
                 collection_node_id
               ) do
            {:ok, child_node_id, next_source_after_elem, next_state_after_elem} ->
              parse_collection_elements(
                original_source_string,
                next_source_after_elem,
                next_state_after_elem,
                collection_node_id,
                # Add the successful child's ID
                [child_node_id | children_ids_acc],
                collection_start_pos_tuple,
                closing_char_str,
                unclosed_error_message,
                element_error_message
              )

            {:error_node, child_error_node_id, _child_reason, next_source_after_elem,
             next_state_after_elem} ->
              # An error node was created for the child; record it and continue.
              parse_collection_elements(
                original_source_string,
                next_source_after_elem,
                next_state_after_elem,
                collection_node_id,
                [child_error_node_id | children_ids_acc],
                collection_start_pos_tuple,
                closing_char_str,
                unclosed_error_message,
                element_error_message
              )

              # parse_datum always returns :ok or :error_node and always
              # consumes input on a non-empty source, so this loop terminates.
          end
        end
    end
  end

  # --- Utility Functions ---

  # Creates a node map, merges in extra_fields, and stores it in state.nodes.
  # Returns {node_id, new_state}. extra_fields is required (an optional default
  # was never used).
  defp add_node(state, parent_id, location, raw_string, ast_node_type, extra_fields) do
    node_id = System.unique_integer([:monotonic, :positive])

    node =
      %{
        id: node_id,
        type_id: nil,
        parent_id: parent_id,
        file: state.file_name,
        # [start_offset, start_line, start_col, end_offset, end_line, end_col]
        location: location,
        raw_string: raw_string,
        ast_node_type: ast_node_type
      }
      |> Map.merge(extra_fields)

    {node_id, %{state | nodes: Map.put(state.nodes, node_id, node)}}
  end

  # Skips leading whitespace. Returns {:ok, rest, state} with positions
  # advanced, or {:eos, state} when the source is empty.
  defp skip_whitespace(source, state = %__MODULE__{offset: o, line: l, col: c}) do
    whitespace_match = Regex.run(~r/^\s+/, source)

    if whitespace_match do
      [ws | _] = whitespace_match
      new_offset = o + String.length(ws)
      {new_line, new_col} = calculate_new_line_col(ws, l, c)
      remaining_source = String.slice(source, String.length(ws)..-1//1)
      {:ok, remaining_source, %{state | offset: new_offset, line: new_line, col: new_col}}
    else
      # `source == ""` rather than String.length(source) == 0: the latter is an
      # O(n) grapheme count just to test emptiness.
      if source == "" do
        {:eos, state}
      else
        # No leading whitespace
        {:ok, source, state}
      end
    end
  end

  # Walks a consumed segment codepoint-by-codepoint, advancing line/col;
  # a newline bumps the line and resets col to 1.
  defp calculate_new_line_col(string_segment, start_line, start_col) do
    string_segment
    |> String.codepoints()
    |> Enum.reduce({start_line, start_col}, fn char, {line, col} ->
      if char == "\n" do
        {line + 1, 1}
      else
        {line, col + 1}
      end
    end)
  end

  # Advances the grapheme offset and line/col past consumed_string.
  defp advance_pos(state = %__MODULE__{offset: o, line: l, col: c}, consumed_string) do
    new_offset = o + String.length(consumed_string)
    {new_line, new_col} = calculate_new_line_col(consumed_string, l, c)
    %{state | offset: new_offset, line: new_line, col: new_col}
  end
end