elipl/lib/til/parser.ex
Kacper Marzecki 748f87636a checkpoint
checkpoint

failing test

after fixing tests

checkpoint

checkpoint

checkpoint

re-work

asd

checkpoint

checkpoint

checkpoint

mix proj

checkpoint mix

first parser impl

checkpoint

fix tests

re-org parser

checkpoint strings

fix multiline strings

tuples

checkpoint maps

checkpoint

checkpoint

checkpoint

checkpoint

fix weird eof expression parse error

checkpoint before typing

checkpoint

checkpoint

checkpoint

checkpoint

checkpoint ids in primitive types

checkpoint

checkpoint

fix tests

initial annotation

checkpoint

checkpoint

checkpoint

union subtyping

conventions

refactor - split typer

typing tuples

checkpoint test refactor

checkpoint test refactor

parsing atoms

checkpoint atoms

wip lists

checkpoint typing lists

checkpoint

checkpoint

wip fixing

correct list typing

map discussion

checkpoint map basic typing

fix tests checkpoint

checkpoint

checkpoint

checkpoint

fix condition typing

fix literal keys in map types

checkpoint union types

checkpoint union type

checkpoint row types discussion & bidirectional typecheck

checkpoint

basic lambdas

checkpoint lambdas typing application

wip function application

checkpoint

checkpoint

checkpoint cduce

checkpoint

checkpoint

checkpoint

checkpoint

checkpoint

checkpoint

checkpoint
2025-06-13 23:48:07 +02:00

964 lines
34 KiB
Elixir

defmodule Til.Parser do
@moduledoc """
Parser for the Tilly Lisp dialect.
It transforms source code into a collection of Node Maps.

`parse/2` returns `{:ok, nodes}` where `nodes` maps a unique integer node id
to a node map. Every node carries `:id`, `:parent_id`, `:file`, `:location`
(`[start_offset, start_line, start_col, end_offset, end_line, end_col]`),
`:raw_string` and `:ast_node_type`; collection and file nodes also carry
`:children`, and failed parses carry a non-nil `:parsing_error`.
"""
# Represents the current parsing position:
#   offset    - 0-based character offset into the source
#   line/col  - 1-based cursor position (col resets to 1 after a newline)
#   file_name - copied into every node's :file field
#   nodes     - accumulating id => node map built up during the parse
defstruct offset: 0, line: 1, col: 1, file_name: "unknown", nodes: %{}
@doc """
Parses a source string into a map of AST nodes.

Returns `{:ok, nodes}` where `nodes` maps each unique integer node id to a
node map. The root is a single `:file` node spanning the whole source whose
`:children` lists all top-level expressions sorted by start offset. Parse
errors never abort the parse; they surface as nodes with `:parsing_error` set.
"""
def parse(source_string, file_name \\ "unknown") do
  file_node_id = System.unique_integer([:monotonic, :positive])
  # Initial location for the file node (starts at the beginning)
  file_start_offset = 0
  file_start_line = 1
  file_start_col = 1

  # End location and raw_string will be finalized after parsing all content
  prelim_file_node = %{
    id: file_node_id,
    type_id: nil,
    # File node is the root
    parent_id: nil,
    file: file_name,
    # End TBD
    location: [file_start_offset, file_start_line, file_start_col, 0, 0, 0],
    # TBD
    raw_string: "",
    ast_node_type: :file,
    # TBD
    children: [],
    parsing_error: nil
  }

  initial_state = %__MODULE__{
    file_name: file_name,
    # Add prelim file node
    nodes: %{file_node_id => prelim_file_node},
    # Initial state offset should be 0 for the file
    offset: 0,
    # Initial state line should be 1
    line: 1,
    # Initial state col should be 1
    col: 1
  }

  # Pass original_source_string for raw_string extraction, and file_node_id as parent for top-level exprs
  final_state_after_expressions =
    parse_all_expressions(source_string, source_string, initial_state, file_node_id)

  # Finalize the file node.
  # Calculate end position of the entire source string
  {file_end_line, file_end_col} = calculate_new_line_col(source_string, 1, 1)
  # Offset is 0-indexed, length is the count of characters, so end_offset is length.
  file_end_offset = String.length(source_string)

  # Collect children of the file node (any node parented directly to it)
  file_children_ids =
    final_state_after_expressions.nodes
    |> Map.values()
    |> Enum.filter(&(&1.parent_id == file_node_id))
    # Sort by start offset to maintain order of appearance in the source
    |> Enum.sort_by(fn node -> hd(node.location) end)
    |> Enum.map(& &1.id)

  updated_file_node =
    final_state_after_expressions.nodes
    |> Map.get(file_node_id)
    |> Map.merge(%{
      location: [
        file_start_offset,
        file_start_line,
        file_start_col,
        file_end_offset,
        file_end_line,
        file_end_col
      ],
      # The entire source is the raw string of the file node
      raw_string: source_string,
      children: file_children_ids
    })

  final_nodes =
    Map.put(final_state_after_expressions.nodes, file_node_id, updated_file_node)

  {:ok, final_nodes}
end
# --- Main Parsing Logic ---
# `original_source_string` is the untouched full input (kept for raw-string
# extraction); `source_string` is the not-yet-consumed remainder.
# `parent_id_for_top_level_expressions` is the node id (the :file node) that
# every top-level expression is parented to.
defp parse_all_expressions(
       original_source_string,
       source_string,
       state,
       parent_id_for_top_level_expressions
     ) do
  case skip_whitespace(source_string, state) do
    {:eos, done_state} ->
      done_state

    {:ok, "", done_state} ->
      # Nothing but whitespace remained - this branch of parsing is complete.
      done_state

    {:ok, content, state_at_content} ->
      # parse_datum either succeeds or records an error node; both outcomes
      # hand back the remaining source and updated state, so the loop
      # continues uniformly.
      {next_source, next_state} =
        case parse_datum(
               original_source_string,
               content,
               state_at_content,
               parent_id_for_top_level_expressions
             ) do
          {:ok, _node_id, rest, new_state} -> {rest, new_state}
          {:error_node, _node_id, _reason, rest, new_state} -> {rest, new_state}
        end

      # NOTE: termination relies on parse_datum always consuming input on a
      # non-empty source (create_error_node_and_advance consumes 1 char in
      # the worst case).
      parse_all_expressions(
        original_source_string,
        next_source,
        next_state,
        parent_id_for_top_level_expressions
      )
  end
end
# Parses a single datum: a literal (integer, string, atom), a symbol, or one
# of the collection forms (s-expression, list, tuple, map).
defp parse_datum(original_source_string, source, state, parent_id) do
  # Dispatch on the leading token. The multi-character opener "m{" must be
  # tried before the single-character "{", and the delimiter cases before the
  # generic integer/symbol fallback. On an empty source no prefix matches and
  # the default branch produces a 1-character error node (consuming nothing).
  cond do
    String.starts_with?(source, "m{") ->
      parse_map_expression(original_source_string, source, state, parent_id)

    String.starts_with?(source, "(") ->
      parse_s_expression(original_source_string, source, state, parent_id)

    String.starts_with?(source, ")") ->
      # A stray ')' - record a 1-character error token and move on.
      create_error_node_and_advance(source, state, parent_id, 1, "Unexpected ')'")

    String.starts_with?(source, "[") ->
      parse_list_expression(original_source_string, source, state, parent_id)

    String.starts_with?(source, "]") ->
      create_error_node_and_advance(source, state, parent_id, 1, "Unexpected ']'")

    String.starts_with?(source, "{") ->
      # For tuples
      parse_tuple_expression(original_source_string, source, state, parent_id)

    String.starts_with?(source, "}") ->
      create_error_node_and_advance(source, state, parent_id, 1, "Unexpected '}'")

    String.starts_with?(source, "'") ->
      parse_string_datum(original_source_string, source, state, parent_id)

    String.starts_with?(source, ":") ->
      parse_colon_token(source, state, parent_id)

    true ->
      parse_other_token(source, state, parent_id)
  end
end

# ':' may start an atom literal (e.g. `:foo`); if that fails, fall back to
# symbol parsing, and finally to a 1-character error node.
defp parse_colon_token(source, state, parent_id) do
  case parse_atom_datum(source, state, parent_id) do
    {:ok, _, _, _} = ok ->
      ok

    {:error, :not_atom} ->
      case parse_symbol_datum(source, state, parent_id) do
        {:ok, _, _, _} = ok ->
          ok

        {:error, :not_symbol} ->
          create_error_node_and_advance(
            source,
            state,
            parent_id,
            1,
            "Unknown token starting with ':'"
          )
      end
  end
end

# Default case: try integer first, then symbol, then a 1-character error node.
defp parse_other_token(source, state, parent_id) do
  case parse_integer_datum(source, state, parent_id) do
    {:ok, _, _, _} = ok ->
      ok

    {:error, :not_integer} ->
      case parse_symbol_datum(source, state, parent_id) do
        {:ok, _, _, _} = ok ->
          ok

        {:error, :not_symbol} ->
          create_error_node_and_advance(source, state, parent_id, 1, "Unknown token")
      end
  end
end
# --- Datum Parsing Helpers --- (parse_string_datum, process_string_content)
# Parses a "'"-delimited string literal. Multiline bodies are dedented by the
# column of the opening quote (see process_string_content/2). Returns
# {:ok, ...} or, for an unclosed literal, {:error_node, ...} after consuming
# the rest of the input.
defp parse_string_datum(_original_source_string, source, state, parent_id) do
  # state is before consuming "'"
  initial_state_for_token = state
  # Dedent amount for continuation lines: the 0-based column of the quote.
  strip_indent = initial_state_for_token.col - 1
  # Consume opening "'"
  {opening_tick, source_after_opening_tick} = String.split_at(source, 1)

  case :binary.match(source_after_opening_tick, "'") do
    :nomatch ->
      # Unclosed string: everything to EOF becomes the content, and the node
      # is recorded with a parsing_error while still carrying its value.
      content_segment = source_after_opening_tick
      raw_token = opening_tick <> content_segment
      state_at_node_end = advance_pos(initial_state_for_token, raw_token)

      location = [
        initial_state_for_token.offset,
        initial_state_for_token.line,
        initial_state_for_token.col,
        state_at_node_end.offset,
        state_at_node_end.line,
        state_at_node_end.col
      ]

      processed_value = process_string_content(content_segment, strip_indent)

      {node_id, state_with_error_node} =
        add_node(
          initial_state_for_token,
          parent_id,
          location,
          raw_token,
          :literal_string,
          %{value: processed_value, parsing_error: "Unclosed string literal"}
        )

      final_state = %{
        state_with_error_node
        | offset: state_at_node_end.offset,
          line: state_at_node_end.line,
          col: state_at_node_end.col
      }

      {:error_node, node_id, "Unclosed string literal", "", final_state}

    {idx_closing_tick, _tick_length} ->
      # BUGFIX: :binary.match/2 returns a BYTE offset, but the previous code
      # sliced with String.slice/2,3, which count graphemes - any multi-byte
      # UTF-8 character before the closing quote mis-sliced the literal.
      # binary_part/3 slices by bytes, matching the offset's semantics.
      content_segment = binary_part(source_after_opening_tick, 0, idx_closing_tick)
      closing_tick = "'"
      raw_token = opening_tick <> content_segment <> closing_tick

      rest_of_source =
        binary_part(
          source_after_opening_tick,
          idx_closing_tick + 1,
          byte_size(source_after_opening_tick) - (idx_closing_tick + 1)
        )

      state_at_node_end = advance_pos(initial_state_for_token, raw_token)

      location = [
        initial_state_for_token.offset,
        initial_state_for_token.line,
        initial_state_for_token.col,
        state_at_node_end.offset,
        state_at_node_end.line,
        state_at_node_end.col
      ]

      processed_value = process_string_content(content_segment, strip_indent)

      {new_node_id, state_with_node} =
        add_node(
          initial_state_for_token,
          parent_id,
          location,
          raw_token,
          :literal_string,
          %{value: processed_value}
        )

      final_state = %{
        state_with_node
        | offset: state_at_node_end.offset,
          line: state_at_node_end.line,
          col: state_at_node_end.col
      }

      {:ok, new_node_id, rest_of_source, final_state}
  end
end
# Dedents the body of a (possibly multiline) string literal.
# The first line is kept verbatim; every subsequent line loses up to
# `strip_indent` leading whitespace characters (never more than it actually
# has), so string bodies can be indented to match the surrounding code.
defp process_string_content(content_str, strip_indent) when strip_indent >= 0 do
  # String.split/3 always yields at least one element, even for "".
  [first_line | rest_lines] = String.split(content_str, "\n", trim: false)

  processed_rest_lines =
    Enum.map(rest_lines, fn line ->
      # Length of the leading whitespace run; ^(\s*) always matches (possibly
      # the empty string), so Regex.run/2 never returns nil here.
      current_leading_spaces_count =
        Regex.run(~r/^(\s*)/, line)
        |> List.first()
        |> String.length()

      spaces_to_remove = min(current_leading_spaces_count, strip_indent)

      # String.split_at/2 replaces the deprecated negative-step range slice
      # `String.slice(line, spaces_to_remove..-1)`.
      line
      |> String.split_at(spaces_to_remove)
      |> elem(1)
    end)

  Enum.join([first_line | processed_rest_lines], "\n")
end
# Parses an atom literal: ':' followed by one or more non-delimiter
# characters (delimiters: whitespace and ()[]{}). The colon is part of the
# atom's raw string; the name after it becomes the node's value.
defp parse_atom_datum(source, state, parent_id) do
  case Regex.run(~r/^:([^\s\(\)\[\]\{\}]+)/, source) do
    [raw_atom_str, atom_name_part] ->
      # raw_atom_str is e.g. ":foo"; atom_name_part is "foo" (the `+` in the
      # regex guarantees it is non-empty).
      # split_at replaces the deprecated negative-step range slice
      # `String.slice(source, len..-1)`.
      {_consumed, rest_after_atom} = String.split_at(source, String.length(raw_atom_str))

      start_offset = state.offset
      start_line = state.line
      start_col = state.col
      state_after_token = advance_pos(state, raw_atom_str)

      location = [
        start_offset,
        start_line,
        start_col,
        state_after_token.offset,
        state_after_token.line,
        state_after_token.col
      ]

      # NOTE(review): String.to_atom/1 creates a new atom for every distinct
      # name, and atoms are never garbage-collected - parsing untrusted
      # source could exhaust the atom table. Consider interning the name as a
      # string instead.
      atom_value = String.to_atom(atom_name_part)

      {new_node_id, state_with_node} =
        add_node(
          state,
          parent_id,
          location,
          raw_atom_str,
          :literal_atom,
          %{value: atom_value}
        )

      final_state = %{
        state_with_node
        | offset: state_after_token.offset,
          line: state_after_token.line,
          col: state_after_token.col
      }

      {:ok, new_node_id, rest_after_atom, final_state}

    _ ->
      # No match: just ":" alone, or ':' immediately followed by a delimiter.
      {:error, :not_atom}
  end
end
# Attempts to parse a leading integer literal. On failure nothing is
# consumed and {:error, :not_integer} is returned so the caller can try
# other datum kinds.
defp parse_integer_datum(source, state, parent_id) do
  case Integer.parse(source) do
    :error ->
      # Source does not start with an integer; state untouched.
      {:error, :not_integer}

    {int_val, rest} ->
      # The consumed prefix is whatever Integer.parse/1 ate.
      consumed_len = String.length(source) - String.length(rest)
      raw_int = String.slice(source, 0, consumed_len)
      pos_after = advance_pos(state, raw_int)

      location = [
        state.offset,
        state.line,
        state.col,
        pos_after.offset,
        pos_after.line,
        pos_after.col
      ]

      {node_id, state_with_node} =
        add_node(state, parent_id, location, raw_int, :literal_integer, %{value: int_val})

      # Advance the running position past the consumed token.
      next_state = %{
        state_with_node
        | offset: pos_after.offset,
          line: pos_after.line,
          col: pos_after.col
      }

      {:ok, node_id, rest, next_state}
  end
end
# Parses a symbol: a maximal run of non-delimiter characters (delimiters are
# whitespace and ()[]{}). "m{" is dispatched before symbol parsing.
# NOTE(review): the single quote "'" is NOT a delimiter here, so a symbol can
# swallow an adjacent string quote (e.g. `foo'bar'`) - confirm intended.
defp parse_symbol_datum(source, state, parent_id) do
  case Regex.run(~r/^([^\s\(\)\[\]\{\}]+)/, source) do
    nil ->
      # Source does not start with a symbol character; nothing consumed.
      {:error, :not_symbol}

    [raw_symbol | _] ->
      # split_at replaces the deprecated negative-step range slice
      # `String.slice(source, len..-1)`.
      {_consumed, rest_after_symbol} = String.split_at(source, String.length(raw_symbol))

      start_offset = state.offset
      start_line = state.line
      start_col = state.col
      state_after_token = advance_pos(state, raw_symbol)

      location = [
        start_offset,
        start_line,
        start_col,
        state_after_token.offset,
        state_after_token.line,
        state_after_token.col
      ]

      {new_node_id, state_with_node} =
        add_node(state, parent_id, location, raw_symbol, :symbol, %{name: raw_symbol})

      # Advance the running position past the consumed token.
      final_state = %{
        state_with_node
        | offset: state_after_token.offset,
          line: state_after_token.line,
          col: state_after_token.col
      }

      {:ok, new_node_id, rest_after_symbol, final_state}
  end
end
# Records `num_chars_for_token` characters as an :unknown error node carrying
# `error_message`, advances the parser position past them, and returns the
# remaining source so parsing can continue.
defp create_error_node_and_advance(
       source_for_token,
       state_before_token,
       parent_id,
       num_chars_for_token,
       error_message
     ) do
  {raw_token, rest_of_source} = String.split_at(source_for_token, num_chars_for_token)

  advanced = advance_pos(state_before_token, raw_token)

  location = [
    state_before_token.offset,
    state_before_token.line,
    state_before_token.col,
    advanced.offset,
    advanced.line,
    advanced.col
  ]

  {error_node_id, state_with_error_node} =
    add_node(state_before_token, parent_id, location, raw_token, :unknown, %{
      parsing_error: error_message
    })

  # Further parsing resumes just past the consumed token, with the error node
  # already registered in the node map.
  final_error_state = %{
    state_with_error_node
    | offset: advanced.offset,
      line: advanced.line,
      col: advanced.col
  }

  {:error_node, error_node_id, error_message, rest_of_source, final_error_state}
end
# Parses a "(...)" form via the generic collection parser, then re-tags the
# result as a :lambda_expression when its head is the symbol `fn`.
defp parse_s_expression(original_source_string, source, state, parent_id) do
  result =
    parse_collection(
      original_source_string,
      source,
      state,
      parent_id,
      "(",
      ")",
      :s_expression,
      "Unclosed S-expression",
      "Error parsing element in S-expression. Content might be incomplete."
    )

  # Only a successfully parsed S-expression whose first child is the `fn`
  # symbol is transformed; every other outcome passes through untouched.
  with {:ok, node_id, rest_after_collection, state_after} <- result,
       collection_node = Map.get(state_after.nodes, node_id),
       true <- is_fn_expression?(collection_node, state_after.nodes) do
    transformed_node = transform_to_lambda_expression(collection_node, state_after.nodes)

    final_state = %{
      state_after
      | nodes: Map.put(state_after.nodes, transformed_node.id, transformed_node)
    }

    {:ok, transformed_node.id, rest_after_collection, final_state}
  else
    # Errors from parse_collection, or a plain (non-fn) S-expression.
    _ -> result
  end
end
# True when the node is an S-expression whose first child is the symbol `fn`.
# Always returns a strict boolean: the previous implementation could leak
# `nil` out of its `&&` chain when the first child id was missing from the
# node map, and raised if a node lacked a :children key.
defp is_fn_expression?(%{ast_node_type: :s_expression, children: [first_child_id | _]}, nodes_map) do
  match?(%{ast_node_type: :symbol, name: "fn"}, Map.get(nodes_map, first_child_id))
end

defp is_fn_expression?(_node, _nodes_map), do: false
# Helper to transform a generic S-expression node (known to be an 'fn' form)
# into a :lambda_expression node.
# On success the node is re-tagged :lambda_expression and gains
# :params_s_expr_id, :arg_spec_node_ids, :return_type_spec_node_id and
# :body_node_ids. On any validation failure the node is returned unchanged
# except for a descriptive :parsing_error (it stays an :s_expression).
defp transform_to_lambda_expression(s_expr_node, nodes_map) do
  # s_expr_node.children = [fn_symbol_id, params_s_expr_id, body_form1_id, ...]
  # Already checked by is_fn_expression?/2
  _fn_symbol_id = Enum.at(s_expr_node.children, 0)

  if length(s_expr_node.children) < 2 do
    %{s_expr_node | parsing_error: "Malformed 'fn' expression: missing parameters list."}
  else
    params_s_expr_id = Enum.at(s_expr_node.children, 1)
    params_s_expr_node = Map.get(nodes_map, params_s_expr_id)

    if !(params_s_expr_node && params_s_expr_node.ast_node_type == :s_expression) do
      Map.put(s_expr_node, :parsing_error, "Malformed 'fn' expression: parameters list is not an S-expression.")
    else
      # Children of the parameters S-expression, e.g. for (fn ((a integer) (b atom) atom) ...),
      # param_s_expr_children_ids would be IDs of [(a integer), (b atom), atom]
      all_param_children_ids = Map.get(params_s_expr_node, :children, [])

      {arg_spec_node_ids, return_type_spec_node_id} =
        if Enum.empty?(all_param_children_ids) do
          # Case: (fn () body) -> No args, nil (inferred) return type spec
          {[], nil}
        else
          # Case: (fn (arg1 type1 ... ret_type) body)
          # Last element is return type spec, rest are arg specs.
          args = Enum.take(all_param_children_ids, length(all_param_children_ids) - 1)
          ret_type_id = List.last(all_param_children_ids)
          {args, ret_type_id}
        end

      # Validate arg_spec_node_ids: each must be a symbol or an S-expr (param_symbol type_spec)
      all_arg_specs_valid =
        Enum.all?(arg_spec_node_ids, fn arg_id ->
          arg_node = Map.get(nodes_map, arg_id)

          case arg_node do
            # Bare parameter, e.g. x
            %{ast_node_type: :symbol} -> true

            # Typed parameter, e.g. (x integer)
            %{ast_node_type: :s_expression, children: s_children} ->
              if length(s_children) == 2 do
                param_sym_node = Map.get(nodes_map, hd(s_children))
                type_spec_node = Map.get(nodes_map, hd(tl(s_children)))

                # The name must be a symbol; the type spec may itself be a
                # symbol or a nested S-expression.
                param_sym_node && param_sym_node.ast_node_type == :symbol &&
                  type_spec_node && (type_spec_node.ast_node_type == :symbol || type_spec_node.ast_node_type == :s_expression)
              else
                # Not a valid (param_symbol type_spec) structure
                false
              end

            # Not a symbol or valid S-expression for arg spec
            _ -> false
          end
        end)

      # Validate return_type_spec_node_id: must be nil or a valid type specifier node
      return_type_spec_valid =
        if is_nil(return_type_spec_node_id) do
          # Inferred return type is valid
          true
        else
          ret_type_node = Map.get(nodes_map, return_type_spec_node_id)

          ret_type_node && (ret_type_node.ast_node_type == :symbol || ret_type_node.ast_node_type == :s_expression)
        end

      if all_arg_specs_valid && return_type_spec_valid do
        # Body starts after 'fn' and params_s_expr
        body_node_ids = Enum.drop(s_expr_node.children, 2)

        Map.merge(s_expr_node, %{
          :ast_node_type => :lambda_expression,
          :params_s_expr_id => params_s_expr_id,
          :arg_spec_node_ids => arg_spec_node_ids,
          :return_type_spec_node_id => return_type_spec_node_id,
          :body_node_ids => body_node_ids
        })
      else
        # Determine more specific error message (arg errors take precedence)
        error_message =
          cond do
            !all_arg_specs_valid -> "Malformed 'fn' expression: invalid argument specification(s)."
            !return_type_spec_valid -> "Malformed 'fn' expression: invalid return type specification."
            # Generic fallback
            true -> "Malformed 'fn' expression."
          end

        Map.put(s_expr_node, :parsing_error, error_message)
      end
    end
  end
end
# "[ ... ]" - list literal; delegates to the generic collection parser.
defp parse_list_expression(original_source_string, source, state, parent_id) do
  parse_collection(
    original_source_string,
    source,
    state,
    parent_id,
    "[",
    "]",
    :list_expression,
    "Unclosed list",
    "Error parsing element in list. Content might be incomplete."
  )
end
# "m{ ... }" - map literal; the two-character opener is matched by
# parse_datum/4 before the single-character "{" tuple opener.
defp parse_map_expression(original_source_string, source, state, parent_id) do
  parse_collection(
    original_source_string,
    source,
    state,
    parent_id,
    # Opening token
    "m{",
    # Closing token
    "}",
    :map_expression,
    "Unclosed map",
    "Error parsing element in map. Content might be incomplete."
  )
end
# "{ ... }" - tuple literal; delegates to the generic collection parser.
defp parse_tuple_expression(original_source_string, source, state, parent_id) do
  parse_collection(
    original_source_string,
    source,
    state,
    parent_id,
    "{",
    "}",
    :tuple_expression,
    "Unclosed tuple",
    "Error parsing element in tuple. Content might be incomplete."
  )
end
# Generic parser for delimited collections: S-expressions "(...)", lists
# "[...]", tuples "{...}" and maps "m{...}". Registers a preliminary node
# (end position, raw string and children still unknown), parses the elements,
# then adapts the element parser's result to the {:ok, ...} / {:error_node, ...}
# datum protocol.
defp parse_collection(
       original_source_string,
       source,
       state,
       parent_id,
       open_char_str,
       close_char_str,
       ast_node_type,
       unclosed_error_msg,
       element_error_msg
     ) do
  # The collection node starts where its opening token starts.
  start_offset = state.offset
  start_line = state.line
  start_col = state.col

  # Consume the opening token (e.g. "(", "[", "m{").
  {_opening_token, rest_after_opening_token} =
    String.split_at(source, String.length(open_char_str))

  state_after_open = advance_pos(state, open_char_str)

  collection_node_id = System.unique_integer([:monotonic, :positive])

  # End location, raw_string and children are filled in once the closing
  # token (or EOF) is reached.
  prelim_collection_node = %{
    id: collection_node_id,
    type_id: nil,
    parent_id: parent_id,
    file: state_after_open.file_name,
    location: [start_offset, start_line, start_col, 0, 0, 0],
    raw_string: "",
    ast_node_type: ast_node_type,
    children: [],
    parsing_error: nil
  }

  state_with_prelim = %{
    state_after_open
    | nodes: Map.put(state_after_open.nodes, collection_node_id, prelim_collection_node)
  }

  elements_result =
    parse_collection_elements(
      original_source_string,
      rest_after_opening_token,
      state_with_prelim,
      collection_node_id,
      [],
      {start_offset, start_line, start_col},
      close_char_str,
      unclosed_error_msg,
      element_error_msg
    )

  case elements_result do
    {:ok, _returned_node_id, _rest, _state_after_elements} = ok ->
      ok

    {:error, reason, rest, state_after_elements} ->
      # Unclosed collection (or fatal element error): the collection node
      # itself is the error carrier.
      {:error_node, collection_node_id, reason, rest, state_after_elements}
  end
end
# Generalized from parse_s_expression_elements.
# Element loop for all collection types: accumulates children ids (in
# reverse) until the closing token, EOF, or an all-whitespace tail.
defp parse_collection_elements(
       original_source_string,
       source,
       state,
       collection_node_id,
       children_ids_acc,
       collection_start_pos_tuple,
       closing_char_str,
       unclosed_error_message,
       element_error_message
     ) do
  case skip_whitespace(source, state) do
    {:eos, state_at_eos} ->
      finalize_unclosed_collection(
        original_source_string,
        state_at_eos,
        collection_node_id,
        children_ids_acc,
        collection_start_pos_tuple,
        unclosed_error_message
      )

    {:ok, "", state_at_eos} ->
      # BUGFIX: when a collection ended with whitespace followed by EOF, the
      # old code fell through to parse_datum/4 with "" and fabricated a
      # spurious empty "Unknown token" child before reporting the unclosed
      # collection. An all-whitespace tail is now treated exactly like EOF.
      finalize_unclosed_collection(
        original_source_string,
        state_at_eos,
        collection_node_id,
        children_ids_acc,
        collection_start_pos_tuple,
        unclosed_error_message
      )

    {:ok, remaining_source, current_state} ->
      if String.starts_with?(remaining_source, closing_char_str) do
        # End of collection: consume the closing token and finalize the node.
        {_closing_token, rest_after_closing_token} =
          String.split_at(remaining_source, String.length(closing_char_str))

        final_collection_state = advance_pos(current_state, closing_char_str)
        collection_node = Map.get(final_collection_state.nodes, collection_node_id)

        {start_offset, start_line, start_col} = collection_start_pos_tuple
        end_offset = final_collection_state.offset

        actual_raw_string =
          String.slice(original_source_string, start_offset, end_offset - start_offset)

        updated_collection_node = %{
          collection_node
          | children: Enum.reverse(children_ids_acc),
            location: [
              start_offset,
              start_line,
              start_col,
              end_offset,
              final_collection_state.line,
              final_collection_state.col
            ],
            raw_string: actual_raw_string
        }

        final_state_with_collection = %{
          final_collection_state
          | nodes:
              Map.put(
                final_collection_state.nodes,
                collection_node_id,
                updated_collection_node
              )
        }

        {:ok, collection_node_id, rest_after_closing_token, final_state_with_collection}
      else
        # Parse one element. Both success and element-level error yield a
        # child node id plus the remaining source, so the loop continues
        # uniformly. Termination relies on parse_datum/4 always consuming
        # input on a non-empty source.
        {child_id, next_source, next_state} =
          case parse_datum(
                 original_source_string,
                 remaining_source,
                 current_state,
                 # parent_id for the element
                 collection_node_id
               ) do
            {:ok, id, src, st} -> {id, src, st}
            {:error_node, id, _reason, src, st} -> {id, src, st}
          end

        parse_collection_elements(
          original_source_string,
          next_source,
          next_state,
          collection_node_id,
          [child_id | children_ids_acc],
          collection_start_pos_tuple,
          closing_char_str,
          unclosed_error_message,
          element_error_message
        )
      end
  end
end

# Fills in the final location/raw_string/children of a collection whose
# closing token was never found (EOF reached first), tagging it with
# `unclosed_error_message`.
defp finalize_unclosed_collection(
       original_source_string,
       state_at_eos,
       collection_node_id,
       children_ids_acc,
       {start_offset, start_line, start_col},
       unclosed_error_message
     ) do
  collection_node = Map.get(state_at_eos.nodes, collection_node_id)
  end_offset = state_at_eos.offset

  actual_raw_string =
    String.slice(original_source_string, start_offset, end_offset - start_offset)

  updated_collection_node = %{
    collection_node
    | parsing_error: unclosed_error_message,
      children: Enum.reverse(children_ids_acc),
      location: [
        start_offset,
        start_line,
        start_col,
        end_offset,
        state_at_eos.line,
        state_at_eos.col
      ],
      raw_string: actual_raw_string
  }

  final_state = %{
    state_at_eos
    | nodes: Map.put(state_at_eos.nodes, collection_node_id, updated_collection_node)
  }

  # The collection node itself carries the unclosed-collection error.
  {:error, unclosed_error_message, "", final_state}
end
# --- Utility Functions ---
# Creates a node map with a fresh unique id, merges `extra_fields` over the
# common fields, stores it in the state's node map, and returns
# {node_id, new_state}. (`extra_fields` is required; the former optional
# default was never used, per compiler warnings.)
defp add_node(state, parent_id, location, raw_string, ast_node_type, extra_fields) do
  node_id = System.unique_integer([:monotonic, :positive])

  base_fields = %{
    id: node_id,
    type_id: nil,
    parent_id: parent_id,
    file: state.file_name,
    # location = [start_offset, start_line, start_col, end_offset, end_line, end_col]
    location: location,
    raw_string: raw_string,
    ast_node_type: ast_node_type
  }

  node = Map.merge(base_fields, extra_fields)

  {node_id, %{state | nodes: Map.put(state.nodes, node_id, node)}}
end
# Consumes leading whitespace. Returns:
#   {:eos, state}               - the source was completely empty
#   {:ok, remaining, new_state} - whitespace (possibly none) was skipped;
#                                 `remaining` may still be "" if the source
#                                 consisted solely of whitespace.
defp skip_whitespace("", state), do: {:eos, state}

defp skip_whitespace(source, state = %__MODULE__{offset: o, line: l, col: c}) do
  case Regex.run(~r/^\s+/, source) do
    [ws | _] ->
      ws_len = String.length(ws)
      {new_line, new_col} = calculate_new_line_col(ws, l, c)
      # split_at replaces the deprecated negative-step range slice
      # `String.slice(source, len..-1)`.
      {_ws, remaining_source} = String.split_at(source, ws_len)

      {:ok, remaining_source, %{state | offset: o + ws_len, line: new_line, col: new_col}}

    nil ->
      # No leading whitespace to skip.
      {:ok, source, state}
  end
end
# Walks `string_segment` from {start_line, start_col} and returns the
# {line, col} position just past its final character: each newline advances
# the line counter and resets the column to 1; every other codepoint moves
# the column one to the right.
defp calculate_new_line_col(string_segment, start_line, start_col) do
  case String.split(string_segment, "\n") do
    [only_line] ->
      # No newline: stay on the same line, advance the column.
      {start_line, start_col + String.length(only_line)}

    lines ->
      # One line per newline crossed; the column restarts after the last one.
      {start_line + length(lines) - 1, String.length(List.last(lines)) + 1}
  end
end
# Returns `state` advanced past `consumed_string`: the offset grows by its
# grapheme length and line/col are recomputed via calculate_new_line_col/3.
defp advance_pos(state = %__MODULE__{}, consumed_string) do
  {next_line, next_col} = calculate_new_line_col(consumed_string, state.line, state.col)

  %{
    state
    | offset: state.offset + String.length(consumed_string),
      line: next_line,
      col: next_col
  }
end
end