defmodule Til.Typer do
  @moduledoc """
  Handles type checking and type inference for the Tilly Lisp dialect.

  It processes the AST (node maps) generated by the parser and annotates
  nodes with their inferred or checked types.
  """

  # alias Til.AstUtils # Removed as it's not used yet and causes a warning.
  # alias MapSet, as: Set # No longer directly used here, moved to specialized modules
  alias Til.Typer.Types
  alias Til.Typer.Interner
  alias Til.Typer.ExpressionTyper
  # alias Til.Typer.SubtypeChecker # Not directly used in this module after refactor
  alias Til.Typer.Environment

  @doc """
  Performs type checking and inference on a map of AST nodes.

  The map is first passed through `Interner.populate_known_types/1`, then the
  node whose `:ast_node_type` is `:file` is typed recursively. Every node that
  is visited gets its `:type_id` field set to the key of its interned type.

  Returns `{:ok, nodes_map}` with the annotated nodes. When no `:file` node is
  present, the pre-populated map is returned unchanged.
  """
  @spec type_check(map()) :: {:ok, map()}
  def type_check(nodes_map) when is_map(nodes_map) do
    initial_env = %{}
    pre_populated_nodes_map = Interner.populate_known_types(nodes_map)

    # Find the main file node to start traversal.
    # Assumes the parser always generates a :file node as the root of top-level expressions.
    # A pattern-matching predicate is used (instead of `entry.ast_node_type`) so that
    # entries without an :ast_node_type key are skipped rather than raising KeyError.
    case Enum.find(Map.values(pre_populated_nodes_map), &file_node?/1) do
      nil ->
        # Should not happen with the current parser, but handle defensively
        # by returning the map with the known types at least.
        {:ok, pre_populated_nodes_map}

      file_node ->
        # Environment modifications propagate through the traversal;
        # only the final nodes map is of interest to the caller.
        case type_node_recursively(file_node.id, pre_populated_nodes_map, initial_env) do
          {:ok, final_nodes_map, _final_env} ->
            {:ok, final_nodes_map}

          # Propagate other return values (e.g., errors) if they occur, though the
          # current implementation of type_node_recursively always returns {:ok, _, _}.
          other_result ->
            other_result
        end
    end
  end

  # True only for map entries tagged as the root :file node.
  defp file_node?(%{ast_node_type: :file}), do: true
  defp file_node?(_entry), do: false

  # Main recursive function for typing nodes.
  # Looks the node up and delegates to do_type_node/3; an unknown node id is a no-op.
  defp type_node_recursively(node_id, nodes_map, env) do
    case Map.get(nodes_map, node_id) do
      nil ->
        # This case should ideally not be reached if node ids are always valid.
        {:ok, nodes_map, env}

      node_data ->
        do_type_node(node_data, nodes_map, env)
    end
  end

  # Worker function to process a single node.
  # Types the relevant children, infers and interns the node's own type, stores the
  # type key on the node, and finally lets Environment update the env.
  # Returns {:ok, nodes_map, env}.
  defp do_type_node(node_data, nodes_map, env) do
    {children_to_process_ids, env_for_children, nodes_map_after_pre_processing, env_scope} =
      children_and_env_for(node_data, nodes_map, env)

    # 1. Recursively type the identified children with the determined environment.
    {nodes_map_after_children, env_after_children} =
      Enum.reduce(
        children_to_process_ids,
        {nodes_map_after_pre_processing, env_for_children},
        fn child_id, {acc_nodes_map, acc_env} ->
          {:ok, next_nodes_map, next_env} =
            type_node_recursively(child_id, acc_nodes_map, acc_env)

          {next_nodes_map, next_env}
        end
      )

    # Bindings made inside an isolated scope (a lambda body, including its parameters)
    # must not be visible to whatever follows the node, so the outer env is restored.
    env_for_current_node =
      case env_scope do
        :keep -> env_after_children
        {:restore, outer_env} -> outer_env
      end

    # Re-read the node: typing the children may have replaced entries in the map.
    current_node_from_map = Map.get(nodes_map_after_children, node_data.id)

    # 2. Infer the type definition for the current node.
    {type_definition_for_current_node, nodes_map_after_inference_logic} =
      infer_type_for_node_ast(
        current_node_from_map,
        nodes_map_after_children,
        env_for_current_node
      )

    # Intern the definition to obtain its key (the interner may extend the nodes map).
    {type_key_for_current_node, nodes_map_after_interning} =
      Interner.get_or_intern_type(
        type_definition_for_current_node,
        nodes_map_after_inference_logic
      )

    # Store the type key on the version of the node that lives in the latest map.
    updated_current_node =
      nodes_map_after_interning
      |> Map.get(current_node_from_map.id)
      |> Map.put(:type_id, type_key_for_current_node)

    nodes_map_with_typed_node =
      Map.put(nodes_map_after_interning, updated_current_node.id, updated_current_node)

    # 3. Update the environment based on the typed node (e.g., for assignments).
    {env_after_current_node, nodes_map_after_env_update} =
      Environment.update_env_from_node(
        updated_current_node,
        nodes_map_with_typed_node,
        env_for_current_node
      )

    {:ok, nodes_map_after_env_update, env_after_current_node}
  end

  # Decides which children do_type_node/3 iterates over and with which environment.
  # Returns {child_ids, env_for_children, nodes_map, env_scope} where env_scope is
  #   :keep                 - the env produced by the children flows on to the caller
  #   {:restore, outer_env} - the children run in an inner scope; outer_env is reinstated
  #
  # For lambdas: (fn params_s_expr body...)
  # The 'fn' symbol (child 0) and params_s_expr are typed here with the outer env;
  # only the body nodes are handed back, to be typed with the lambda body env.
  defp children_and_env_for(%{ast_node_type: :lambda_expression} = node_data, nodes_map, env) do
    # Type the 'fn' symbol (first child of the original S-expression).
    fn_op_child_id = hd(node_data.children)

    {:ok, nmap_after_fn_op, env_after_fn_op} =
      type_node_recursively(fn_op_child_id, nodes_map, env)

    # Type params_s_expr; the node carries `params_s_expr_id` for it.
    {:ok, nmap_after_params_s_expr, outer_env} =
      type_node_recursively(node_data.params_s_expr_id, nmap_after_fn_op, env_after_fn_op)

    # Build the body env by binding every parameter name to its type key.
    {lambda_body_env, nmap_after_arg_type_resolution} =
      Enum.reduce(
        node_data.arg_spec_node_ids,
        {outer_env, nmap_after_params_s_expr},
        &bind_lambda_param/2
      )

    {node_data.body_node_ids, lambda_body_env, nmap_after_arg_type_resolution,
     {:restore, outer_env}}
  end

  # Default: type all children with the current environment.
  defp children_and_env_for(node_data, nodes_map, env) do
    {Map.get(node_data, :children, []), env, nodes_map, :keep}
  end

  # Reducer binding one lambda parameter in the env.
  # The accumulator is {env, nodes_map}; resolving/interning an annotation may extend the map.
  defp bind_lambda_param(arg_spec_id, {acc_env, acc_nodes_map}) do
    arg_spec_node = Map.get(acc_nodes_map, arg_spec_id)

    case arg_spec_node.ast_node_type do
      # Unannotated param, e.g., x
      :symbol ->
        param_type_key = Types.primitive_type_key(:any)
        {Map.put(acc_env, arg_spec_node.name, param_type_key), acc_nodes_map}

      # Annotated param, e.g., (x integer)
      :s_expression ->
        [param_symbol_node_id, type_spec_node_id | _rest] = arg_spec_node.children
        param_symbol_node = Map.get(acc_nodes_map, param_symbol_node_id)
        type_spec_node = Map.get(acc_nodes_map, type_spec_node_id)

        # Resolve and intern the type specifier.
        {raw_type_def, nmap_after_resolve} =
          ExpressionTyper.resolve_type_specifier_node(type_spec_node, acc_nodes_map)

        {param_type_key, nmap_after_intern} =
          Interner.get_or_intern_type(raw_type_def, nmap_after_resolve)

        {Map.put(acc_env, param_symbol_node.name, param_type_key), nmap_after_intern}
    end
  end

  # Infers the type for a node based on its AST type and current environment.
  # `nodes_map` contains potentially typed children (whose :type_id is a key) and
  # canonical type definitions. `env` maps symbol names to type keys.
  # Returns {type_definition_map, possibly_updated_nodes_map}.
  defp infer_type_for_node_ast(node_data, nodes_map, env) do
    case node_data.ast_node_type do
      # Integers, strings and atoms all become a literal type wrapping the node's value.
      literal_kind when literal_kind in [:literal_integer, :literal_string, :literal_atom] ->
        {%{type_kind: :literal, value: node_data.value}, nodes_map}

      :symbol ->
        {infer_symbol_type(node_data.name, nodes_map, env), nodes_map}

      :s_expression ->
        ExpressionTyper.infer_s_expression_type(node_data, nodes_map, env)

      :list_expression ->
        {infer_list_type(node_data, nodes_map), nodes_map}

      :file ->
        # The :file node itself doesn't have a typical "type".
        {Types.get_special_type(:file_marker), nodes_map}

      :map_expression ->
        {infer_map_type(node_data, nodes_map), nodes_map}

      :tuple_expression ->
        {infer_tuple_type(node_data, nodes_map), nodes_map}

      :lambda_expression ->
        infer_lambda_type(node_data, nodes_map)

      # Default for other AST node types.
      _ ->
        {Types.get_primitive_type(:any), nodes_map}
    end
  end

  # Type definition for a symbol: the three reserved names map to literal types,
  # anything else is looked up in env (name -> type key) and resolved via nodes_map.
  defp infer_symbol_type("nil", _nodes_map, _env), do: Types.get_literal_type(:nil_atom)
  defp infer_symbol_type("true", _nodes_map, _env), do: Types.get_literal_type(:true_atom)
  defp infer_symbol_type("false", _nodes_map, _env), do: Types.get_literal_type(:false_atom)

  defp infer_symbol_type(name, nodes_map, env) do
    case Map.get(env, name) do
      nil ->
        # Symbol not found. Default to the :any type definition.
        # TODO: Handle unresolved symbols more robustly (e.g., specific error type).
        Types.get_primitive_type(:any)

      found_type_key ->
        # A key present in env but missing from nodes_map is an inconsistency;
        # it also degrades to :any.
        type_def_or_any(nodes_map, found_type_key)
    end
  end

  # Resolves a type key to its definition in nodes_map, falling back to :any.
  defp type_def_or_any(nodes_map, type_key) do
    case Map.get(nodes_map, type_key) do
      nil -> Types.get_primitive_type(:any)
      type_definition -> type_definition
    end
  end

  # Type definition of an already-typed child node (via its :type_id), or :any.
  defp child_type_def(nodes_map, child_id) do
    child_node = Map.get(nodes_map, child_id)
    type_def_or_any(nodes_map, child_node.type_id)
  end

  # List type: element type is :nothing for [], the single distinct child type when
  # all children agree, otherwise a union of the distinct child types.
  # E.g., [1, 1, 1] -> Literal 1; [1, "a"] -> (Union (Literal 1) (Literal "a")).
  defp infer_list_type(node_data, nodes_map) do
    children_ids = Map.get(node_data, :children, [])

    distinct_child_type_defs =
      children_ids
      |> Enum.map(&child_type_def(nodes_map, &1))
      |> Enum.uniq()

    element_type_definition =
      case distinct_child_type_defs do
        [] -> Types.get_primitive_type(:nothing)
        [single_type_def] -> single_type_def
        several -> %{type_kind: :union, types: MapSet.new(several)}
      end

    %{
      type_kind: :list,
      # This is the full definition, not a key.
      element_type: element_type_definition,
      length: length(children_ids)
    }
  end

  # Map type for a map literal whose children are [key1, value1, key2, value2, ...].
  # known_elements is populated only when every key has a literal type; otherwise it
  # is left empty, giving a less specific type.
  defp infer_map_type(node_data, nodes_map) do
    known_elements =
      node_data
      |> Map.get(:children, [])
      |> Enum.chunk_every(2)
      |> Enum.reduce_while(%{}, fn
        [key_node_id, value_node_id], acc_known_elements ->
          key_node = Map.get(nodes_map, key_node_id)
          value_node = Map.get(nodes_map, value_node_id)

          # Child nodes (keys and values) are already typed at this stage.
          key_type_def =
            if key_node && key_node.type_id do
              Map.get(nodes_map, key_node.type_id)
            else
              nil
            end

          cond do
            key_type_def && key_type_def.type_kind == :literal && value_node ->
              value_type_def =
                Map.get(nodes_map, value_node.type_id, Types.get_primitive_type(:any))

              entry = %{value_type: value_type_def, optional: false}
              {:cont, Map.put(acc_known_elements, key_type_def.value, entry)}

            true ->
              # Non-literal key type, or key/value node missing: the literal cannot be
              # typed precisely, so discard what was collected.
              {:halt, %{}}
          end

        [_key_without_value], _acc_known_elements ->
          # Odd number of children: a key with no value. Treated like any other
          # imprecise map literal instead of crashing the typer.
          {:halt, %{}}
      end)

    # Default index signature for map literals: any other key maps to any value.
    default_index_signature = %{
      key_type: Types.get_primitive_type(:any),
      value_type: Types.get_primitive_type(:any)
    }

    %{
      type_kind: :map,
      known_elements: known_elements,
      index_signature: default_index_signature
    }
  end

  # Tuple type: one element type definition per child, in order.
  defp infer_tuple_type(node_data, nodes_map) do
    element_type_defs =
      node_data
      |> Map.get(:children, [])
      |> Enum.map(&child_type_def(nodes_map, &1))

    %{type_kind: :tuple, element_types: element_type_defs}
  end

  # Function type for a :lambda_expression node whose body nodes are already typed.
  # Returns {function_type_def, nodes_map}; the map may have grown while resolving
  # and interning type specifiers.
  defp infer_lambda_type(node_data, nodes_map) do
    # Resolve argument types for the function signature.
    {raw_arg_type_defs, nodes_map_after_args} =
      Enum.map_reduce(node_data.arg_spec_node_ids, nodes_map, fn arg_spec_id, acc_nodes_map ->
        arg_spec_node = Map.get(acc_nodes_map, arg_spec_id)

        case arg_spec_node.ast_node_type do
          # Unannotated param
          :symbol ->
            {Types.get_primitive_type(:any), acc_nodes_map}

          # Annotated param (param_symbol type_spec)
          :s_expression ->
            type_spec_node_id = hd(tl(arg_spec_node.children))
            type_spec_node = Map.get(acc_nodes_map, type_spec_node_id)
            ExpressionTyper.resolve_type_specifier_node(type_spec_node, acc_nodes_map)
        end
      end)

    # Resolve (explicit annotation) or infer (from the body) the return type.
    {return_type_def_for_signature, nodes_map_after_return} =
      if node_data.return_type_spec_node_id do
        return_type_spec_node =
          Map.get(nodes_map_after_args, node_data.return_type_spec_node_id)

        {expected_return_raw_def, nmap_after_ret_resolve} =
          ExpressionTyper.resolve_type_specifier_node(
            return_type_spec_node,
            nodes_map_after_args
          )

        # Intern the expected return type to get its canonical form.
        {expected_return_key, nmap_after_ret_intern} =
          Interner.get_or_intern_type(expected_return_raw_def, nmap_after_ret_resolve)

        # TODO: check that the body's return type (see body_return_type_def/2) is a
        # subtype of the annotation and report a mismatch, e.g. via
        # SubtypeChecker.is_subtype?/3. The signature uses the annotation regardless.
        {Map.get(nmap_after_ret_intern, expected_return_key), nmap_after_ret_intern}
      else
        {body_return_type_def(node_data.body_node_ids, nodes_map_after_args),
         nodes_map_after_args}
      end

    function_type_raw_def = %{
      type_kind: :function,
      arg_types: raw_arg_type_defs,
      # This is an interned def or raw primitive/literal.
      return_type: return_type_def_for_signature,
      type_params: []
    }

    {function_type_raw_def, nodes_map_after_return}
  end

  # Type definition a lambda body evaluates to: the literal nil type for an empty body,
  # otherwise whatever the last body expression's :type_id resolves to in nodes_map.
  defp body_return_type_def([], _nodes_map), do: Types.get_literal_type(:nil_atom)

  defp body_return_type_def(body_node_ids, nodes_map) do
    last_body_expr_node = Map.get(nodes_map, List.last(body_node_ids))
    Map.get(nodes_map, last_body_expr_node.type_id)
  end
end