checkpoint failing test after fixing tests checkpoint checkpoint checkpoint re-work asd checkpoint checkpoint checkpoint mix proj checkpoint mix first parser impl checkpoint fix tests re-org parser checkpoint strings fix multiline strings tuples checkpoint maps checkpoint checkpoint checkpoint checkpoint fix weird eof expression parse error checkpoint before typing checkpoint checpoint checkpoint checkpoint checkpoint ids in primitive types checkpoint checkpoint fix tests initial annotation checkpoint checkpoint checkpoint union subtyping conventions refactor - split typer typing tuples checkpoint test refactor checkpoint test refactor parsing atoms checkpoint atoms wip lists checkpoint typing lists checkopint checkpoint wip fixing correct list typing map discussion checkpoint map basic typing fix tests checkpoint checkpoint checkpoint checkpoint fix condition typing fix literal keys in map types checkpoint union types checkpoint union type checkpoint row types discussion & bidirectional typecheck checkpoint basic lambdas checkpoint lambdas typing application wip function application checkpoint checkpoint checkpoint cduce checkpoint checkpoint checkpoint checkpoint checkpoint checkpoint checkpoint
505 lines
21 KiB
Elixir
505 lines
21 KiB
Elixir
defmodule Til.Typer do
|
|
@moduledoc """
|
|
Handles type checking and type inference for the Tilly Lisp dialect.
|
|
It processes the AST (Node Maps) generated by the parser and annotates
|
|
nodes with their inferred or checked types.
|
|
"""
|
|
|
|
# alias Til.AstUtils # Removed as it's not used yet and causes a warning.
|
|
# alias MapSet, as: Set # No longer directly used here, moved to specialized modules
|
|
|
|
alias Til.Typer.Types
|
|
alias Til.Typer.Interner
|
|
alias Til.Typer.ExpressionTyper
|
|
# alias Til.Typer.SubtypeChecker # Not directly used in this module after refactor
|
|
alias Til.Typer.Environment
|
|
|
|
@doc """
Performs type checking and inference on a map of AST nodes.

The map is first passed through `Interner.populate_known_types/1`, then typing
starts at the node whose `:ast_node_type` is `:file` and proceeds recursively.
Every node that gets typed has its `:type_id` field set to the key of its type.

Returns `{:ok, nodes_map}` with the annotated nodes. When no `:file` node is
present, the pre-populated map is returned unchanged in the same `{:ok, _}` shape.
Any other value produced by the recursive typing step is passed through as-is.
"""
def type_check(nodes_map) when is_map(nodes_map) do
  initial_env = %{}
  pre_populated_nodes_map = Interner.populate_known_types(nodes_map)

  # Type keys are resolved against this same map elsewhere in the module, so it
  # may hold entries that are not AST nodes. Such entries may lack
  # :ast_node_type, and `entry.ast_node_type` would raise KeyError on them;
  # matching on shape simply skips them.
  file_node =
    pre_populated_nodes_map
    |> Map.values()
    |> Enum.find(&match?(%{ast_node_type: :file}, &1))

  case file_node do
    nil ->
      # No root to start from: hand back the map with whatever
      # populate_known_types added.
      {:ok, pre_populated_nodes_map}

    file_node ->
      case type_node_recursively(file_node.id, pre_populated_nodes_map, initial_env) do
        # The final environment is only needed during traversal.
        {:ok, final_nodes_map, _final_env} ->
          {:ok, final_nodes_map}

        # Propagate anything else (e.g. a future error tuple) untouched.
        other_result ->
          other_result
      end
  end
end
|
|
|
|
# Entry point for typing a single node, addressed by id.
# Looks the id up in `nodes_map`. An id with no entry is ignored: the map and
# the environment are returned untouched. Otherwise the node is handed to
# do_type_node/3, which performs the actual work.
defp type_node_recursively(node_id, nodes_map, env) do
  node_data = Map.get(nodes_map, node_id)

  if is_nil(node_data) do
    # Unknown id: nothing to type, nothing to change.
    {:ok, nodes_map, env}
  else
    do_type_node(node_data, nodes_map, env)
  end
end
|
|
|
|
# Types a single node.
#
# Steps:
#   1. type the node's children (for lambdas: only the body, in an environment
#      extended with the parameters),
#   2. infer the node's own type, intern it and store the key under :type_id,
#   3. let Environment.update_env_from_node/3 derive the environment that
#      subsequent nodes will see.
#
# Returns {:ok, updated_nodes_map, updated_env}.
defp do_type_node(node_data, nodes_map, env) do
  {child_ids, env_for_children, nodes_map_prepared, scope} =
    prepare_children(node_data, nodes_map, env)

  # 1. Type the children in order, threading nodes map and environment through.
  {nodes_map_after_children, env_after_children} =
    Enum.reduce(
      child_ids,
      {nodes_map_prepared, env_for_children},
      fn child_id, {acc_nodes_map, acc_env} ->
        {:ok, next_nodes_map, next_env} =
          type_node_recursively(child_id, acc_nodes_map, acc_env)

        {next_nodes_map, next_env}
      end
    )

  # Re-read the node: the map may have changed while the children were typed.
  current_node = Map.get(nodes_map_after_children, node_data.id)

  # 2. Infer the type definition for this node and intern it to obtain its key.
  {type_definition, nodes_map_after_inference} =
    infer_type_for_node_ast(current_node, nodes_map_after_children, env_after_children)

  {type_key, nodes_map_after_interning} =
    Interner.get_or_intern_type(type_definition, nodes_map_after_inference)

  # Annotate the version of the node that lives in the latest map.
  typed_node =
    nodes_map_after_interning
    |> Map.get(current_node.id)
    |> Map.put(:type_id, type_key)

  nodes_map_with_typed_node = Map.put(nodes_map_after_interning, typed_node.id, typed_node)

  # 3. Pick the environment that continues after this node. A lambda body runs
  # in its own scope, so parameter bindings (and anything bound inside the body)
  # must not leak into the enclosing environment.
  env_for_update =
    case scope do
      {:restore, enclosing_env} -> enclosing_env
      :keep -> env_after_children
    end

  {env_after_current_node, nodes_map_after_env_update} =
    Environment.update_env_from_node(typed_node, nodes_map_with_typed_node, env_for_update)

  {:ok, nodes_map_after_env_update, env_after_current_node}
end

# Decides which children to type and in which environment.
# Returns {child_ids, env_for_children, nodes_map, scope}, where scope is
# :keep (the children share the caller's scope) or {:restore, env} (the
# children run in a nested scope and `env` must be reinstated afterwards).
defp prepare_children(node_data, nodes_map, env) do
  if node_data.ast_node_type == :lambda_expression do
    prepare_lambda_children(node_data, nodes_map, env)
  else
    # Default: type all children with the current environment.
    {Map.get(node_data, :children, []), env, nodes_map, :keep}
  end
end

# Lambda form: (fn params_s_expr body...).
# The `fn` symbol (first child) and the params s-expression are typed with the
# outer environment; the body ids are returned to be typed in an environment
# extended with one binding per entry of `arg_spec_node_ids`.
defp prepare_lambda_children(node_data, nodes_map, env) do
  fn_op_child_id = hd(node_data.children)

  {:ok, nodes_map_after_fn_op, env_after_fn_op} =
    type_node_recursively(fn_op_child_id, nodes_map, env)

  {:ok, nodes_map_after_params, enclosing_env} =
    type_node_recursively(node_data.params_s_expr_id, nodes_map_after_fn_op, env_after_fn_op)

  {lambda_body_env, nodes_map_after_arg_types} =
    Enum.reduce(
      node_data.arg_spec_node_ids,
      {enclosing_env, nodes_map_after_params},
      &bind_lambda_param/2
    )

  {node_data.body_node_ids, lambda_body_env, nodes_map_after_arg_types,
   {:restore, enclosing_env}}
end

# Adds one lambda parameter to the environment.
# A bare symbol (e.g. `x`) is bound to the key of the primitive :any type; an
# s-expression (e.g. `(x integer)`) has its second child resolved as a type
# specifier and interned, and the parameter is bound to the resulting key.
defp bind_lambda_param(arg_spec_id, {acc_env, acc_nodes_map}) do
  arg_spec_node = Map.get(acc_nodes_map, arg_spec_id)

  case arg_spec_node.ast_node_type do
    :symbol ->
      param_type_key = Types.primitive_type_key(:any)
      {Map.put(acc_env, arg_spec_node.name, param_type_key), acc_nodes_map}

    :s_expression ->
      param_symbol_node = Map.get(acc_nodes_map, hd(arg_spec_node.children))
      type_spec_node = Map.get(acc_nodes_map, hd(tl(arg_spec_node.children)))
      param_name = param_symbol_node.name

      {raw_type_def, nodes_map_after_resolve} =
        ExpressionTyper.resolve_type_specifier_node(type_spec_node, acc_nodes_map)

      {param_type_key, nodes_map_after_intern} =
        Interner.get_or_intern_type(raw_type_def, nodes_map_after_resolve)

      {Map.put(acc_env, param_name, param_type_key), nodes_map_after_intern}
  end
end
|
|
|
|
# Infers the type definition of a node from its :ast_node_type.
#
# `nodes_map` holds the (already typed) children, whose :type_id is a key into
# the same map, where the type definitions are looked up.
# `env` maps symbol names to type keys.
#
# Returns {type_definition_map, possibly_updated_nodes_map}. Node kinds without
# a dedicated rule get the primitive :any type.
defp infer_type_for_node_ast(node_data, nodes_map, env) do
  case node_data.ast_node_type do
    literal when literal in [:literal_integer, :literal_string, :literal_atom] ->
      # A literal's type is the literal type carrying its own value.
      {%{type_kind: :literal, value: node_data.value}, nodes_map}

    :symbol ->
      infer_symbol_type(node_data, nodes_map, env)

    :s_expression ->
      ExpressionTyper.infer_s_expression_type(node_data, nodes_map, env)

    :list_expression ->
      infer_list_type(node_data, nodes_map)

    :file ->
      # The :file node has no ordinary type; it gets a dedicated marker.
      {Types.get_special_type(:file_marker), nodes_map}

    :map_expression ->
      infer_map_type(node_data, nodes_map)

    :tuple_expression ->
      infer_tuple_type(node_data, nodes_map)

    :lambda_expression ->
      infer_lambda_type(node_data, nodes_map)

    _ ->
      # Placeholder for node kinds not handled yet.
      {Types.get_primitive_type(:any), nodes_map}
  end
end

# Symbols: `nil`, `true` and `false` map to their literal types; any other name
# is looked up in `env` and its key resolved in `nodes_map`. An unbound name,
# or a key without a definition, falls back to :any.
# TODO: Handle unresolved symbols more robustly (e.g., specific error type).
defp infer_symbol_type(node_data, nodes_map, env) do
  case node_data.name do
    "nil" ->
      {Types.get_literal_type(:nil_atom), nodes_map}

    "true" ->
      {Types.get_literal_type(:true_atom), nodes_map}

    "false" ->
      {Types.get_literal_type(:false_atom), nodes_map}

    name ->
      type_definition =
        with type_key when not is_nil(type_key) <- Map.get(env, name),
             definition when not is_nil(definition) <- Map.get(nodes_map, type_key) do
          definition
        else
          nil -> Types.get_primitive_type(:any)
        end

      {type_definition, nodes_map}
  end
end

# Resolves the type definition of each child via its :type_id.
# A key with no definition in `nodes_map` falls back to :any.
defp resolve_child_type_defs(children_ids, nodes_map) do
  Enum.map(children_ids, fn child_id ->
    child_node = Map.get(nodes_map, child_id)

    case Map.get(nodes_map, child_node.type_id) do
      nil -> Types.get_primitive_type(:any)
      type_definition -> type_definition
    end
  end)
end

# Lists: the element type is :nothing for an empty list, the single shared
# definition when all elements agree (e.g. [1, 1, 1]), otherwise a union of the
# distinct element definitions (e.g. [1, "a"]). The length is recorded too.
defp infer_list_type(node_data, nodes_map) do
  children_ids = Map.get(node_data, :children, [])

  element_type_definition =
    case children_ids |> resolve_child_type_defs(nodes_map) |> Enum.uniq() do
      [] -> Types.get_primitive_type(:nothing)
      [single_type_def] -> single_type_def
      distinct_type_defs -> %{type_kind: :union, types: MapSet.new(distinct_type_defs)}
    end

  list_type_def = %{
    type_kind: :list,
    element_type: element_type_definition,
    length: length(children_ids)
  }

  {list_type_def, nodes_map}
end

# Maps: children are [key1, value1, key2, value2, ...]. Every key whose type is
# a literal contributes a `known_elements` entry. As soon as a pair cannot be
# typed that way (non-literal key, missing node, or a trailing key without a
# value) the known elements are dropped altogether, yielding a less specific
# type. The index signature is always any -> any.
defp infer_map_type(node_data, nodes_map) do
  known_elements =
    node_data
    |> Map.get(:children, [])
    |> Enum.chunk_every(2)
    |> Enum.reduce_while(%{}, fn
      [key_node_id, value_node_id], acc_known_elements ->
        key_node = Map.get(nodes_map, key_node_id)
        value_node = Map.get(nodes_map, value_node_id)

        key_type_def =
          if key_node && key_node.type_id, do: Map.get(nodes_map, key_node.type_id)

        if key_type_def && key_type_def.type_kind == :literal && value_node do
          value_type_def =
            Map.get(nodes_map, value_node.type_id, Types.get_primitive_type(:any))

          entry = %{value_type: value_type_def, optional: false}
          {:cont, Map.put(acc_known_elements, key_type_def.value, entry)}
        else
          {:halt, %{}}
        end

      # chunk_every/2 emits a shorter final chunk for an odd number of
      # children; treat the dangling key like any other untypeable pair
      # instead of crashing with a FunctionClauseError.
      [_dangling_key_node_id], _acc_known_elements ->
        {:halt, %{}}
    end)

  map_type_def = %{
    type_kind: :map,
    known_elements: known_elements,
    index_signature: %{
      key_type: Types.get_primitive_type(:any),
      value_type: Types.get_primitive_type(:any)
    }
  }

  {map_type_def, nodes_map}
end

# Tuples: one element type per child, in order.
defp infer_tuple_type(node_data, nodes_map) do
  element_type_defs =
    node_data
    |> Map.get(:children, [])
    |> resolve_child_type_defs(nodes_map)

  {%{type_kind: :tuple, element_types: element_type_defs}, nodes_map}
end

# Lambdas: builds a :function type from the argument specs and either the
# explicit return annotation or the type of the last body expression.
# The body nodes are expected to be typed already.
defp infer_lambda_type(node_data, nodes_map) do
  {arg_type_defs, nodes_map_after_args} =
    Enum.map_reduce(node_data.arg_spec_node_ids, nodes_map, fn arg_spec_id, acc_nodes_map ->
      arg_spec_node = Map.get(acc_nodes_map, arg_spec_id)

      case arg_spec_node.ast_node_type do
        # Unannotated param
        :symbol ->
          {Types.get_primitive_type(:any), acc_nodes_map}

        # Annotated param: (param_symbol type_spec)
        :s_expression ->
          type_spec_node = Map.get(acc_nodes_map, hd(tl(arg_spec_node.children)))
          ExpressionTyper.resolve_type_specifier_node(type_spec_node, acc_nodes_map)
      end
    end)

  {return_type_def, nodes_map_after_return} =
    if node_data.return_type_spec_node_id do
      return_type_spec_node =
        Map.get(nodes_map_after_args, node_data.return_type_spec_node_id)

      {expected_return_raw_def, nodes_map_after_resolve} =
        ExpressionTyper.resolve_type_specifier_node(return_type_spec_node, nodes_map_after_args)

      # Intern the annotation so the signature carries its interned form.
      {expected_return_key, nodes_map_after_intern} =
        Interner.get_or_intern_type(expected_return_raw_def, nodes_map_after_resolve)

      # TODO: report a mismatch when lambda_body_return_type/2 is not a subtype
      # of the annotated return type (needs SubtypeChecker); the signature keeps
      # the annotated type either way.
      {Map.get(nodes_map_after_intern, expected_return_key), nodes_map_after_intern}
    else
      {lambda_body_return_type(node_data, nodes_map_after_args), nodes_map_after_args}
    end

  function_type_def = %{
    type_kind: :function,
    arg_types: arg_type_defs,
    return_type: return_type_def,
    type_params: []
  }

  {function_type_def, nodes_map_after_return}
end

# Type a lambda body evaluates to: the literal nil type for an empty body,
# otherwise the definition behind the :type_id of the last body expression.
defp lambda_body_return_type(node_data, nodes_map) do
  case node_data.body_node_ids do
    [] ->
      Types.get_literal_type(:nil_atom)

    body_node_ids ->
      last_body_expr_node = Map.get(nodes_map, List.last(body_node_ids))
      Map.get(nodes_map, last_body_expr_node.type_id)
  end
end
|
|
end
|