Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 50 additions & 39 deletions crates/red_knot/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ use crate::files::FileId;
use crate::symbols::Dependency;
use crate::FxDashMap;

/// ID uniquely identifying a module.
/// Representation of a Python module.
///
/// The inner type wrapped by this struct is a unique identifier for the module
/// that is used by the struct's methods to lazily query information about the module.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct Module(u32);

Expand Down Expand Up @@ -100,7 +103,8 @@ impl Module {

/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name).
/// Always normalized to the absolute form
/// (never a relative module name, i.e., never `.foo`).
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr);

Expand Down Expand Up @@ -250,9 +254,11 @@ pub struct ModuleData {
// Queries
//////////////////////////////////////////////////////

/// Resolves a module name to a module id
/// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient and, therefore, cannot be used as part of a query.
/// For this to work with salsa, it would be necessary to intern all `ModuleName`s.
/// Resolves a module name to a module.
///
/// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient
/// and, therefore, cannot be used as part of a query.
/// For this to work with salsa, it would be necessary to intern all `ModuleName`s.
#[tracing::instrument(level = "debug", skip(db))]
pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Option<Module>> {
let jar: &SemanticJar = db.jar()?;
Expand All @@ -274,15 +280,15 @@ pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Opti
let file_id = db.file_id(&normalized);
let path = ModulePath::new(root_path.clone(), file_id);

let id = Module(
let module = Module(
modules
.next_module_id
.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
);

modules
.modules
.insert(id, Arc::from(ModuleData { name, path, kind }));
.insert(module, Arc::from(ModuleData { name, path, kind }));

// A path can map to multiple modules because of symlinks:
// ```
Expand All @@ -291,33 +297,33 @@ pub fn resolve_module(db: &dyn SemanticDb, name: ModuleName) -> QueryResult<Opti
// ```
// Here, both `foo` and `bar` resolve to the same module but through different paths.
// That's why we need to insert the absolute path and not the normalized path here.
let absolute_id = if absolute_path == normalized {
let absolute_file_id = if absolute_path == normalized {
file_id
} else {
db.file_id(&absolute_path)
};

modules.by_file.insert(absolute_id, id);
modules.by_file.insert(absolute_file_id, module);

entry.insert_entry(id);
entry.insert_entry(module);

Ok(Some(id))
Ok(Some(module))
}
}
}

/// Resolves the module id for the given path.
/// Resolves the module for the given path.
///
/// Returns `None` if the path is not a module in `sys.path`.
/// Returns `None` if the path is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn path_to_module(db: &dyn SemanticDb, path: &Path) -> QueryResult<Option<Module>> {
let file = db.file_id(path);
file_to_module(db, file)
}

/// Resolves the module id for the file with the given id.
/// Resolves the module for the file with the given id.
///
/// Returns `None` if the file is not a module in `sys.path`.
/// Returns `None` if the file is not a module locatable via `sys.path`.
#[tracing::instrument(level = "debug", skip(db))]
pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<Module>> {
let jar: &SemanticJar = db.jar()?;
Expand All @@ -344,12 +350,12 @@ pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<M

// Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same in different
// root paths, but that the module corresponding to the past path is in a lower priority path,
// root paths, but that the module corresponding to the past path is in a lower priority search path,
// in which case we ignore it.
let Some(module_id) = resolve_module(db, module_name)? else {
let Some(module) = resolve_module(db, module_name)? else {
return Ok(None);
};
let module_path = module_id.path(db)?;
let module_path = module.path(db)?;

if module_path.root() == &root_path {
let Ok(normalized) = path.canonicalize() else {
Expand All @@ -369,7 +375,7 @@ pub fn file_to_module(db: &dyn SemanticDb, file: FileId) -> QueryResult<Option<M
}

// Path has been inserted by `resolved`
Ok(Some(module_id))
Ok(Some(module))
} else {
// This path is for a module with the same name but in a module search path with a lower priority.
// Ignore it.
Expand All @@ -388,19 +394,22 @@ pub fn set_module_search_paths(db: &mut dyn SemanticDb, search_paths: Vec<Module
jar.module_resolver = ModuleResolver::new(search_paths);
}

/// Adds a module to the resolver.
/// Adds a module located at `path` to the resolver.
///
/// Returns `None` if the path doesn't resolve to a module.
///
/// Returns `Some` with the id of the module and the ids of the modules that need re-resolving
/// because they were part of a namespace package and might now resolve differently.
/// Returns `Some(module, other_modules)`, where `module` is the resolved module
/// with file location `path`, and `other_modules` is a `Vec` of `ModuleData` instances.
/// Each element in `other_modules` provides information regarding a single module that needs
/// re-resolving because it was part of a namespace package and might now resolve differently.
///
/// Note: This won't work with salsa because `Path` is not an ingredient.
pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> {
// No locking is required because we're holding a mutable reference to `modules`.

// TODO This needs tests

// Note: Intentionally by-pass caching here. Module should not be in the cache yet.
// Note: Intentionally bypass caching here. Module should not be in the cache yet.
let module = path_to_module(db, path).ok()??;

// The code below is to handle the addition of `__init__.py` files.
Expand All @@ -424,15 +433,15 @@ pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<A
let jar: &mut SemanticJar = db.jar_mut();
let modules = &mut jar.module_resolver;

modules.by_file.retain(|_, id| {
modules.by_file.retain(|_, module| {
if modules
.modules
.get(id)
.get(module)
.unwrap()
.name
.starts_with(&parent_name)
{
to_remove.push(*id);
to_remove.push(*module);
false
} else {
true
Expand All @@ -441,8 +450,8 @@ pub fn add_module(db: &mut dyn SemanticDb, path: &Path) -> Option<(Module, Vec<A

// TODO remove need for this vec
let mut removed = Vec::with_capacity(to_remove.len());
for id in &to_remove {
removed.push(modules.remove_module_by_id(*id));
for module in &to_remove {
removed.push(modules.remove_module(*module));
}

Some((module, removed))
Expand All @@ -455,10 +464,10 @@ pub struct ModuleResolver {

// Locking: Locking is done by acquiring a (write) lock on `by_name`. This is because `by_name` is the primary
// lookup method. Acquiring locks in any other ordering can result in deadlocks.
/// Resolves a module name to it's module id.
/// Looks up a module by name
by_name: FxDashMap<ModuleName, Module>,

/// All known modules, indexed by the module id.
/// A map of all known modules to data about those modules
modules: FxDashMap<Module, Arc<ModuleData>>,

/// Lookup from absolute path to module.
Expand All @@ -479,24 +488,26 @@ impl ModuleResolver {
}

/// Remove a module from the inner cache
pub(crate) fn remove_module(&mut self, file_id: FileId) {
pub(crate) fn remove_module_by_file(&mut self, file_id: FileId) {
// No locking is required because we're holding a mutable reference to `self`.
let Some((_, id)) = self.by_file.remove(&file_id) else {
let Some((_, module)) = self.by_file.remove(&file_id) else {
return;
};

self.remove_module_by_id(id);
self.remove_module(module);
}

fn remove_module_by_id(&mut self, id: Module) -> Arc<ModuleData> {
let (_, module) = self.modules.remove(&id).unwrap();
fn remove_module(&mut self, module: Module) -> Arc<ModuleData> {
let (_, module_data) = self.modules.remove(&module).unwrap();

self.by_name.remove(&module.name).unwrap();
self.by_name.remove(&module_data.name).unwrap();

// It's possible that multiple paths map to the same id. Search all other paths referencing the same module id.
self.by_file.retain(|_, current_id| *current_id != id);
// It's possible that multiple paths map to the same module.
// Search all other paths referencing the same module.
self.by_file
.retain(|_, current_module| *current_module != module);

module
module_data
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/red_knot/src/program/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl Program {

let (source, semantic, lint) = self.jars_mut();
for change in aggregated_changes.iter() {
semantic.module_resolver.remove_module(change.id);
semantic.module_resolver.remove_module_by_file(change.id);
semantic.symbol_tables.remove(&change.id);
source.sources.remove(&change.id);
source.parsed.remove(&change.id);
Expand Down