-
Notifications
You must be signed in to change notification settings - Fork 27
INTPYTHON-729: (PoC) Improve flexibility and QOL of Atlas/Vector Search Index Configurations #370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,9 +109,22 @@ class SearchIndex(Index): | |
suffix = "six" | ||
_error_id_prefix = "django_mongodb_backend.indexes.SearchIndex" | ||
|
||
def __init__(self, *, fields=(), name=None): | ||
def __init__(self, *, fields=(), name=None, field_mappings=None): | ||
if field_mappings and not isinstance(field_mappings, dict): | ||
raise ValueError( | ||
"field_mappings must be a dictionary mapping field names to their " | ||
"Atlas Search field mappings." | ||
) | ||
self.field_mappings = field_mappings or {} | ||
|
||
fields = list({*fields, *self.field_mappings.keys()}) | ||
super().__init__(fields=fields, name=name) | ||
|
||
def deconstruct(self): | ||
path, args, kwargs = super().deconstruct() | ||
kwargs["field_mappings"] = self.field_mappings | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
return path, args, kwargs | ||
|
||
def check(self, model, connection): | ||
errors = [] | ||
if not connection.features.supports_atlas_search: | ||
|
@@ -152,23 +165,39 @@ def get_pymongo_index_model( | |
return None | ||
fields = {} | ||
for field_name, _ in self.fields_orders: | ||
field = model._meta.get_field(field_name) | ||
type_ = self.search_index_data_types(field.db_type(schema_editor.connection)) | ||
field_path = column_prefix + model._meta.get_field(field_name).column | ||
fields[field_path] = {"type": type_} | ||
if field_name in self.field_mappings: | ||
fields[field_path] = self.field_mappings[field_name].copy() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why
Comment on lines
+169
to
+170
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah,
|
||
else: | ||
# If no field mapping is provided, use the default search index data type. | ||
field = model._meta.get_field(field_name) | ||
type_ = self.search_index_data_types(field.db_type(schema_editor.connection)) | ||
fields[field_path] = {"type": type_} | ||
return SearchIndexModel( | ||
definition={"mappings": {"dynamic": False, "fields": fields}}, name=self.name | ||
) | ||
|
||
|
||
class DynamicSearchIndex(SearchIndex): | ||
suffix = "dsix" | ||
_error_id_prefix = "django_mongodb_backend.indexes.DynamicSearchIndex" | ||
Comment on lines
+181
to
+183
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Here I can override the `__init__`:
class DynamicSearchIndex(SearchIndex):
    def __init__(...):
        super().__init__(fields=("id"), name=name, field_mappings=field_mappings)
Overall design should be properly bike-shed either way. |
||
|
||
def get_pymongo_index_model( | ||
self, model, schema_editor, field=None, unique=False, column_prefix="" | ||
): | ||
if not schema_editor.connection.features.supports_atlas_search: | ||
return None | ||
return SearchIndexModel(definition={"mappings": {"dynamic": True}}, name=self.name) | ||
|
||
|
||
class VectorSearchIndex(SearchIndex): | ||
suffix = "vsi" | ||
_error_id_prefix = "django_mongodb_backend.indexes.VectorSearchIndex" | ||
VALID_FIELD_TYPES = frozenset(("boolean", "date", "number", "objectId", "string", "uuid")) | ||
VALID_SIMILARITIES = frozenset(("cosine", "dotProduct", "euclidean")) | ||
|
||
def __init__(self, *, fields=(), name=None, similarities): | ||
super().__init__(fields=fields, name=name) | ||
def __init__(self, *, fields=(), name=None, similarities=(), fields_mappings=None): | ||
super().__init__(fields=fields, name=name, field_mappings=fields_mappings) | ||
self.similarities = similarities | ||
self._multiple_similarities = isinstance(similarities, tuple | list) | ||
for func in similarities if self._multiple_similarities else (similarities,): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using set unpacking with `{*fields, *self.field_mappings.keys()}` may not preserve the original order of fields. Consider using `list(dict.fromkeys(list(fields) + list(self.field_mappings.keys())))` to maintain order while removing duplicates.
Copilot uses AI. Check for mistakes.