diff --git a/pydatalab/schemas/cell.json b/pydatalab/schemas/cell.json index a41903da4..94cd46694 100644 --- a/pydatalab/schemas/cell.json +++ b/pydatalab/schemas/cell.json @@ -48,6 +48,10 @@ "$ref": "#/definitions/Person" } }, + "deleted": { + "title": "Deleted", + "type": "boolean" + }, "type": { "title": "Type", "default": "cells", diff --git a/pydatalab/schemas/equipment.json b/pydatalab/schemas/equipment.json index d5bd7eeb1..2ece5d99b 100644 --- a/pydatalab/schemas/equipment.json +++ b/pydatalab/schemas/equipment.json @@ -48,6 +48,10 @@ "$ref": "#/definitions/Person" } }, + "deleted": { + "title": "Deleted", + "type": "boolean" + }, "type": { "title": "Type", "default": "equipment", diff --git a/pydatalab/schemas/sample.json b/pydatalab/schemas/sample.json index 35cb3fc64..ee38292ad 100644 --- a/pydatalab/schemas/sample.json +++ b/pydatalab/schemas/sample.json @@ -48,6 +48,10 @@ "$ref": "#/definitions/Person" } }, + "deleted": { + "title": "Deleted", + "type": "boolean" + }, "type": { "title": "Type", "default": "samples", diff --git a/pydatalab/schemas/startingmaterial.json b/pydatalab/schemas/startingmaterial.json index ad1446b3a..c3034bb7b 100644 --- a/pydatalab/schemas/startingmaterial.json +++ b/pydatalab/schemas/startingmaterial.json @@ -48,6 +48,10 @@ "$ref": "#/definitions/Person" } }, + "deleted": { + "title": "Deleted", + "type": "boolean" + }, "type": { "title": "Type", "default": "starting_materials", diff --git a/pydatalab/src/pydatalab/models/items.py b/pydatalab/src/pydatalab/models/items.py index 4195f1ce6..21c76d22d 100644 --- a/pydatalab/src/pydatalab/models/items.py +++ b/pydatalab/src/pydatalab/models/items.py @@ -10,6 +10,7 @@ HasOwner, HasRevisionControl, IsCollectable, + IsDeletable, ) from pydatalab.models.utils import ( HumanReadableIdentifier, @@ -19,7 +20,7 @@ ) -class Item(Entry, HasOwner, HasRevisionControl, IsCollectable, HasBlocks, abc.ABC): +class Item(Entry, IsDeletable, HasOwner, HasRevisionControl, IsCollectable, HasBlocks, abc.ABC): """The generic model for data types that will be exposed with their own named endpoints.""" refcode: Refcode = None # type: ignore diff --git a/pydatalab/src/pydatalab/models/traits.py b/pydatalab/src/pydatalab/models/traits.py index 07be0d937..2cf34fa6d 100644 --- a/pydatalab/src/pydatalab/models/traits.py +++ b/pydatalab/src/pydatalab/models/traits.py @@ -78,3 +78,18 @@ def add_missing_collection_relationships(cls, values): raise RuntimeError("Relationships and collections mismatch") return values + + +class IsDeletable(BaseModel): + """Adds a 'private' trait for whether the item is deleted. + This can be used to soft-delete entries in the database. + """ + + deleted: bool = Field(None) + + @root_validator(pre=True) + def check_deleted(cls, values): + """If `deleted` is set to anything but `True`, drop the field.""" + if "deleted" in values and values["deleted"] is not True: + values.pop("deleted") + return values diff --git a/pydatalab/src/pydatalab/mongo.py b/pydatalab/src/pydatalab/mongo.py index d0195a97f..d9b12c8aa 100644 --- a/pydatalab/src/pydatalab/mongo.py +++ b/pydatalab/src/pydatalab/mongo.py @@ -6,6 +6,7 @@ from pydantic import BaseModel from pymongo.errors import ConnectionFailure +from pydatalab.logger import LOGGER from pydatalab.models import ITEM_MODELS __all__ = ( @@ -95,6 +96,7 @@ def check_mongo_connection() -> None: def create_default_indices( client: Optional[pymongo.MongoClient] = None, background: bool = False, + allow_rebuild: bool = False, ) -> List[str]: """Creates indices for the configured or passed MongoClient. @@ -105,7 +107,10 @@ def create_default_indices( - A text index over user names and identities. Parameters: + client: The MongoClient to use. If None, a new one will be created. background: If true, indexes will be created as background jobs. + allow_rebuild: If true, named indexes will be recreated if they already exist + with alternative options. Returns: A list of messages returned by each `create_index` call. @@ -146,14 +151,30 @@ def create_fts(): weights={"collection_id": 3, "title": 3, "description": 3}, ) - ret += db.items.create_index("type", name="item type", background=background) - ret += db.items.create_index( - "item_id", unique=True, name="unique item ID", background=background - ) - ret += db.items.create_index( - "refcode", unique=True, name="unique refcode", background=background - ) - ret += db.items.create_index("last_modified", name="last modified", background=background) + indices = [ + {"type": {"name": "item type", "background": background}}, + { + "item_id": { + "name": "unique item ID", + "unique": True, + "background": background, + "partialFilterExpression": {"deleted": {"$eq": None}}, + } + }, + {"refcode": {"name": "unique refcode", "unique": True, "background": background}}, + {"last_modified": {"name": "last modified", "background": background}}, + {"creator_ids": {"name": "creators", "background": background}}, + {"deleted": {"name": "deleted items", "background": background}}, + ] + + for index in indices: + for field, options in index.items(): + try: + ret += db.items.create_index(field, **options) + except pymongo.errors.OperationFailure as exc: + LOGGER.warning("Rebuilding index %s", options["name"], exc_info=exc) + db.items.drop_index(options["name"]) + ret += db.items.create_index(field, **options) user_fts_fields = {"identities.name", "display_name"} diff --git a/pydatalab/src/pydatalab/permissions.py b/pydatalab/src/pydatalab/permissions.py index c36e3a0eb..729d2ee8c 100644 --- a/pydatalab/src/pydatalab/permissions.py +++ b/pydatalab/src/pydatalab/permissions.py @@ -60,7 +60,7 @@ def wrapped_route(*args, **kwargs): return wrapped_route -def get_default_permissions(user_only: bool = True) -> Dict[str, Any]: +def get_default_permissions(user_only: bool = True, match_deleted: bool = False) -> Dict[str, Any]: """Return the MongoDB query terms corresponding to the current user. Will return open permissions if a) the `CONFIG.TESTING` parameter is `True`, @@ -70,11 +70,17 @@ def get_default_permissions(user_only: bool = True) -> Dict[str, Any]: user_only: Whether to exclude items that also have no attached user (`False`), i.e., public items. This should be set to `False` when reading (and wanting to return public items), but left as `True` when modifying or removing items. + match_deleted: Whether to include items that have been soft-deleted. """ + return_match: dict = {"$and": []} + + if not match_deleted: + return_match["$and"].append({"deleted": {"$ne": True}}) + if CONFIG.TESTING: - return {} + return return_match if ( current_user.is_authenticated @@ -109,11 +115,16 @@ def get_default_permissions(user_only: bool = True) -> Dict[str, Any]: # TODO: remove this hack when permissions are refactored. Currently starting_materials and equipment # are a special case that should be group editable, so even when the route has asked to only edit this # user's stuff, we can also let starting materials and equipment through. - user_perm = {"$or": [user_perm, {"type": {"$in": ["starting_materials", "equipment"]}}]} - return user_perm - return {"$or": [user_perm, null_perm]} + return_match["$and"].append( + {"$or": [user_perm, {"type": {"$in": ["starting_materials", "equipment"]}}]} + ) + return return_match + + return_match["$and"].append({"$or": [user_perm, null_perm]}) + return return_match elif user_only: return {"_id": -1} - return null_perm + return_match["$and"].append(null_perm) + return return_match diff --git a/pydatalab/src/pydatalab/routes/v0_1/graphs.py b/pydatalab/src/pydatalab/routes/v0_1/graphs.py index 82c3d122c..82c618041 100644 --- a/pydatalab/src/pydatalab/routes/v0_1/graphs.py +++ b/pydatalab/src/pydatalab/routes/v0_1/graphs.py @@ -31,7 +31,7 @@ def get_graph_cy_format(item_id: Optional[str] = None, collection_id: Optional[s query = {} all_documents = flask_mongo.db.items.find( {**query, **get_default_permissions(user_only=False)}, - projection={"item_id": 1, "name": 1, "type": 1, "relationships": 1}, + projection={"item_id": 1, "name": 1, "type": 1, "relationships": 1, "deleted": 1}, ) node_ids: Set[str] = {document["item_id"] for document in all_documents} all_documents.rewind() @@ -43,7 +43,7 @@ def get_graph_cy_format(item_id: Optional[str] = None, collection_id: Optional[s "$or": [{"item_id": item_id}, {"relationships.item_id": item_id}], **get_default_permissions(user_only=False), }, - projection={"item_id": 1, "name": 1, "type": 1, "relationships": 1}, + projection={"item_id": 1, "name": 1, "type": 1, "relationships": 1, "deleted": 1}, ) ) @@ -59,7 +59,7 @@ def get_graph_cy_format(item_id: Optional[str] = None, collection_id: Optional[s "$or": or_query, **get_default_permissions(user_only=False), }, - projection={"item_id": 1, "name": 1, "type": 1, "relationships": 1}, + projection={"item_id": 1, "name": 1, "type": 1, "relationships": 1, "deleted": 1}, ) all_documents.extend(next_shell) @@ -71,9 +71,14 @@ def get_graph_cy_format(item_id: Optional[str] = None, collection_id: Optional[s # Collect the elements that have already been added to the graph, to avoid duplication drawn_elements = set() node_collections = set() + deleted_items = set() for document in all_documents: # for some reason, document["relationships"] is sometimes equal to None, so we # need this `or` statement. + if document.get("deleted", None): + deleted_items.add(document["item_id"]) + continue + for relationship in document.get("relationships") or []: # only considering child-parent relationships if relationship.get("type") == "collections" and not collection_id: @@ -124,6 +129,8 @@ def get_graph_cy_format(item_id: Optional[str] = None, collection_id: Optional[s source = relationship["item_id"] if source not in node_ids: continue + if target not in node_ids: + continue edge_id = f"{source}->{target}" if edge_id not in drawn_elements: drawn_elements.add(edge_id) @@ -151,6 +158,14 @@ def get_graph_cy_format(item_id: Optional[str] = None, collection_id: Optional[s } ) + # Filter out any edges to deleted nodes + edges_to_remove = [] + for ind, edge in enumerate(edges): + if edge["data"]["source"] in deleted_items or edge["data"]["target"] in deleted_items: + edges_to_remove.append(edge["data"]["id"]) + + edges = [edge for edge in edges if edge["data"]["id"] not in edges_to_remove] + # We want to filter out all the starting materials that don't have relationships since there are so many of them: whitelist = {edge["data"]["source"] for edge in edges} diff --git a/pydatalab/src/pydatalab/routes/v0_1/items.py b/pydatalab/src/pydatalab/routes/v0_1/items.py index 60628e021..95f951aff 100644 --- a/pydatalab/src/pydatalab/routes/v0_1/items.py +++ b/pydatalab/src/pydatalab/routes/v0_1/items.py @@ -288,6 +288,7 @@ def search_items(): nresults: Maximum number of (default 100) types: If None, search all types of items. Otherwise, a list of strings giving the types to consider. (e.g. ["samples","starting_materials"]) + match_deleted: Whether to include items that have been soft-deleted. Returns: response list of dictionaries containing the matching items in order of @@ -297,13 +298,14 @@ def search_items(): query = request.args.get("query", type=str) nresults = request.args.get("nresults", default=100, type=int) types = request.args.get("types", default=None) + match_deleted = request.args.get("match_deleted", default=False, type=bool) if isinstance(types, str): # should figure out how to parse as list automatically types = types.split(",") match_obj = { "$text": {"$search": query}, - **get_default_permissions(user_only=False), + **get_default_permissions(user_only=False, match_deleted=match_deleted), } if types is not None: match_obj["type"] = {"$in": types} @@ -467,7 +469,7 @@ def _create_sample( ) # check to make sure that item_id isn't taken already - if flask_mongo.db.items.find_one({"item_id": sample_dict["item_id"]}): + if flask_mongo.db.items.find_one({"item_id": sample_dict["item_id"], "deleted": {"$ne": True}}): return ( dict( status="error", @@ -697,21 +699,46 @@ def update_item_permissions(refcode: str): return jsonify({"status": "success"}), 200 +@ITEMS.route("/items/", methods=["DELETE"]) @ITEMS.route("/delete-sample/", methods=["POST"]) -def delete_sample(): - request_json = request.get_json() # noqa: F821 pylint: disable=undefined-variable - item_id = request_json["item_id"] +def delete_sample(refcode: str | None = None, item_id: str | None = None): + """Sets the `deleted` status an item with the given refcode.""" - result = flask_mongo.db.items.delete_one( - {"item_id": item_id, **get_default_permissions(user_only=True)} + if refcode is None: + request_json = request.get_json() # noqa: F821 pylint: disable=undefined-variable + item_id = request_json["item_id"] + + if item_id: + match = {"item_id": item_id} + elif refcode: + if not len(refcode.split(":")) == 2: + refcode = f"{CONFIG.IDENTIFIER_PREFIX}:{refcode}" + + match = {"refcode": refcode} + else: + return ( + jsonify( + { + "status": "error", + "message": "No item_id or refcode provided.", + } + ), + 400, + ) + + LOGGER.warning(f"Setting deleted status for {refcode=}, {item_id=}.") + + result = flask_mongo.db.items.update_one( + {**match, **get_default_permissions(user_only=True)}, + {"$set": {"deleted": True}}, ) - if result.deleted_count != 1: + if result.modified_count != 1: return ( jsonify( { "status": "error", - "message": f"Authorization required to attempt to delete sample with {item_id=} from the database.", + "message": f"Authorization required to attempt to delete sample with {refcode=} / {item_id=} from the database.", } ), 401, @@ -734,6 +761,8 @@ def get_item_data( """Generates a JSON response for the item with the given `item_id`, or `refcode` additionally resolving relationships to files and other items. + Will return deleted items if specifically requested. + Parameters: load_blocks: Whether to regenerate any data blocks associated with this sample (i.e., create the Python object corresponding to the block and @@ -768,9 +797,10 @@ def get_item_data( { "$match": { **match, - **get_default_permissions(user_only=False), + **get_default_permissions(user_only=False, match_deleted=True), } }, + {"$sort": {"deleted": 1}}, {"$lookup": creators_lookup()}, {"$lookup": collections_lookup()}, {"$lookup": files_lookup()}, @@ -874,9 +904,14 @@ def get_item_data( f["immutable_id"]: f for f in return_dict.get("files") or [] } + warnings = [] + if return_dict.get("deleted"): + warnings += [f"The item with refcode {return_dict['refcode']!r} has been deleted."] + return jsonify( { "status": "success", + "warnings": warnings, "item_id": item_id, "item_data": return_dict, "files_data": files_data, diff --git a/pydatalab/tests/server/test_equipment.py b/pydatalab/tests/server/test_equipment.py index 093113b8f..01afabe65 100644 --- a/pydatalab/tests/server/test_equipment.py +++ b/pydatalab/tests/server/test_equipment.py @@ -124,4 +124,5 @@ def test_delete_equipment(admin_client, default_equipment_dict): response = admin_client.get( f"/get-item-data/{default_equipment_dict['item_id']}", ) - assert response.status_code == 404 + assert response.status_code == 200 + assert "has been deleted" in response.json["warnings"][0] diff --git a/pydatalab/tests/server/test_samples.py b/pydatalab/tests/server/test_samples.py index 69b438d47..cc0ea8c46 100644 --- a/pydatalab/tests/server/test_samples.py +++ b/pydatalab/tests/server/test_samples.py @@ -134,7 +134,29 @@ def test_delete_sample(client, default_sample_dict): response = client.get( f"/get-item-data/{default_sample_dict['item_id']}", ) - assert response.status_code == 404 + assert response.status_code == 200 + assert "has been deleted" in response.json["warnings"][0] + + # Check that the ID is freed up, and that delete by refcode also works + response = client.post("/new-sample/", json=default_sample_dict) + # Test that 201: Created is emitted + assert response.status_code == 201, response.json + assert response.json["status"] == "success" + + response = client.get( + f"/get-item-data/{default_sample_dict['item_id']}", + ) + refcode = response.json["item_data"]["refcode"] + + del_response = client.delete( + f"/items/{refcode}", + ) + assert del_response.status_code == 200 + assert del_response.json["status"] == "success" + + response = client.get(f"/items/{refcode}") + assert response.status_code == 200 + assert f"The item with refcode {refcode!r} has been deleted." == response.json["warnings"][0] @pytest.mark.dependency(depends=["test_delete_sample"]) @@ -146,7 +168,16 @@ def test_create_indices(real_mongo_client): create_default_indices(real_mongo_client) indexes = list(real_mongo_client.get_database().items.list_indexes()) - expected_index_names = ("_id_", "items full-text search", "item type", "unique item ID") + expected_index_names = ( + "_id_", + "items full-text search", + "item type", + "unique item ID", + "unique refcode", + "last modified", + "creators", + "deleted items", + ) names = [index["name"] for index in indexes] assert all(name in names for name in expected_index_names) diff --git a/pydatalab/tests/server/test_starting_materials.py b/pydatalab/tests/server/test_starting_materials.py index ff9761c44..6e897c581 100644 --- a/pydatalab/tests/server/test_starting_materials.py +++ b/pydatalab/tests/server/test_starting_materials.py @@ -127,6 +127,7 @@ def test_delete_starting_material(admin_client, default_starting_material_dict): ) assert response.status_code == 200 assert response.json["status"] == "success" + assert "has been deleted" in response.json["warnings"][0] # Check it was actually deleted response = admin_client.get(