diff --git a/CHANGELOG.md b/CHANGELOG.md index 722f5540b..5d72591d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## Unreleased changes + +### What's Changed + +* Major refactoring of the block lifecycle, enabling validation of block data both before and after saving, by @ml-evs in https://github.com/datalab-org/datalab/pull/1311 +* Replace browser-native dialogs with custom datalab dialog service by @BenjaminCharmes in https://github.com/datalab-org/datalab/pull/1212 +* Resolve CVEs on mermaid.js and cross-spawn by @dependabot[bot] in https://github.com/datalab-org/datalab/pull/1317 + +**Full Changelog**: https://github.com/datalab-org/datalab/compare/v0.6.2...main + ## v0.6.2 (August 2025) This patch release adds a hotfix for broken media blocks when encoding TIF files (#1318). diff --git a/pydatalab/pyproject.toml b/pydatalab/pyproject.toml index 74c25507b..6503e8fad 100644 --- a/pydatalab/pyproject.toml +++ b/pydatalab/pyproject.toml @@ -140,6 +140,8 @@ filterwarnings = [ "ignore:.*np.bool8*:DeprecationWarning", "ignore::pytest.PytestUnraisableExceptionWarning", "ignore:.*JCAMP-DX key without value*:UserWarning", + "ignore:.*Unable to find wavenumber unit*:UserWarning", + "ignore:.*Unable to find wavenumber offset*:UserWarning", ] [tool.liccheck] diff --git a/pydatalab/schemas/cell.json b/pydatalab/schemas/cell.json index f891bc9af..a3a97ed19 100644 --- a/pydatalab/schemas/cell.json +++ b/pydatalab/schemas/cell.json @@ -176,7 +176,7 @@ "definitions": { "DataBlockResponse": { "title": "DataBlockResponse", - "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.", + "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.\n\nIt is expected but not mandatory that this model will be extended by the specific block type\nwhere possible.", "type": "object", "properties": { "blocktype": { @@ -214,6 +214,20 @@ "type": "string" } }, + "errors": { + "title": "Errors", + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "title": "Warnings", + "type": "array", + "items": { + "type": "string" + } + }, "b64_encoded_image": { "title": "B64 Encoded Image", "type": "object", @@ -223,7 +237,15 @@ }, "bokeh_plot_data": { "title": "Bokeh Plot Data", - "type": "string" + "type": "object" + }, + "computed": { + "title": "Computed", + "type": "object" + }, + "metadata": { + "title": "Metadata", + "type": "object" } }, "required": [ diff --git a/pydatalab/schemas/equipment.json b/pydatalab/schemas/equipment.json index 65dc68c08..9affcfda8 100644 --- a/pydatalab/schemas/equipment.json +++ b/pydatalab/schemas/equipment.json @@ -140,7 +140,7 @@ "definitions": { "DataBlockResponse": { "title": "DataBlockResponse", - "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.", + "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.\n\nIt is expected but not mandatory that this model will be extended by the specific block type\nwhere possible.", "type": "object", "properties": { "blocktype": { @@ -178,6 +178,20 @@ "type": "string" } }, + "errors": { + "title": "Errors", + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "title": "Warnings", + "type": "array", + "items": { + "type": "string" + } + }, "b64_encoded_image": { "title": "B64 Encoded Image", "type": "object", @@ -187,7 
+201,15 @@ }, "bokeh_plot_data": { "title": "Bokeh Plot Data", - "type": "string" + "type": "object" + }, + "computed": { + "title": "Computed", + "type": "object" + }, + "metadata": { + "title": "Metadata", + "type": "object" } }, "required": [ diff --git a/pydatalab/schemas/sample.json b/pydatalab/schemas/sample.json index e5ec8b5fd..3c8ebfb91 100644 --- a/pydatalab/schemas/sample.json +++ b/pydatalab/schemas/sample.json @@ -229,7 +229,7 @@ }, "DataBlockResponse": { "title": "DataBlockResponse", - "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.", + "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.\n\nIt is expected but not mandatory that this model will be extended by the specific block type\nwhere possible.", "type": "object", "properties": { "blocktype": { @@ -267,6 +267,20 @@ "type": "string" } }, + "errors": { + "title": "Errors", + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "title": "Warnings", + "type": "array", + "items": { + "type": "string" + } + }, "b64_encoded_image": { "title": "B64 Encoded Image", "type": "object", @@ -276,7 +290,15 @@ }, "bokeh_plot_data": { "title": "Bokeh Plot Data", - "type": "string" + "type": "object" + }, + "computed": { + "title": "Computed", + "type": "object" + }, + "metadata": { + "title": "Metadata", + "type": "object" } }, "required": [ diff --git a/pydatalab/schemas/startingmaterial.json b/pydatalab/schemas/startingmaterial.json index 4ab9c9392..d6f180279 100644 --- a/pydatalab/schemas/startingmaterial.json +++ b/pydatalab/schemas/startingmaterial.json @@ -282,7 +282,7 @@ }, "DataBlockResponse": { "title": "DataBlockResponse", - "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.", + "description": "A generic response model for a block, i.e., what is stored in `self.data`\nin the corresponding DataBlock class.\n\nIt is expected but not mandatory that this model will be extended by the specific block type\nwhere possible.", "type": "object", "properties": { "blocktype": { @@ -320,6 +320,20 @@ "type": "string" } }, + "errors": { + "title": "Errors", + "type": "array", + "items": { + "type": "string" + } + }, + "warnings": { + "title": "Warnings", + "type": "array", + "items": { + "type": "string" + } + }, "b64_encoded_image": { "title": "B64 Encoded Image", "type": "object", @@ -329,7 +343,15 @@ }, "bokeh_plot_data": { "title": "Bokeh Plot Data", - "type": "string" + "type": "object" + }, + "computed": { + "title": "Computed", + "type": "object" + }, + "metadata": { + "title": "Metadata", + "type": "object" } }, "required": [ diff --git a/pydatalab/src/pydatalab/apps/chat/blocks.py b/pydatalab/src/pydatalab/apps/chat/blocks.py index 0f59df075..5161f17cf 100644 --- a/pydatalab/src/pydatalab/apps/chat/blocks.py +++ b/pydatalab/src/pydatalab/apps/chat/blocks.py @@ -1,18 +1,29 @@ import json -import os +import warnings -from langchain_anthropic import ChatAnthropic -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_openai import ChatOpenAI +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage +from pydantic import Field from pydatalab.blocks.base import DataBlock from pydatalab.logger import LOGGER from pydatalab.models import ITEM_MODELS +from pydatalab.models.blocks import DataBlockResponse from pydatalab.utils import 
CustomJSONEncoder +from .models import AVAILABLE_MODELS, ModelCard + __all__ = ("ChatBlock",) +class ChatBlockResponse(DataBlockResponse): + messages: list[dict] = Field(default_factory=list) + prompt: str | None + model: str + available_models: dict[str, ModelCard] = Field(exclude=True) + token_count: int | None + temperature: float + + class ChatBlock(DataBlock): """This block uses API calls to external LLMs via Langchain to provide a conversational interface to a user's data. @@ -33,11 +44,12 @@ class ChatBlock(DataBlock): """ + block_db_model = ChatBlockResponse + blocktype = "chat" description = "Virtual LLM assistant block allows you to converse with your data." name = "Whinchat assistant" accepted_file_extensions = None - chat_client: BaseChatModel | None = None __supports_collections = True @@ -48,106 +60,58 @@ class ChatBlock(DataBlock): Be as concise as possible. When saying your name, type a bird emoji right after whinchat 🐦. """, "temperature": 0.2, - "error_message": None, "model": "gpt-4o", - "available_models": { - "claude-3-5-sonnet-20241022": { - "name": "claude-3-5-sonnet-20241022", - "context_window": 200_000, - "input_cost_usd_per_MTok": 3.00, - "output_cost_usd_per_MTok": 15.00, - }, - "claude-3-5-haiku-20241022": { - "name": "claude-3-haiku-20241022", - "context_window": 200_000, - "input_cost_usd_per_MTok": 1.00, - "output_cost_usd_per_MTok": 5.00, - }, - "claude-3-haiku-20240307": { - "name": "claude-3-haiku-20240307", - "context_window": 200_000, - "input_cost_usd_per_MTok": 0.25, - "output_cost_usd_per_MTok": 1.25, - }, - "claude-3-opus-20240229": { - "name": "claude-3-opus-20240229", - "context_window": 200000, - "input_cost_usd_per_MTok": 15.00, - "output_cost_usd_per_MTok": 75.00, - }, - "gpt-4o": { - "name": "gpt-4o", - "context_window": 128000, - "input_cost_usd_per_MTok": 5.00, - "output_cost_usd_per_MTok": 15.00, - }, - "gpt-4o-mini": { - "name": "gpt-4o-mini", - "context_window": 128_000, - "input_cost_usd_per_MTok": 0.15, - "output_cost_usd_per_MTok": 0.60, - }, - "gpt-4": { - "name": "gpt-4", - "context_window": 8192, - "input_cost_usd_per_MTok": 30.00, - "output_cost_usd_per_MTok": 60.00, - }, - "gpt-4-turbo": { - "name": "gpt-4-turbo", - "context_window": 128000, - "input_cost_usd_per_MTok": 10.00, - "output_cost_usd_per_MTok": 30.00, - }, - }, + "available_models": AVAILABLE_MODELS, } - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def to_db(self): - """returns a dictionary with the data for this - block, ready to be input into mongodb""" - self.render() - return super().to_db() - @property def plot_functions(self): return (self.render,) - def render(self): - if not self.data.get("messages"): - if (item_id := self.data.get("item_id")) is not None: - info_json = self._prepare_item_json_for_chat(item_id) - elif (collection_id := self.data.get("collection_id")) is not None: - info_json = self._prepare_collection_json_for_chat(collection_id) - else: - raise RuntimeError("No item or collection id provided") + def start_conversation( + self, item_data: dict | None = None, collection_data: dict | None = None + ): + """Starts a new conversation with the system prompt, embedding + the current item or collection data. - self.data["messages"] = [ - { - "role": "system", - "content": self.defaults["system_prompt"], - }, - { - "role": "user", - "content": f"""Here is the JSON data for the current item(s): {info_json}. 
+ """ + + if (item_id := self.data.get("item_id")) is not None: + info_json = self._prepare_item_json_for_chat(item_id, item_data=item_data) + elif (collection_id := self.data.get("collection_id")) is not None: + info_json = self._prepare_collection_json_for_chat( + collection_id, collection_data=collection_data + ) + else: + raise RuntimeError("No item or collection id provided") + + self.data["messages"] = [ + { + "role": "system", + "content": self.defaults["system_prompt"], + }, + { + "role": "user", + "content": f"""Here is the JSON data for the current item(s): {info_json}. Start with a friendly introduction and give me a one sentence summary of what this is (not detailed, no information about specific masses). """, - }, - ] + }, + ] + + def continue_conversation(self, prompt: str | None) -> None: + """Continues the conversation based on the passed user prompt. + + Parameters: + prompt: The textual response from the user. - if self.data.get("prompt") and self.data.get("prompt").strip(): + """ + if prompt and prompt.strip(): self.data["messages"].append( { "role": "user", - "content": self.data["prompt"], + "content": prompt, } ) self.data["prompt"] = None - else: - LOGGER.debug( - "Chat block: no prompt was provided (or prompt was entirely whitespace), so no inference will be performed" - ) try: if self.data["messages"][-1].role not in ("user", "system"): @@ -158,7 +122,7 @@ def render(self): if self.data.get("model") not in self.data.get("available_models", {}): bad_model = self.data.get("model") - self.data["error_message"] = ( + warnings.warn( f"Chatblock received an unknown or deprecated model: {bad_model}. Reverting to default model {self.defaults['model']}." ) self.data["model"] = self.defaults["model"] @@ -166,25 +130,14 @@ def render(self): try: model_name = self.data["model"] - model_dict = self.data["available_models"][model_name] - LOGGER.warning(f"Initializing chatblock with model: {model_name}") + model_cls = self.data["available_models"][model_name] - if model_name.startswith("claude"): - self.chat_client = ChatAnthropic( - anthropic_api_key=os.environ.get("ANTHROPIC_API_KEY"), - model=model_name, - ) - elif model_name.startswith("gpt"): - self.chat_client = ChatOpenAI( - api_key=os.environ.get("OPENAI_API_KEY"), - model=model_name, - ) + chat_client = model_cls.chat_client(model=model_cls.name) LOGGER.debug( - f'submitting request to API for completion with last message role "{self.data["messages"][-1]["role"]}" (message = {self.data["messages"][-1:]}). Temperature = {self.data["temperature"]} (type {type(self.data["temperature"])})' + f'submitting request to API for completion with last message role "{self.data["messages"][-1]["role"]}" (message = {self.data["messages"][-1:]}).' 
+ f"Temperature = {self.data['temperature']} (type {type(self.data['temperature'])})" ) - from langchain_core.messages import AIMessage, HumanMessage, SystemMessage - # Convert your messages to the required format langchain_messages = [] for message in self.data["messages"]: @@ -195,54 +148,68 @@ def render(self): else: langchain_messages.append(AIMessage(content=message["content"])) - token_count = self.chat_client.get_num_tokens_from_messages(langchain_messages) + if model_cls.name == "langchain-debug": + token_count = sum(len(m["content"]) for m in self.data["messages"]) + else: + token_count = chat_client.get_num_tokens_from_messages(langchain_messages) self.data["token_count"] = token_count - if token_count >= model_dict["context_window"]: - self.data["error_message"] = ( - f"""This conversation has reached its maximum context size and the chatbot won't be able to respond further ({token_count} tokens, max: {model_dict["context_window"]}). Please make a new chat block to start fresh, or use a model with a larger context window""" + if token_count >= model_cls.context_window: + raise RuntimeError( + f"""This conversation has reached its maximum context size and the chatbot won't be able to respond further +({token_count} tokens, max: {model_cls.context_window}). +Please make a new chat block to start fresh, or use a model with a larger context window.""" ) - return # Call the chat client with the invoke method - response = self.chat_client.invoke(langchain_messages) + response = chat_client.invoke(langchain_messages) langchain_messages.append(response) - token_count = self.chat_client.get_num_tokens_from_messages(langchain_messages) - - self.data["token_count"] = token_count + # Now recalculate the token count after model output self.data["messages"].append({"role": "assistant", "content": response.content}) - self.data["error_message"] = None + if model_cls.name == "langchain-debug": + token_count = sum(len(m["content"]) for m in self.data["messages"]) + else: + token_count = chat_client.get_num_tokens_from_messages(langchain_messages) + self.data["token_count"] = token_count except Exception as exc: - LOGGER.debug("Received an error from API: %s", exc) - self.data["error_message"] = ( + raise RuntimeError( f"Received an error from the API: {exc}.\n\n Consider choosing a different model and reloading the block." 
) - return - def _prepare_item_json_for_chat(self, item_id: str): + def render(self): + if not self.data.get("messages"): + self.start_conversation() + + self.continue_conversation(self.data.get("prompt")) + + def _prepare_item_json_for_chat(self, item_id: str, item_data: dict | None = None): from pydatalab.routes.v0_1.items import get_item_data - item_info = get_item_data(item_id, load_blocks=False).json + if item_data is None: + item_data = get_item_data(item_id).json + + if item_data["status"] != "success": + raise RuntimeError(f"Attempt to get item data for {item_id=} failed.") - model = ITEM_MODELS[item_info["item_data"]["type"]](**item_info["item_data"]) - if model.blocks_obj: - model.blocks_obj = { - k: value for k, value in model.blocks_obj.items() if value["blocktype"] != "chat" + item_model = ITEM_MODELS[item_data["item_data"]["type"]](**item_data["item_data"]) + if item_model.blocks_obj: + item_model.blocks_obj = { + k: block for k, block in item_model.blocks_obj.items() if block.blocktype != "chat" } - item_info = model.dict(exclude_none=True, exclude_unset=True) - item_info["type"] = model.type + item_data = item_model.dict(exclude_none=True, exclude_unset=True) + item_data["type"] = item_model.type # strip irrelevant or large fields item_filenames = { - str(file["immutable_id"]): file["name"] for file in item_info.get("files", []) + str(file["immutable_id"]): file["name"] for file in item_data.get("files", []) } - big_data_keys = ["bokeh_plot_data", "b64_encoded_image"] - for block in item_info.get("blocks_obj", {}).values(): + big_data_keys = ["bokeh_plot_data", "b64_encoded_image", "computed"] + for block in item_data.get("blocks_obj", {}).values(): block_fields_to_remove = ["item_id", "block_id", "collection_id"] + big_data_keys [block.pop(field, None) for field in block_fields_to_remove] @@ -251,13 +218,12 @@ def _prepare_item_json_for_chat(self, item_id: str): "acquisition_parameters", "carrier_offset_Hz", "nscans", - "processed_data", "processed_data_shape", "processing_parameters", "pulse_program", "selected_process", ] - [block.pop(field, None) for field in NMR_fields_to_remove] + [block["metadata"].pop(field, None) for field in NMR_fields_to_remove] # replace file_id with the actual filename file_id = block.pop("file_id", None) @@ -276,22 +242,22 @@ def _prepare_item_json_for_chat(self, item_id: str): ] for k in top_level_keys_to_remove: - item_info.pop(k, None) + item_data.pop(k, None) - for ind, f in enumerate(item_info.get("relationships", [])): - item_info["relationships"][ind] = { + for ind, f in enumerate(item_data.get("relationships", [])): + item_data["relationships"][ind] = { k: v for k, v in f.items() if k in ["item_id", "type", "relation"] } - item_info["files"] = [file["name"] for file in item_info.get("files", [])] - item_info["creators"] = [ - creator["display_name"] for creator in item_info.get("creators", []) + item_data["files"] = [file["name"] for file in item_data.get("files", [])] + item_data["creators"] = [ + creator["display_name"] for creator in item_data.get("creators", []) ] # move blocks from blocks_obj to a simpler list to further cut down tokens, # especially in alphanumeric block_id fields - item_info["blocks"] = [block for block in item_info.pop("blocks_obj", {}).values()] + item_data["blocks"] = [block for block in item_data.pop("blocks_obj", {}).values()] - item_info = {k: value for k, value in item_info.items() if value} + item_data = {k: value for k, value in item_data.items() if value} for key in [ "synthesis_constituents", @@ 
-299,10 +265,8 @@ def _prepare_item_json_for_chat(self, item_id: str): "negative_electrode", "electrolyte", ]: - if key in item_info: - for constituent in item_info[key]: - LOGGER.debug("iterating through constituents:") - LOGGER.debug(constituent) + if key in item_data: + for constituent in item_data[key]: if "quantity" in constituent: constituent["quantity"] = ( f"{constituent.get('quantity', 'unknown')} {constituent.get('unit', '')}" ) @@ -311,7 +275,7 @@ def _prepare_item_json_for_chat(self, item_id: str): # Note manual replaces to help avoid escape sequences that take up extra tokens item_info_json = ( - json.dumps(item_info, cls=CustomJSONEncoder) + json.dumps(item_data, cls=CustomJSONEncoder) .replace('"', "'") .replace(r"\'", "'") .replace(r"\n", " ") @@ -319,12 +283,16 @@ return item_info_json - def _prepare_collection_json_for_chat(self, collection_id: str): + def _prepare_collection_json_for_chat( + self, collection_id: str, collection_data: dict | None = None + ): from pydatalab.routes.v0_1.collections import get_collection - collection_data = get_collection(collection_id).json - if collection_data["status"] != "success": - raise RuntimeError(f"Attempt to get collection data for {collection_id} failed.") + if not collection_data: + collection_data = get_collection(collection_id).json + + if collection_data["status"] != "success": + raise RuntimeError(f"Attempt to get collection data for {collection_id} failed.") children = collection_data["child_items"] return ( diff --git a/pydatalab/src/pydatalab/apps/chat/models.py b/pydatalab/src/pydatalab/apps/chat/models.py new file mode 100644 index 000000000..c1d5f3ce9 --- /dev/null +++ b/pydatalab/src/pydatalab/apps/chat/models.py @@ -0,0 +1,85 @@ +from langchain_anthropic import ChatAnthropic +from langchain_core.language_models import BaseChatModel, ParrotFakeChatModel +from langchain_openai import ChatOpenAI +from pydantic import BaseModel + + +class ModelCard(BaseModel): + name: str + context_window: int + input_cost_usd_per_MTok: float + output_cost_usd_per_MTok: float + chat_client: type[BaseChatModel] + + +__all__ = ("AVAILABLE_MODELS", "ModelCard") + +_AVAILABLE_MODELS = [ + { + "name": "langchain-debug", + "context_window": 200_000_000, + "input_cost_usd_per_MTok": 0.0, + "output_cost_usd_per_MTok": 0.0, + "chat_client": ParrotFakeChatModel, + }, + { + "name": "claude-3-5-sonnet-20241022", + "context_window": 200_000, + "input_cost_usd_per_MTok": 3.00, + "output_cost_usd_per_MTok": 15.00, + "chat_client": ChatAnthropic, + }, + { + "name": "claude-3-5-haiku-20241022", + "context_window": 200_000, + "input_cost_usd_per_MTok": 1.00, + "output_cost_usd_per_MTok": 5.00, + "chat_client": ChatAnthropic, + }, + { + "name": "claude-3-haiku-20240307", + "context_window": 200_000, + "input_cost_usd_per_MTok": 0.25, + "output_cost_usd_per_MTok": 1.25, + "chat_client": ChatAnthropic, + }, + { + "name": "claude-3-opus-20240229", + "context_window": 200000, + "input_cost_usd_per_MTok": 15.00, + "output_cost_usd_per_MTok": 75.00, + "chat_client": ChatAnthropic, + }, + { + "name": "gpt-4o", + "context_window": 128000, + "input_cost_usd_per_MTok": 5.00, + "output_cost_usd_per_MTok": 15.00, + "chat_client": ChatOpenAI, + }, + { + "name": "gpt-4o-mini", + "context_window": 128_000, + "input_cost_usd_per_MTok": 0.15, + "output_cost_usd_per_MTok": 0.60, + "chat_client": ChatOpenAI, + }, + { + "name": "gpt-4", + "context_window": 8192, + "input_cost_usd_per_MTok": 30.00, + "output_cost_usd_per_MTok":
60.00, + "chat_client": ChatOpenAI, + }, + { + "name": "gpt-4-turbo", + "context_window": 128000, + "input_cost_usd_per_MTok": 10.00, + "output_cost_usd_per_MTok": 30.00, + "chat_client": ChatOpenAI, + }, +] + +AVAILABLE_MODELS: dict[str, ModelCard] = { + model["name"]: ModelCard(**model) for model in _AVAILABLE_MODELS +} diff --git a/pydatalab/src/pydatalab/apps/nmr/blocks.py b/pydatalab/src/pydatalab/apps/nmr/blocks.py index 7fa12a223..7d5d8956e 100644 --- a/pydatalab/src/pydatalab/apps/nmr/blocks.py +++ b/pydatalab/src/pydatalab/apps/nmr/blocks.py @@ -23,6 +23,7 @@ class NMRBlock(DataBlock): description = "A data block for loading and visualizing 1D NMR data from Bruker projects or JCAMP-DX files." accepted_file_extensions = BRUKER_FILE_EXTENSIONS + JCAMP_FILE_EXTENSIONS + processed_data: dict | None = None defaults = {"process number": 1} _supports_collections = False @@ -34,7 +35,7 @@ def read_bruker_nmr_data( self, filename: str | Path | None = None, file_info: dict | None = None, - ) -> tuple[dict | None, dict] | None: + ) -> tuple[dict | None, dict]: """Loads a Bruker project from the passed or attached zip file and parses it into a serialized dataframe and metadata dictionary. @@ -119,7 +120,6 @@ def read_bruker_nmr_data( metadata["title"] = topspin_title self.data["metadata"] = metadata - self.data["processed_data"] = serialized_df return serialized_df, metadata @@ -202,24 +202,26 @@ def read_jcamp_nmr_data( pass serialized_df = df.to_dict() if (df is not None) else None - - self.data["processed_data"] = serialized_df self.data["metadata"] = metadata + return serialized_df, metadata + def load_nmr_data(self, file_info: dict): location, name, ext = self._extract_file_info(file_info=file_info) if ext == ".zip": - self.read_bruker_nmr_data(file_info=file_info) + df, metadata = self.read_bruker_nmr_data(file_info=file_info) elif ext in (".jdx", ".dx"): - self.read_jcamp_nmr_data(file_info=file_info) + df, metadata = self.read_jcamp_nmr_data(file_info=file_info) else: raise RuntimeError( f"Unsupported file extension for NMR reader: {ext} (must be one of {self.accepted_file_extensions})" ) + return df + def generate_nmr_plot(self, parse: bool = True): """Generate an NMR plot and store processed data for the data files attached to this block. @@ -232,7 +234,7 @@ def generate_nmr_plot(self, parse: bool = True): file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) name, ext = os.path.splitext(file_info["name"]) - self.load_nmr_data(file_info) + self.processed_data = self.load_nmr_data(file_info) processed_data_shape = self.data.get("metadata", {}).get("processed_data_shape", []) if not processed_data_shape or len(processed_data_shape) > 1: @@ -241,14 +243,14 @@ def generate_nmr_plot(self, parse: bool = True): ) return - if "processed_data" not in self.data or not self.data["processed_data"]: + if not self.processed_data: self.data["bokeh_plot_data"] = None warnings.warn( "No compatible processed data available for plotting, only metadata will be displayed." 
) return - df = pd.DataFrame(self.data["processed_data"]) + df = pd.DataFrame(self.processed_data) df["normalized intensity"] = df.intensity / df.intensity.max() self.data["bokeh_plot_data"] = self.make_nmr_plot(df, self.data["metadata"]) diff --git a/pydatalab/src/pydatalab/apps/xrd/blocks.py b/pydatalab/src/pydatalab/apps/xrd/blocks.py index 4990ff67c..00c631255 100644 --- a/pydatalab/src/pydatalab/apps/xrd/blocks.py +++ b/pydatalab/src/pydatalab/apps/xrd/blocks.py @@ -262,7 +262,8 @@ def generate_xrd_plot(self, filenames: list[str | Path] | None = None) -> None: pattern_df["normalized intensity (staggered)"] += ind pattern_dfs.append(pattern_df) - self.data["peak_data"] = peak_information + self.data["computed"] = {} + self.data["computed"]["peak_data"] = peak_information elif filenames is None: file_info = get_file_info_by_id(self.data["file_id"], update_if_live=True) @@ -284,9 +285,9 @@ def generate_xrd_plot(self, filenames: list[str | Path] | None = None) -> None: f"{self.data.get('wavelength', self.defaults['wavelength'])} Å" ) peak_model = PeakInformation(**peak_data) - if "peak_data" not in self.data: - self.data["peak_data"] = {} - self.data["peak_data"][str(file_info["immutable_id"])] = peak_model.dict() + if "computed" not in self.data: + self.data["computed"] = {"peak_data": {}} + self.data["computed"]["peak_data"][str(file_info["immutable_id"])] = peak_model.dict() pattern_dfs = [pattern_df] else: @@ -303,9 +304,8 @@ def generate_xrd_plot(self, filenames: list[str | Path] | None = None) -> None: pattern_dfs.append(pattern_df) peak_model = PeakInformation(**peak_data) - if "peak_data" not in self.data: - self.data["peak_data"] = {} - self.data["peak_data"][f] = peak_model.dict() + if "computed" not in self.data: + self.data["computed"] = {"peak_data": {}} + self.data["computed"]["peak_data"][f] = peak_model.dict() if pattern_dfs: p = self._make_plots(pattern_dfs, y_options) diff --git a/pydatalab/src/pydatalab/blocks/base.py b/pydatalab/src/pydatalab/blocks/base.py index 25e86925c..25034a5f7 100644 --- a/pydatalab/src/pydatalab/blocks/base.py +++ b/pydatalab/src/pydatalab/blocks/base.py @@ -1,4 +1,5 @@ import functools +import json import random import warnings from collections.abc import Callable, Sequence @@ -6,6 +7,7 @@ from pydatalab import __version__ from pydatalab.logger import LOGGER +from pydatalab.models.blocks import DataBlockResponse __all__ = ("generate_random_id", "DataBlock", "generate_js_callback_single_float_parameter") @@ -100,6 +102,8 @@ def generate_random_id(): class DataBlock: """Base class for a data block.""" + block_db_model = DataBlockResponse + name: str = "base" """The human-readable block name specifying which technique or file format it pertains to.
@@ -191,39 +195,14 @@ def __init__( collection_id, ) - def to_db(self): - """returns a dictionary with the data for this - block, ready to be input into mongodb""" + def to_db(self) -> dict: + """Returns a dictionary with the data for this block, + ready to be inserted into MongoDB.""" - from bson import ObjectId LOGGER.debug("Casting block %s to database object.", self.__class__.__name__) - - self.data.pop("bokeh_plot_data", None) - self.data.pop("b64_encoded_image", None) - - if "file_id" in self.data: - dict_for_db = self.data.copy() # gross, I know - dict_for_db["file_id"] = ObjectId(dict_for_db["file_id"]) - return dict_for_db - - return self.data - - @classmethod - def from_db(cls, block: dict): - """Create a block from data stored in the database.""" - LOGGER.debug("Loading block %s from database object.", cls.__class__.__name__) - new_block = cls( - item_id=block.get("item_id"), - collection_id=block.get("collection_id"), - init_data=block, + return self.block_db_model(**self.data).dict( + exclude={"bokeh_plot_data", "b64_encoded_image"}, exclude_unset=True ) - if "file_id" in new_block.data: - new_block.data["file_id"] = str(new_block.data["file_id"]) - - if new_block.data.get("title", "") == new_block.description: - new_block.data["title"] = new_block.name - - return new_block def to_web(self) -> dict[str, Any]: """Returns a JSON serializable dictionary to render the data block on the web.""" @@ -258,7 +237,9 @@ def to_web(self) -> dict[str, Any]: else: self.data.pop("warnings", None) - return self.data + model = self.block_db_model(**self.data) + serialized = json.loads(model.json(exclude_unset=True, exclude_none=True)) + return serialized def process_events(self, events: list[dict] | dict): """Handle any supported events passed to the block.""" @@ -324,7 +305,6 @@ def from_web(cls, data: dict): data: The block data to initialize the block with. """ - LOGGER.debug("Loading block %s from web request.", cls.__class__.__name__) block = cls( item_id=data.get("item_id"), collection_id=data.get("collection_id"), @@ -334,9 +314,9 @@ def update_from_web(self, data: dict): - """Update the block with data received from a web request. - - Only updates fields that are specified in the dictionary - other fields are left alone + """Update the block with validated data received from a web request. + Will strip any fields that are "computed" or otherwise not controllable + by the user. Parameters: data: A dictionary of data to update the block with. @@ -346,6 +326,11 @@ "Updating block %s from web request", self.__class__.__name__, ) - self.data.update(data) + self.data.update( + self.block_db_model(**data).dict( + exclude={"computed", "metadata", "bokeh_plot_data", "b64_encoded_image"}, + exclude_unset=True, + ) + ) return self diff --git a/pydatalab/src/pydatalab/models/blocks.py b/pydatalab/src/pydatalab/models/blocks.py index cec98577c..d4ecfaac3 100644 --- a/pydatalab/src/pydatalab/models/blocks.py +++ b/pydatalab/src/pydatalab/models/blocks.py @@ -6,6 +6,9 @@ class DataBlockResponse(BaseModel): """A generic response model for a block, i.e., what is stored in `self.data` in the corresponding DataBlock class. + + It is expected but not mandatory that this model will be extended by the specific block type + where possible.
""" blocktype: str @@ -32,12 +35,28 @@ class DataBlockResponse(BaseModel): file_ids: list[PyObjectId] | None = None """A list of file IDs associated with the block, if any.""" - b64_encoded_image: dict[PyObjectId, str] | None = None - """Any base64-encoded image data associated with the block, keyed by file_id, if any.""" + errors: list[str] | None = None + """Any errors that occurred during block processing.""" + + warnings: list[str] | None = None + """Any warnings that occurred during block processing.""" + + b64_encoded_image: dict[str, str] | None + """Any base64-encoded image data associated with the block, keyed by `file_id`.""" - bokeh_plot_data: str | None = None + bokeh_plot_data: dict | None """A JSON-encoded string containing the Bokeh plot data, if any.""" + computed: dict | None = None + """Any processed or computed data associated with the block, small enough to store and filter directly in the database, + i.e., strings or a few hundred numbers not exceeding 16KB in size. + Examples could include peak positions, and widths, but not the full spectrum. + """ + + metadata: dict | None = None + """Any structured metadata associated with the block, for example, + experimental acquisition parameters.""" + class Config: allow_population_by_field_name = True json_encoders = JSON_ENCODERS diff --git a/pydatalab/src/pydatalab/routes/v0_1/blocks.py b/pydatalab/src/pydatalab/routes/v0_1/blocks.py index 2e014ceed..131ac56fe 100644 --- a/pydatalab/src/pydatalab/routes/v0_1/blocks.py +++ b/pydatalab/src/pydatalab/routes/v0_1/blocks.py @@ -33,10 +33,6 @@ def add_data_block(): block = BLOCK_TYPES[block_type](item_id=item_id) - data = block.to_db() - - # currently, adding to both blocks and blocks_obj to mantain compatibility with - # the old site. The new site only uses blocks_obj if insert_index: display_order_update = { "$each": [block.block_id], @@ -48,8 +44,8 @@ def add_data_block(): result = flask_mongo.db.items.update_one( {"item_id": item_id, **get_default_permissions(user_only=True)}, { - "$push": {"blocks": data, "display_order": display_order_update}, - "$set": {f"blocks_obj.{block.block_id}": data}, + "$push": {"display_order": display_order_update}, + "$set": {f"blocks_obj.{block.block_id}": block.to_db()}, }, ) @@ -140,7 +136,7 @@ def add_collection_data_block(): ) -def _save_block_to_db(block: DataBlock) -> bool: +def _save_block_to_db(block: DataBlock) -> None: """Save data for a single block within an item to the database, overwriting previous data saved there. 
@@ -171,8 +167,6 @@ def _save_block_to_db(block: DataBlock) -> bool: f"Failed to save block, likely because item_id ({block.data.get('item_id')}), collection_id ({block.data.get('collection_id')}) and/or block_id ({block.block_id}) wasn't found" ) - return True - @BLOCKS.route("/update-block/", methods=["POST"]) @BLOCKS.route("/blocks/", methods=["POST"]) @@ -185,7 +179,7 @@ def update_block(): request_json = request.get_json() block_data = request_json["block_data"] event_data = request_json.get("event_data", None) - save_to_db = request_json.get("save_to_db", False) + block_type = block_data["blocktype"] if block_type not in BLOCK_TYPES: @@ -201,14 +195,17 @@ def update_block(): except NotImplementedError: pass - saved_successfully = False - if save_to_db: - saved_successfully = _save_block_to_db(block) + # Save state from UI + _save_block_to_db(block) + + # Reload the block with new UI state + new_block_data = block.to_web() + + # Save results to DB + _save_block_to_db(block) return ( - jsonify( - status="success", saved_successfully=saved_successfully, new_block_data=block.to_web() - ), + jsonify(status="success", saved_successfully=True, new_block_data=new_block_data), 200, ) diff --git a/pydatalab/src/pydatalab/routes/v0_1/items.py b/pydatalab/src/pydatalab/routes/v0_1/items.py index f9d1c1d23..a886f57e2 100644 --- a/pydatalab/src/pydatalab/routes/v0_1/items.py +++ b/pydatalab/src/pydatalab/routes/v0_1/items.py @@ -28,61 +28,6 @@ def _(): ... -def reserialize_blocks(display_order: list[str], blocks_obj: dict[str, dict]) -> dict[str, dict]: - """Create the corresponding Python objects from JSON block data, then - serialize it again as JSON to populate any missing properties. - - Parameters: - blocks_obj: A dictionary containing the JSON block data, keyed by block ID. - - Returns: - A dictionary with the re-serialized block data. - - """ - for block_id in display_order: - try: - block_data = blocks_obj[block_id] - except KeyError: - LOGGER.warning(f"block_id {block_id} found in display order but not in blocks_obj") - continue - blocktype = block_data["blocktype"] - blocks_obj[block_id] = ( - BLOCK_TYPES.get(blocktype, BLOCK_TYPES["notsupported"]).from_db(block_data).to_web() - ) - - return blocks_obj - - -# Seems to be obselete now? -def dereference_files(file_ids: list[str | ObjectId]) -> dict[str, dict]: - """For a list of Object IDs (as strings or otherwise), query the files collection - and return a dictionary of the data stored under each ID. - - Parameters: - file_ids: The list of IDs of files to return; - - Returns: - The dereferenced data as a dictionary with (string) ID keys. - - """ - results = { - str(f["_id"]): f - for f in flask_mongo.db.files.find( - { - "_id": {"$in": [ObjectId(_id) for _id in file_ids]}, - } - ) - } - if len(results) != len(file_ids): - raise RuntimeError( - "Some file IDs did not have corresponding database entries.\n" - f"Returned: {list(results.keys())}\n" - f"Requested: {file_ids}\n" - ) - - return results - - @ITEMS.route("/equipment/", methods=["GET"]) def get_equipment_summary(): _project = { @@ -832,17 +777,9 @@ def delete_sample(): @ITEMS.route("/items/", methods=["GET"]) @ITEMS.route("/get-item-data/", methods=["GET"]) -def get_item_data( - item_id: str | None = None, refcode: str | None = None, load_blocks: bool = False -): +def get_item_data(item_id: str | None = None, refcode: str | None = None): """Generates a JSON response for the item with the given `item_id`, or `refcode` additionally resolving relationships to files and other items. 
- - Parameters: - load_blocks: Whether to regenerate any data blocks associated with this - sample (i.e., create the Python object corresponding to the block and - call its render function). - """ redirect_to_ui = bool(request.args.get("redirect-to-ui", default=False, type=json.loads)) if refcode and redirect_to_ui and CONFIG.APP_URL: @@ -911,8 +848,6 @@ def get_item_data( raise KeyError(f"Item {item_id=} has no type field in document.") doc = ItemModel(**doc) - if load_blocks: - doc.blocks_obj = reserialize_blocks(doc.display_order, doc.blocks_obj) # find any documents with relationships that mention this document relationships_query_results = flask_mongo.db.items.find( diff --git a/pydatalab/tests/apps/test_chat.py b/pydatalab/tests/apps/test_chat.py new file mode 100644 index 000000000..8285dc632 --- /dev/null +++ b/pydatalab/tests/apps/test_chat.py @@ -0,0 +1,32 @@ +from pydatalab.apps.chat import ChatBlock + + +def test_chatblock(): + chat = ChatBlock(item_id="test") + chat.data["model"] = "langchain-debug" + + chat.start_conversation(item_data={"item_data": {"type": "samples", "item_id": "test"}}) + + assert chat.data["messages"][0]["content"].startswith("You are whinchat (lowercase w)") + assert chat.data["messages"][1]["content"].startswith("Here is the JSON data for") + + chat.render() + assert chat.data["messages"][2]["role"] == "assistant" + + chat.data["prompt"] = "Hello there" + chat.render() + + assert chat.data["messages"][-1] == {"role": "assistant", "content": "Hello there"} + assert chat.data["messages"][-2] == {"role": "user", "content": "Hello there"} + assert chat.data["messages"][0]["content"].startswith("You are whinchat (lowercase w)") + assert chat.data["messages"][1]["content"].startswith("Here is the JSON data for") + assert len(chat.data["messages"]) == 5 + + chat.data["prompt"] = "Hello there again" + chat.render() + + assert chat.data["messages"][-1] == {"role": "assistant", "content": "Hello there again"} + assert chat.data["messages"][-2] == {"role": "user", "content": "Hello there again"} + assert chat.data["messages"][0]["content"].startswith("You are whinchat (lowercase w)") + assert chat.data["messages"][1]["content"].startswith("Here is the JSON data for") + assert len(chat.data["messages"]) == 7 diff --git a/pydatalab/tests/apps/test_nmr.py b/pydatalab/tests/apps/test_nmr.py index c0c933801..4eea765ca 100644 --- a/pydatalab/tests/apps/test_nmr.py +++ b/pydatalab/tests/apps/test_nmr.py @@ -91,28 +91,30 @@ def test_nmr_block( nmr_1d_solution_path, nmr_1d_solution_path_renamed, nmr_1d_solid_path, nmr_2d_matpass_path ): block = NMRBlock(item_id="nmr-block") - block.read_bruker_nmr_data(nmr_1d_solid_path) + block.processed_data, block.data["metadata"] = block.read_bruker_nmr_data(nmr_1d_solid_path) assert block.data["metadata"]["topspin_title"].split("\n")[0] == "7Li 40 kHz 40 C hahn-echo" block.generate_nmr_plot(parse=False) plot = block.data["bokeh_plot_data"] assert plot is not None block = NMRBlock(item_id="nmr-block") - block.read_bruker_nmr_data(nmr_1d_solution_path) + block.processed_data, block.data["metadata"] = block.read_bruker_nmr_data(nmr_1d_solution_path) assert block.data["metadata"]["topspin_title"].split("\n")[0] == "31P reference, 85% H3PO4" block.generate_nmr_plot(parse=False) plot = block.data["bokeh_plot_data"] assert plot is not None block = NMRBlock(item_id="nmr-block") - block.read_bruker_nmr_data(nmr_1d_solution_path_renamed) + block.processed_data, block.data["metadata"] = block.read_bruker_nmr_data( + nmr_1d_solution_path_renamed 
+ ) assert block.data["metadata"]["topspin_title"].split("\n")[0] == "31P reference, 85% H3PO4" block.generate_nmr_plot(parse=False) plot = block.data["bokeh_plot_data"] assert plot is not None block = NMRBlock(item_id="nmr-block") - block.read_bruker_nmr_data(nmr_2d_matpass_path) + block.processed_data, block.data["metadata"] = block.read_bruker_nmr_data(nmr_2d_matpass_path) assert block.data["metadata"]["topspin_title"].split("\n")[0] == "7Li 40kHz 40 C MATPASS" # catch warning about processed data with pytest.warns(UserWarning, match="Only metadata"): diff --git a/pydatalab/tests/server/conftest.py b/pydatalab/tests/server/conftest.py index 43fc0d923..c8a7e2fe7 100644 --- a/pydatalab/tests/server/conftest.py +++ b/pydatalab/tests/server/conftest.py @@ -71,6 +71,7 @@ def app_config(tmp_path_factory): "REMOTE_FILESYSTEMS": example_remotes, "FILE_DIRECTORY": str(tmp_path_factory.mktemp("files")), "TESTING": False, + "ROOT_PATH": "/", "EMAIL_AUTH_SMTP_SETTINGS": { "MAIL_SERVER": "smtp.example.com", "MAIL_PORT": 587, diff --git a/pydatalab/tests/server/test_blocks.py b/pydatalab/tests/server/test_blocks.py index 697e8063f..9f2ec5135 100644 --- a/pydatalab/tests/server/test_blocks.py +++ b/pydatalab/tests/server/test_blocks.py @@ -1,11 +1,8 @@ -from pathlib import Path - import pytest from pydatalab.apps import BLOCK_TYPES, BLOCKS -@pytest.mark.dependency() def test_get_all_available_block_types(): """Test that we can enumerate all available block types.""" assert len(BLOCKS) > 0 @@ -21,7 +18,6 @@ def test_get_all_available_block_types(): assert BLOCK_TYPES[block_class.blocktype] == block_class -@pytest.mark.dependency(depends=["test_get_all_available_block_types"]) @pytest.mark.parametrize("block_type", list(BLOCK_TYPES.keys())) def test_create_sample_with_each_block_type(admin_client, block_type, default_sample_dict): """Test creating a sample and adding each available block type via API.""" @@ -30,9 +26,7 @@ def test_create_sample_with_each_block_type(admin_client, block_type, default_sa sample_data["item_id"] = sample_id response = admin_client.post("/new-sample/", json=sample_data) - assert response.status_code == 201, ( - f"Failed to create sample for {block_type}: {response.json()}" - ) + assert response.status_code == 201, f"Failed to create sample for {block_type}: {response.json}" assert response.json["status"] == "success" response = admin_client.post( @@ -44,7 +38,7 @@ def test_create_sample_with_each_block_type(admin_client, block_type, default_sa }, ) - assert response.status_code == 200, f"Failed to add {block_type} block: {response.json()}" + assert response.status_code == 200, f"Failed to add {block_type} block: {response.json}" assert response.json["status"] == "success" assert response.json["new_block_obj"] assert response.json["new_block_obj"]["blocktype"] == block_type @@ -61,7 +55,6 @@ def test_create_sample_with_each_block_type(admin_client, block_type, default_sa assert first_block["blocktype"] == block_type -@pytest.mark.dependency(depends=["test_get_all_available_block_types"]) def test_invalid_block_type(admin_client, default_sample_dict): """Test that invalid block types are rejected.""" sample_id = "test_sample_invalid_block" @@ -84,7 +77,6 @@ def test_invalid_block_type(admin_client, default_sample_dict): assert "Invalid block type" in response.json["message"] -@pytest.mark.dependency(depends=["test_get_all_available_block_types"]) def test_add_multiple_blocks_to_sample(admin_client, default_sample_dict): """Test adding multiple different block types to the same 
sample.""" sample_id = "test_sample_multiple_blocks" @@ -169,7 +161,6 @@ def test_block_permissions(client, admin_client, unauthenticated_client, default assert response.status_code == 401 -@pytest.mark.dependency(depends=["test_get_all_available_block_types"]) def test_add_block_to_nonexistent_item(admin_client): """Test that adding a block to a nonexistent item fails gracefully.""" block_type = list(BLOCK_TYPES.keys())[0] @@ -187,7 +178,6 @@ def test_add_block_to_nonexistent_item(admin_client): assert "Update failed" in response.json["message"] -@pytest.mark.dependency(depends=["test_get_all_available_block_types"]) def test_block_info_endpoint_contains_all_blocks(client): """Test that the /info/blocks endpoint returns all available block types.""" response = client.get("/info/blocks") @@ -201,11 +191,10 @@ def test_block_info_endpoint_contains_all_blocks(client): ) -@pytest.mark.dependency(depends=["test_get_all_available_block_types"]) -def test_create_sample_with_example_files(admin_client, default_sample_dict): - """Create a test sample with multiple block types and attached example files.""" +def test_uvvis_block_lifecycle(admin_client, default_sample_dict, example_data_dir): + block_type = "uv-vis" - sample_id = "test_sample_with_files" + sample_id = f"test_sample_with_files-{block_type}-lifecycle" sample_data = default_sample_dict.copy() sample_data["item_id"] = sample_id @@ -213,182 +202,345 @@ def test_create_sample_with_example_files(admin_client, default_sample_dict): assert response.status_code == 201 assert response.json["status"] == "success" - block_file_mapping = { - "tabular": "csv", - "cycle": "echem", - "ftir": "FTIR", - "nmr": "NMR", - "raman": "raman", - "ms": "TGA-MS", - "uv-vis": "UV-Vis", - "xrd": "XRD", - "media": "media", - } - - preferred_files = { - "cycle": "jdb11-1_c3_gcpl_5cycles_2V-3p8V_C-24_data_C09.mpr", - "tabular": "simple.csv", - "ftir": "2024-10-10_FeSO4_ref.asp", - "nmr": "1.zip", - "raman": "raman_example.txt", - "ms": "20221128 134958 TGA MS Megan.asc", - "xrd": "Scan_C4.xrdml", - "media": "grey_group_logo.tif", - } - - example_data_path = Path(__file__).parent.parent.parent / "example_data" - added_blocks = {} - uploaded_files = [] - block_index = 0 - - for block_type, folder_name in block_file_mapping.items(): + response = admin_client.post( + "/add-data-block/", + json={ + "block_type": block_type, + "item_id": sample_id, + "index": 0, + }, + ) + + assert response.status_code == 200, f"Failed to add {block_type} block: {response.json}" + assert response.json["status"] == "success" + + block_data = response.json["new_block_obj"] + block_id = block_data["block_id"] + + uvvis_folder = example_data_dir / "UV-Vis" + + example_files = uvvis_folder.glob("*") + example_file_ids = [] + + for example_file in example_files: + with open(example_file, "rb") as f: + response = admin_client.post( + "/upload-file/", + buffered=True, + content_type="multipart/form-data", + data={ + "item_id": sample_id, + "file": [(f, example_file.name)], + "type": "application/octet-stream", + "replace_file": "null", + "relativePath": "null", + }, + ) + assert response.status_code == 201, f"Failed to upload {example_file.name}" + assert response.json["status"] == "success" + file_id = response.json["file_id"] + example_file_ids.append(file_id) + + assert len(example_file_ids) == 3 + + response = admin_client.get(f"/get-item-data/{sample_id}") + assert response.status_code == 200 + item_data = response.json["item_data"] + block_data = item_data["blocks_obj"][block_id] + 
block_data["file_id"] = example_file_ids[0] + block_data["selected_file_order"] = example_file_ids + + response = admin_client.post("/update-block/", json={"block_data": block_data}) + + assert response.status_code == 200 + web_block = response.json["new_block_data"] + assert "bokeh_plot_data" in web_block + assert web_block.get("errors") is None + + +def test_xrd_block_lifecycle(admin_client, default_sample_dict, example_data_dir): + from pydatalab.apps.xrd import XRDBlock + + block_type = "xrd" + + sample_id = f"test_sample_with_files-{block_type}-lifecycle" + sample_data = default_sample_dict.copy() + sample_data["item_id"] = sample_id + + response = admin_client.post("/new-sample/", json=sample_data) + assert response.status_code == 201 + assert response.json["status"] == "success" + + response = admin_client.post( + "/add-data-block/", + json={ + "block_type": block_type, + "item_id": sample_id, + "index": 0, + }, + ) + + assert response.status_code == 200, f"Failed to add {block_type} block: {response.json}" + assert response.json["status"] == "success" + + block_data = response.json["new_block_obj"] + block_id = block_data["block_id"] + + block_file = "XRD/cod_9004112.cif" + + example_file = example_data_dir / block_file + + with open(example_file, "rb") as f: response = admin_client.post( - "/add-data-block/", - json={ - "block_type": block_type, + "/upload-file/", + buffered=True, + content_type="multipart/form-data", + data={ "item_id": sample_id, - "index": block_index, + "file": [(f, example_file.name)], + "type": "application/octet-stream", + "replace_file": "null", + "relativePath": "null", }, ) - assert response.status_code == 200, f"Failed to add {block_type} block: {response.json()}" - assert response.json["status"] == "success" + assert response.status_code == 201, f"Failed to upload {example_file.name}" + assert response.json["status"] == "success" + file_id = response.json["file_id"] - block_data = response.json["new_block_obj"] - block_id = block_data["block_id"] - added_blocks[block_type] = {"block_id": block_id, "index": block_index} - - if block_type == "uv-vis": - folder_path = example_data_path / folder_name - if folder_path.exists(): - uv_files = list(folder_path.glob("*")) - assert len(uv_files) >= 2, f"UV-Vis needs at least 2 files, found {len(uv_files)}" - - file_ids = [] - for i, uv_file in enumerate(uv_files[:2]): - with open(uv_file, "rb") as f: - response = admin_client.post( - "/upload-file/", - buffered=True, - content_type="multipart/form-data", - data={ - "item_id": sample_id, - "file": [(f, uv_file.name)], - "type": "application/octet-stream", - "replace_file": "null", - "relativePath": "null", - }, - ) - - if response.status_code == 201: - assert response.json["status"] == "success" - file_id = response.json["file_id"] - file_ids.append(file_id) - uploaded_files.append({"block_type": block_type, "filename": uv_file.name}) - - if file_ids: - response = admin_client.get(f"/get-item-data/{sample_id}") - assert response.status_code == 200 - item_data = response.json["item_data"] - block_data = item_data["blocks_obj"][block_id] - block_data["file_id"] = file_ids[0] - block_data["selected_file_order"] = file_ids - - response = admin_client.post( - "/update-block/", json={"block_data": block_data, "save_to_db": True} - ) - - assert response.status_code == 200 - - else: - folder_path = example_data_path / folder_name - if folder_path.exists(): - files_in_folder = list(folder_path.glob("*")) - assert len(files_in_folder) > 0, f"No files found in 
{folder_path}" - - if block_type in preferred_files: - preferred_file = folder_path / preferred_files[block_type] - if preferred_file.exists(): - example_file = preferred_file - else: - example_file = files_in_folder[0] - else: - example_file = files_in_folder[0] - - with open(example_file, "rb") as f: - response = admin_client.post( - "/upload-file/", - buffered=True, - content_type="multipart/form-data", - data={ - "item_id": sample_id, - "file": [(f, example_file.name)], - "type": "application/octet-stream", - "replace_file": "null", - "relativePath": "null", - }, - ) - - assert response.status_code == 201, f"Failed to upload {example_file.name}" - assert response.json["status"] == "success" - file_id = response.json["file_id"] - - response = admin_client.get(f"/get-item-data/{sample_id}") - assert response.status_code == 200 - item_data = response.json["item_data"] - block_data = item_data["blocks_obj"][block_id] - block_data["file_id"] = file_id - - response = admin_client.post( - "/update-block/", json={"block_data": block_data, "save_to_db": True} - ) - assert response.status_code == 200 - uploaded_files.append({"block_type": block_type, "filename": example_file.name}) - - # For the media block, ensure we have a base64 encoded b64_encoded_image - # then try to save it back to the item - if block_type == "media": - block_data = response.json["new_block_data"] - assert "b64_encoded_image" in block_data - - response = admin_client.get(f"/get-item-data/{sample_id}") - assert response.status_code == 200 - item_data = response.json["item_data"] - - item_data["blocks_obj"][block_id] = block_data - response = admin_client.post( - "/save-item/", json={"item_id": sample_id, "data": item_data} - ) - assert response.status_code == 200 - - block_index += 1 - - response = admin_client.get(f"/get-item-data/{sample_id}?load_blocks=1") + response = admin_client.get(f"/get-item-data/{sample_id}") + assert response.status_code == 200 + item_data = response.json["item_data"] + block_data = item_data["blocks_obj"][block_id] + block_data["file_id"] = file_id + block_data["wavelength"] = 2.0 + + response = admin_client.post("/update-block/", json={"block_data": block_data}) + + web_block = response.json["new_block_data"] + assert "bokeh_plot_data" in web_block + assert "computed" in web_block + assert web_block["wavelength"] == 2.0 + assert "peak_data" in web_block["computed"] + assert "file_id" in web_block + assert web_block["file_id"] == file_id + assert web_block.get("errors") is None + + block = XRDBlock.from_web(web_block) + db = block.to_db() + # 'computed' keys should be dropped when loading from web + assert "bokeh_plot_data" not in db + assert "computed" not in db + assert db["wavelength"] == 2.0 + + # But they should still be in the database + response = admin_client.get(f"/get-item-data/{sample_id}") assert response.status_code == 200 - assert response.json["status"] == "success" item_data = response.json["item_data"] + assert response.json["status"] == "success" assert "blocks_obj" in item_data - assert len(item_data["blocks_obj"]) == len(added_blocks) - assert len(item_data["display_order"]) == len(added_blocks) + block = item_data["blocks_obj"][block_id] + assert "computed" in block + assert "peak_data" in block["computed"] + assert block["wavelength"] == 2.0 + + +def test_comment_block_manipulation(admin_client, default_sample_dict, database): + """Create a test sample with a comment block and test it for + dealing with unhandled data.""" + + block_type = "comment" + + sample_id = 
"test_sample_with_files-comment" + sample_data = default_sample_dict.copy() + sample_data["item_id"] = sample_id + + response = admin_client.post("/new-sample/", json=sample_data) + assert response.status_code == 201 + assert response.json["status"] == "success" + + response = admin_client.post( + "/add-data-block/", + json={ + "block_type": block_type, + "item_id": sample_id, + "index": 0, + }, + ) + + assert response.status_code == 200, f"Failed to add {block_type} block: {response.json}" + assert response.json["status"] == "success" - assert "file_ObjectIds" in item_data - assert len(item_data["file_ObjectIds"]) >= len(uploaded_files) + block_data = response.json["new_block_obj"] + block_id = block_data["block_id"] + block_data["freeform_comment"] = "This is a test comment block." + block_data["title"] = "Test Comment Block" + block_data["errors"] = ["Test Network Failure"] - block_types_in_sample = [block["blocktype"] for block in item_data["blocks_obj"].values()] - expected_block_types = list(block_file_mapping.keys()) + response = admin_client.post("/update-block/", json={"block_data": block_data}) + assert response.status_code == 200 + assert response.json["status"] == "success" + assert response.json["new_block_data"]["blocktype"] == block_type + assert response.json["new_block_data"]["freeform_comment"] == "This is a test comment block." + assert response.json["new_block_data"]["title"] == "Test Comment Block" + assert response.json["new_block_data"].get("errors") is None - for expected_type in expected_block_types: - assert expected_type in block_types_in_sample + # Check that this result was actually stored + response = admin_client.get(f"/get-item-data/{sample_id}") + assert response.status_code == 200 + item_data = response.json["item_data"] + assert response.json["status"] == "success" + assert ( + response.json["item_data"]["blocks_obj"][block_id]["freeform_comment"] + == "This is a test comment block." + ) + assert "errors" not in response.json["item_data"]["blocks_obj"][block_id] + # Try to add some bad data + block_data["bokeh_plot_data"] = {"bokeh": "json"} + block_data["random_new_key"] = "test new key" + block_data["freeform_comment"] = "This is a test comment block with extra data." + response = admin_client.post("/update-block/", json={"block_data": block_data}) + assert response.status_code == 200 + assert response.json["status"] == "success" + assert response.json["new_block_data"]["blocktype"] == block_type + assert ( + response.json["new_block_data"]["freeform_comment"] + == "This is a test comment block with extra data." 
+    )
+    assert response.json["new_block_data"]["title"] == "Test Comment Block"
+    assert "bokeh_plot_data" not in response.json["new_block_data"]
+
+    # Extra random keys are kept in the response, in case they are parameters
+    # for the block that are not yet handled
+    assert "random_new_key" in response.json["new_block_data"]
+
+    raw_item = database.items.find_one({"item_id": sample_id})
+    assert raw_item
+    raw_block = raw_item["blocks_obj"][block_id]
+    assert "bokeh_plot_data" not in raw_block
+    # assert "random_new_key" not in raw_block
+    assert "errors" not in raw_block
+
+    # Finally, try to update using the save-item endpoint, and make sure any
+    # known bad data gets stripped out
+    item_data["blocks_obj"][block_id]["bokeh_plot_data"] = {"bokeh": "json"}
+    item_data["blocks_obj"][block_id]["random_new_key"] = "test new key again"
+    item_data["blocks_obj"][block_id]["freeform_comment"] = "This is the latest test comment."
+
+    response = admin_client.post("/save-item/", json={"item_id": sample_id, "data": item_data})
+    assert response.status_code == 200
+
+    response = admin_client.get(f"/get-item-data/{sample_id}")
+    assert response.status_code == 200
+    assert response.json["status"] == "success"
+    item_data = response.json["item_data"]
+    block = item_data["blocks_obj"][block_id]
+
+    assert block["freeform_comment"] == "This is the latest test comment."
+    assert block.get("bokeh_plot_data") is None
+    assert block["random_new_key"] == "test new key again"
+
+
+@pytest.mark.parametrize(
+    "block_type, block_file",
+    [
+        ("tabular", "csv/simple.csv"),
+        ("cycle", "echem/jdb11-1_c3_gcpl_5cycles_2V-3p8V_C-24_data_C09.mpr"),
+        ("ftir", "FTIR/2024-10-10_FeSO4_ref.asp"),
+        ("nmr", "NMR/1.zip"),
+        ("raman", "raman/raman_example.txt"),
+        ("ms", "TGA-MS/20221128 134958 TGA MS Megan.asc"),
+        ("xrd", "XRD/Scan_C4.xrdml"),
+        ("media", "media/grey_group_logo.tif"),
+    ],
+)
+def test_create_sample_with_example_files(
+    block_type, block_file, admin_client, example_data_dir, default_sample_dict, database
+):
+    """Create a test sample with a block and an uploaded file, and test the
+    block for errors."""
+
+    sample_id = f"test_sample_with_files-{block_type}"
+    sample_data = default_sample_dict.copy()
+    sample_data["item_id"] = sample_id
+
+    response = admin_client.post("/new-sample/", json=sample_data)
+    assert response.status_code == 201
+    assert response.json["status"] == "success"
+
+    response = admin_client.post(
+        "/add-data-block/",
+        json={
+            "block_type": block_type,
+            "item_id": sample_id,
+            "index": 0,
+        },
+    )
+
+    assert response.status_code == 200, f"Failed to add {block_type} block: {response.json}"
+    assert response.json["status"] == "success"
+
+    block_data = response.json["new_block_obj"]
+    block_id = block_data["block_id"]
+
+    example_file = example_data_dir / block_file
+
+    with open(example_file, "rb") as f:
+        response = admin_client.post(
+            "/upload-file/",
+            buffered=True,
+            content_type="multipart/form-data",
+            data={
+                "item_id": sample_id,
+                "file": [(f, example_file.name)],
+                "type": "application/octet-stream",
+                "replace_file": "null",
+                "relativePath": "null",
+            },
+        )
+
+    assert response.status_code == 201, f"Failed to upload {example_file.name}"
+    assert response.json["status"] == "success"
+    file_id = response.json["file_id"]
+
+    response = admin_client.get(f"/get-item-data/{sample_id}")
+    assert response.status_code == 200
+    item_data = response.json["item_data"]
+    block_data = item_data["blocks_obj"][block_id]
+    block_data["file_id"] = file_id
+
+    response = admin_client.post("/update-block/", json={"block_data": block_data})
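+
+    # Each block type should be able to parse its example file and return an
+    # enriched payload to the client (plot data, computed results, etc.)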
admin_client.post("/update-block/", json={"block_data": block_data}) + assert response.status_code == 200 + assert response.json["status"] == "success" + assert response.json["new_block_data"]["blocktype"] == block_type + + # Some specific checks for different block types: if block_type != "media": assert response.json["new_block_data"]["bokeh_plot_data"] is not None - blocks_with_files = sum(1 for block in item_data["blocks_obj"].values() if block.get("file_id")) - blocks_without_files = [ - block["blocktype"] for block in item_data["blocks_obj"].values() if not block.get("file_id") - ] + if block_type == "xrd": + assert response.json["new_block_data"]["computed"]["peak_data"] is not None - assert blocks_with_files >= len(uploaded_files) // 2, ( - f"Not enough blocks have files attached. Blocks without files: {blocks_without_files}" - ) + # For the media block, check that a TIF image is present and can be saved correctly + if block_type == "media": + block_data = response.json["new_block_data"] + assert "b64_encoded_image" in block_data + + response = admin_client.get(f"/get-item-data/{sample_id}") + assert response.status_code == 200 + item_data = response.json["item_data"] + + item_data["blocks_obj"][block_id] = block_data + response = admin_client.post("/save-item/", json={"item_id": sample_id, "data": item_data}) + assert response.status_code == 200 + + response = admin_client.get(f"/get-item-data/{sample_id}") + assert response.status_code == 200 + assert response.json["status"] == "success" + + item_data = response.json["item_data"] + assert "blocks_obj" in item_data + + if block_type == "xrd": + doc = database.items.find_one({"item_id": sample_id}, projection={"blocks_obj": 1}) + assert doc["blocks_obj"][block_id]["computed"]["peak_data"] is not None diff --git a/webapp/src/server_fetch_utils.js b/webapp/src/server_fetch_utils.js index 6e0ad3bb7..a41a35531 100644 --- a/webapp/src/server_fetch_utils.js +++ b/webapp/src/server_fetch_utils.js @@ -504,20 +504,15 @@ export async function getCollectionData(collection_id) { }); } -export async function updateBlockFromServer( - item_id, - block_id, - block_data, - event_data = null, - saveToDatabase = true, -) { +export async function updateBlockFromServer(item_id, block_id, block_data, event_data = null) { // Send the current block state to the API and receive an updated version // of the block in return, including any event data. // // - Will strip known "large data" keys, even if not formalised, e.g., bokeh_plot_data. 
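+  //   These keys are returned afresh by the server on each update, so they
+  //   are safe to drop from the request payload here.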
   delete block_data.bokeh_plot_data;
-  delete block_data.processed_data;
+  delete block_data.b64_encoded_image;
+  delete block_data.computed;
   delete block_data.metadata;
 
   store.commit("setBlockUpdating", block_id);
@@ -526,7 +521,6 @@ export async function updateBlockFromServer(
       block_id: block_id,
       block_data: block_data,
       event_data: event_data,
-      save_to_db: saveToDatabase,
     })
     .then(function (response_json) {
       store.commit("updateBlockData", {
@@ -592,6 +586,26 @@ export function updateItemPermissions(refcode, creators) {
 export function saveItem(item_id) {
   var item_data = store.state.all_item_data[item_id];
+
+  let blocks = [];
+  let keysToExclude = ["bokeh_plot_data", "computed", "metadata", "b64_encoded_image"];
+
+  // Strip large data from blocks before saving, but make
+  // sure to preserve them in the store
+  if (item_data.blocks_obj) {
+    for (const block_id in item_data.blocks_obj) {
+      blocks.push(
+        Object.fromEntries(
+          Object.entries(item_data.blocks_obj[block_id]).filter(
+            ([key]) => !keysToExclude.includes(key),
+          ),
+        ),
+      );
+    }
+  }
+
+  item_data.blocks = blocks;
+
   store.commit("setItemSaved", { item_id: item_id, isSaved: false });
   fetch_post(`${API_URL}/save-item/`, {
     item_id: item_id,