MSDLLCpapers · spencermcminn · Jul 10, 2025 · Jul 10, 2025 · Jul 29, 2025 · Jul 30, 2025
diff --git a/demo/Advanced Experimental Design.ipynb b/demo/Advanced Experimental Design.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.insert(0, '../')\n",
+    "\n",
+    "print(sys.path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import obsidian\n",
+    "print(f'obsidian version: ' + obsidian.__version__)\n",
+    "\n",
+    "from obsidian.experiment import AdvExpDesigner"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define continuous parameters: key -> (low, high, step)\n",
+    "\n",
+    "continuous_params = {\n",
+    "    'temperature': (20, 80, 5),          # Linear steps of 5 between 20 and 80\n",
+    "    'concentration': (0.1, 1.0, 0.1),    # Linear steps of 0.1 between 0.1 and 1.0\n",
+    "    'pressure': (1, 16, 'geometric'),    # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n",
+    "    'time': (10, 1000, 'logarithmic')    # Logarithmic steps (powers of 10) between 10 and 1000\n",
+    "}\n",
+    "\n",
+    "# Define conditional categorical parameters with subparameters and frequencies: key -> {subkey: {'freq': frequency, <subparameter name, e.g. 'pH'>: ([values], [frequencies])}}\n",
+    "\n",
+    "conditional_subparameters = {\n",
+    "    'buffer_type': {\n",
+    "        'A': {'freq': 0.4, 'pH': ([6.0, 7.0, 8.0], [0.3, 0.4, 0.3])},\n",
+    "        'B': {'freq': 0.35, 'pH': ([5.0, 6.5], [0.7, 0.3])},\n",
+    "        'C': {'freq': 0.25, 'pH': ([7.5, 8.5], [0.6, 0.4])}\n",
+    "    },\n",
+    "    'catalyst': {\n",
+    "        'X': {'freq': 0.5, 'loading': ([0.1, 0.2, 0.3], [0.2, 0.5, 0.3])},\n",
+    "        'Y': {'freq': 0.3, 'loading': ([0.05, 0.15], [0.6, 0.4])},\n",
+    "        'Z': {'freq': 0.2, 'loading': ([0.25, 0.35], [0.7, 0.3])}\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "\n",
+    "# Initialize the designer\n",
+    "\n",
+    "designer = AdvExpDesigner(continuous_params, conditional_subparameters)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate a design with 100 samples, optimizing categorical assignments\n",
+    "design = designer.generate_design(seed=123, n_samples=100, optimize_categories=True)\n",
+    "design"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Evaluate the design quality metrics\n",
+    "metrics = designer.evaluate_design(design)\n",
+    "print(\"Design quality metrics:\")\n",
+    "for metric, value in metrics.items():\n",
+    "    print(f\"  {metric}: {value:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot histograms of all parameters and subparameters\n",
+    "designer.plot_histograms(design)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot PCA colored by 'buffer_type'\n",
+    "designer.plot_pca(design, hue='buffer_type')\n",
+    "\n",
+    "# Plot UMAP colored by 'catalyst'\n",
+    "designer.plot_umap(design, hue='catalyst')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optimize design over 30 trials with 100 samples each\n",
+    "best_design, metrics_df = designer.optimize_design(n_trials=30, n_samples=100)\n",
+    "\n",
+    "print(\"\\nBest design metrics after optimization:\")\n",
+    "print(metrics_df.sort_values('score', ascending=False).head(1))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot quality evolution over trials\n",
+    "designer.plot_quality_evolution(metrics_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot correlation matrix of the design\n",
+    "\n",
+    "designer.plot_correlation(best_design)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extend the best design by 20 new samples over 10 trials\n",
+    "extended_design, extension_summary = designer.extend_design(best_design, n=20, n_trials=10)\n",
+    "\n",
+    "print(\"\\nExtension summary:\")\n",
+    "print(extension_summary)\n",
+    "\n",
+    "# Plot the extended design\n",
+    "designer.plot_histograms(extended_design)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Compare empirical vs expected frequencies for categorical variables\n",
+    "designer.compare_frequencies(extended_design)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "obsidian",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/obsidian/__init__.py b/obsidian/__init__.py
@@ -1,5 +1,5 @@
 """obsidian: Automated experiment design and black-box optimization"""
-__version__ = '0.8.6'
+__version__ = '0.8.6-test'
 
 # Import key objects
 from obsidian.campaign import Campaign
@@ -10,6 +10,7 @@
 # Ensure that other subpackages are imported properly for documentation
 from obsidian.objectives import Objective
 from obsidian.experiment import ExpDesigner
+from obsidian.experiment import AdvExpDesigner
 import obsidian.constraints as constraints
 import obsidian.exceptions as exceptions
 import obsidian.acquisition as acquisition

diff --git a/obsidian/experiment/LatinHypercube_Documentation.md b/obsidian/experiment/LatinHypercube_Documentation.md
@@ -0,0 +1,95 @@
+# Parameters of scipy.stats.qmc.LatinHypercube
+
+The `LatinHypercube` class generates Latin Hypercube Samples (LHS) in a multi-dimensional unit hypercube. It supports several parameters to control the sampling behavior, randomness, and sample quality.
+
+## Parameters
+
+### 1. `d` : int
+
+**Description:**  
+The dimension of the sampling space, i.e., the number of parameters or variables to sample simultaneously.
+
+**Possible values:**  
+Any positive integer (d > 0).
+
+**Effect:**  
+Determines the number of columns in the sample matrix. Each sample point is a vector of length d with values in the half-open interval [0, 1).
+
+### 2. `seed` : int, array_like, np.random.Generator, or None, optional (default: None)
+
+**Description:**  
+Controls the random number generator used for sampling and scrambling.
+
+**Possible values:**  
+- An integer seed for reproducibility.
+- An instance of `np.random.Generator` for custom RNG.
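+- An array-like seed (not among SciPy's documented `seed` types of `None`, `int`, and `numpy.random.Generator`; confirm that your SciPy version accepts it).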
+- `None` to use the default RNG.
+
+**Effect:**  
+Using a fixed seed ensures reproducible sampling results. Different seeds produce different sample sets.
+
+### 3. `scramble` : bool, optional (default: True)
+
+**Description:**  
+Whether to place each sample at a random position inside its grid cell instead of at the cell center.
+
+**Possible values:**  
+- `True`: Samples are placed at random positions within the cells of the grid.
+- `False`: Samples are centered within the cells of the grid.
+
+**Effect:**  
+Scrambling adds randomness to the position of each point inside its cell while preserving the stratification property of LHS. With `scramble=False` the points sit at the cell centers, but the assignment of points to cells is still random, so the design still depends on `seed`.
+
+### 4. `strength` : int, optional (default: 1)
+
+**Description:**  
+The strength of the orthogonal array used to construct the LHS.
+
+**Possible values:**  
+- `1`: Standard Latin Hypercube (default).
+- `2`: Orthogonal-array-based LHS of strength 2, which enforces stronger uniformity constraints on two-dimensional projections of the sample points. No other values are accepted.
+
+**Effect:**  
+`strength=2` improves uniformity in two-dimensional projections of the sample, but only `n = p**2` points can be sampled, with `p` a prime number, and the dimension must satisfy `d <= p + 1`.
+
+### 5. `optimization` : str or None, optional (default: None)
+
+**Description:**  
+Method used to optimize the LHS design to improve space-filling properties.
+
+**Possible values:**  
+- `'random-cd'`: Random permutations of coordinates that lower the centered discrepancy.
+- `'lloyd'`: Perturbation of the samples with a modified Lloyd-Max algorithm.
+- `None`: No optimization applied.
+- `'centered'` and `'maximin'` are not accepted values; for a centered design use `scramble=False`.
+
+**Effect:**  
+Optimization is a post-processing step that attempts to improve the distribution of points by reducing clustering and increasing uniformity; it does not guarantee that every property of the original sample is conserved. Different methods have different computational costs and effectiveness:
+- `'random-cd'`: Iteratively improves the design by random coordinate swaps, keeping the best sample found as measured by centered discrepancy.
+- `'lloyd'`: Iteratively moves the points so that the process converges toward equally spaced samples.
+- `scramble=False` (not an `optimization` value): Places points at the center of intervals.
+
+## Summary Table
+
+| Parameter | Type | Default | Possible Values | Effect Summary |
+|-----------|------|---------|-----------------|----------------|
+| `d` | int | — | Positive integers | Number of dimensions sampled |
+| `seed` | int, array_like, RNG, None | `None` | Integer seed, RNG, or `None` | Controls reproducibility of samples |
+| `scramble` | bool | `True` | `True` or `False` | Places points randomly within grid cells (`True`) or at cell centers (`False`) |
+| `strength` | int | `1` | `1` or `2` | Orthogonality strength; `2` improves uniformity in 2-D projections but requires n = p² (p prime) and d ≤ p + 1 |
+| `optimization` | str or None | `None` | `'random-cd'`, `'lloyd'`, or `None` | Optimizes sample distribution for better space-filling |
+
+## Notes
+
+### Choosing `scramble`:
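+Scrambling is the default (`scramble=True`) and is generally recommended. Set `scramble=False` when you want points centered in their cells; the design is still randomly generated, so it is only reproducible when `seed` is fixed.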
+
+### Choosing `strength`:
+Use `strength=1` for standard LHS. `strength=2` improves uniformity in 2-D projections but restricts the sample size to `n = p**2` with `p` prime and the dimension to `d <= p + 1`.
+
+### Choosing `optimization`:
+Optimization improves sample uniformity but increases computation time. `'random-cd'` is a good balance for many applications.
+
+### Reproducibility:
+Always set `seed` if you want reproducible results, especially when using scrambling or optimization.
diff --git a/obsidian/experiment/__init__.py b/obsidian/experiment/__init__.py
@@ -3,3 +3,4 @@
 from .design import *
 from .simulator import *
 from .utils import *
+from .advanced_design import *