Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 192 additions & 0 deletions demo/Advanced Experimental Design.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../')\n",
"\n",
"print(sys.path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import obsidian\n",
"print(f'obsidian version: ' + obsidian.__version__)\n",
"\n",
"from obsidian.experiment import AdvExpDesigner"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define continuous parameters: key -> (low, high, step)\n",
"\n",
"continuous_params = {\n",
" 'temperature': (20, 80, 5), # Linear steps of 5 between 20 and 80\n",
" 'concentration': (0.1, 1.0, 0.1), # Linear steps of 0.1 between 0.1 and 1.0\n",
" 'pressure': (1, 16, 'geometric'), # Geometric steps doubling from 1 to 16 (1, 2, 4, 8, 16)\n",
" 'time': (10, 1000, 'logarithmic') # Logarithmic steps (powers of 10) between 10 and 1000\n",
"}\n",
"\n",
"# Define conditional categorical parameters with subparameters and frequencies: key -> {subkey: {'freq': frequency, 'subparams': ([values], [frequencies])}}\n",
"\n",
"conditional_subparameters = {\n",
" 'buffer_type': {\n",
" 'A': {'freq': 0.4, 'pH': ([6.0, 7.0, 8.0], [0.3, 0.4, 0.3])},\n",
" 'B': {'freq': 0.35, 'pH': ([5.0, 6.5], [0.7, 0.3])},\n",
" 'C': {'freq': 0.25, 'pH': ([7.5, 8.5], [0.6, 0.4])}\n",
" },\n",
" 'catalyst': {\n",
" 'X': {'freq': 0.5, 'loading': ([0.1, 0.2, 0.3], [0.2, 0.5, 0.3])},\n",
" 'Y': {'freq': 0.3, 'loading': ([0.05, 0.15], [0.6, 0.4])},\n",
" 'Z': {'freq': 0.2, 'loading': ([0.25, 0.35], [0.7, 0.3])}\n",
" }\n",
"}\n",
"\n",
"\n",
"# Initialize the designer\n",
"\n",
"designer = AdvExpDesigner(continuous_params, conditional_subparameters)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Generate a design with 100 samples, optimizing categorical assignments\n",
"design = designer.generate_design(seed=123, n_samples=100, optimize_categories=True)\n",
"design"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Evaluate the design quality metrics\n",
"metrics = designer.evaluate_design(design)\n",
"print(\"Design quality metrics:\")\n",
"for metric, value in metrics.items():\n",
" print(f\" {metric}: {value:.4f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot histograms of all parameters and subparameters\n",
"designer.plot_histograms(design)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot PCA colored by 'buffer_type'\n",
"designer.plot_pca(design, hue='buffer_type')\n",
"\n",
"# Plot UMAP colored by 'catalyst'\n",
"designer.plot_umap(design, hue='catalyst')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optimize design over 30 trials with 100 samples each\n",
"best_design, metrics_df = designer.optimize_design(n_trials=30, n_samples=100)\n",
"\n",
"print(\"\\nBest design metrics after optimization:\")\n",
"print(metrics_df.sort_values('score', ascending=False).head(1))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot quality evolution over trials\n",
"designer.plot_quality_evolution(metrics_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot correlation matrix of the design\n",
"\n",
"designer.plot_correlation(best_design)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Extend the best design by 20 new samples over 10 trials\n",
"extended_design, extension_summary = designer.extend_design(best_design, n=20, n_trials=10)\n",
"\n",
"print(\"\\nExtension summary:\")\n",
"print(extension_summary)\n",
"\n",
"# Plot the extended design\n",
"designer.plot_histograms(extended_design)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compare empirical vs expected frequencies for categorical variables\n",
"designer.compare_frequencies(extended_design)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "obsidian",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
3 changes: 2 additions & 1 deletion obsidian/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""obsidian: Automated experiment design and black-box optimization"""
__version__ = '0.8.6'
__version__ = '0.8.6-test'

# Import key objects
from obsidian.campaign import Campaign
Expand All @@ -10,6 +10,7 @@
# Ensure that other subpackages are imported properly for documentation
from obsidian.objectives import Objective
from obsidian.experiment import ExpDesigner
from obsidian.experiment import AdvExpDesigner
import obsidian.constraints as constraints
import obsidian.exceptions as exceptions
import obsidian.acquisition as acquisition
Expand Down
95 changes: 95 additions & 0 deletions obsidian/experiment/LatinHypercube_Documentation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Parameters of scipy.stats.qmc.LatinHypercube

The `LatinHypercube` class generates Latin Hypercube Samples (LHS) in a multi-dimensional unit hypercube. It supports several parameters to control the sampling behavior, randomness, and sample quality.

## Parameters

### 1. `d` : int

**Description:**
The dimension of the sampling space, i.e., the number of parameters or variables to sample simultaneously.

**Possible values:**
Any positive integer (d > 0).

**Effect:**
Determines the number of columns in the sample matrix. Each sample point is a vector of length d with values in the half-open interval [0, 1).

### 2. `seed` : int, array_like, np.random.Generator, or None, optional (default: None)

**Description:**
Controls the random number generator used for sampling and scrambling.

**Possible values:**
- An integer seed for reproducibility.
- An instance of `np.random.Generator` for custom RNG.
- An array-like seed.
- `None` to use the default RNG.

**Effect:**
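Using a fixed seed ensures reproducible sampling results. Different seeds produce different sample sets. Note that in recent SciPy releases (1.15 and later) this keyword is being replaced by `rng`; `seed` is still accepted during the transition period, but only one of the two may be given.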

### 3. `scramble` : bool, optional (default: True)

**Description:**
Whether to apply scrambling to the Latin Hypercube design.

**Possible values:**
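- `True`: Apply scrambling — each sample is placed at a random position inside its grid cell.
- `False`: No scrambling — each sample is placed at the center of its grid cell (a centered design).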

**Effect:**
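Scrambling randomizes where each point falls inside its cell while preserving the stratification property of LHS (exactly one point per stratum in every dimension). With `scramble=False` the points sit at cell centers instead; the assignment of cells across dimensions is still randomly permuted, so the design continues to depend on `seed`.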

### 4. `strength` : int, optional (default: 1)

**Description:**
The strength of the orthogonal array used to construct the LHS.

**Possible values:**
- `1`: Standard Latin Hypercube (default).
- `2`: Orthogonal-array-based Latin Hypercube of strength 2, which additionally stratifies the two-dimensional projections of the sample points. No other values are accepted.

**Effect:**
Using `strength=2` improves uniformity in two-dimensional projections of the sample, but it restricts the feasible designs: the number of samples must be `n = p**2` for a prime number `p`, and the dimension must satisfy `d <= p + 1`.

### 5. `optimization` : str or None, optional (default: None)

**Description:**
Method used to optimize the LHS design to improve space-filling properties.

**Possible values:**
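- `'random-cd'`: Random permutations of coordinates that lower the centered discrepancy.
- `'lloyd'`: Perturbation of the samples using a modified Lloyd-Max algorithm (available since SciPy 1.10).
- `None`: No optimization applied.

Note: `'centered'` and `'maximin'` are not valid values for this parameter. A centered design is obtained with `scramble=False`; a maximin criterion is not offered by `LatinHypercube`.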

**Effect:**
Optimization attempts to improve the distribution of points by reducing clustering and increasing uniformity. Different methods have different computational costs and effectiveness:
- `'random-cd'`: Iteratively swaps coordinates at random and keeps the best design found, as measured by the centered discrepancy. This tends to give robust space-filling in 2D and 3D sub-projections.
- `'lloyd'`: Iteratively moves points using a modified Lloyd-Max algorithm; the process converges toward equally spaced samples.

## Summary Table

| Parameter | Type | Default | Possible Values | Effect Summary |
|-----------|------|---------|-----------------|----------------|
| `d` | int | — | Positive integers | Number of dimensions sampled |
| `seed` | int, array_like, RNG, None | `None` | Integer seed, RNG, or `None` | Controls reproducibility of samples |
| `scramble` | bool | `True` | `True` or `False` | `True` places points randomly within their cells; `False` places them at cell centers |
| `strength` | int | `1` | 1 or 2 | Orthogonality strength; 2 improves uniformity in 2-D projections but requires `n = p**2` (p prime) and `d <= p + 1` |
| `optimization` | str or None | `None` | `'random-cd'`, `'lloyd'`, or `None` | Optimizes sample distribution for better space-filling |

## Notes

### Choosing `scramble`:
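Scrambling is enabled by default and is generally recommended. Use `scramble=False` when you want points at the centers of their cells; the resulting design is still random, so set `seed` if you need it to be repeatable.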

### Choosing `strength`:
Use `strength=1` for standard LHS. `strength=2` improves uniformity in two-dimensional projections but restricts the sample size to `n = p**2` (with `p` prime) and the dimension to `d <= p + 1`.

### Choosing `optimization`:
Optimization improves sample uniformity but increases computation time. `'random-cd'` is a good balance for many applications.

### Reproducibility:
Always set `seed` if you want reproducible results, especially when using scrambling or optimization.
1 change: 1 addition & 0 deletions obsidian/experiment/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
from .design import *
from .simulator import *
from .utils import *
from .advanced_design import *
Loading
Loading