From b6e8a98bc7d96384b4327e3f5a7c0a68b62d26b4 Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Fri, 19 May 2023 16:32:11 +0100 Subject: [PATCH 01/13] Introduce isotropic stationary kernels along with a generalized periodic kernel --- .gitignore | 2 + pymc/gp/cov.py | 102 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 79 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 6be4482a6d..e64a89eae5 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,5 @@ pytestdebug.log # Codespaces pythonenv* env/ +venv/ +.venv/ diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index c910ce93fd..1357e5c331 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -53,7 +53,7 @@ class BaseCovariance: Base class for kernels/covariance functions. """ - def __call__(self, X, Xs=None, diag=False): + def __call__(self, X, Xs=None, diag: bool = False): r""" Evaluate the kernel/covariance function. @@ -502,6 +502,9 @@ def square_dist(self, X, Xs): def euclidean_dist(self, X, Xs): r2 = self.square_dist(X, Xs) + return self._sqrt(r2) + + def _sqrt(self, r2): return pt.sqrt(r2 + 1e-12) def diag(self, X): @@ -512,6 +515,23 @@ def full(self, X, Xs=None): def power_spectral_density(self, omega): raise NotImplementedError + + +class IsotropicStationary(Stationary): + r""" + Base class for isotropic stationary kernels/covariance functions i.e. kernels + that only depend on ‖x - x'‖. + """ + + def full(self, X, Xs=None): + X, Xs = self._slice(X, Xs) + r2 = self.square_dist(X, Xs) + return self.full_r2(r2) + + def full_r2(self, r2): + if hasattr(self, "full_r"): + return self.full_r(self._sqrt(r2)) + raise NotImplementedError class Periodic(Stationary): @@ -546,9 +566,50 @@ def full(self, X, Xs=None): r = np.pi * (f1 - f2) / self.period r = pt.sum(pt.square(pt.sin(r) / self.ls), 2) return pt.exp(-0.5 * r) + + +class GeneralizedPeriodic(Stationary): + r""" + The Generalized Periodic kernel. + + Can be used to wrap any isotropic stationary kernel to transform it + into a periodic version. The derivation can be achieved by mapping the + original inputs through the transformation u = (cos(x), sin(x)). + """ + + def __init__(self, base_kernel: IsotropicStationary, period): + self.base_kernel = base_kernel + self.period = period + + @property + def input_dim(self): + return self.base_kernel.input_dim + + @property + def active_dims(self): + return self.base_kernel.active_dims + + @property + def ls(self): + return self.base_kernel.ls + + def full(self, X, Xs=None): + X, Xs = self._slice(X, Xs) + if Xs is None: + Xs = X + f1 = X.dimshuffle(0, "x", 1) + f2 = Xs.dimshuffle("x", 0, 1) + r = np.pi * (f1 - f2) / self.period + if hasattr(self.base_kernel, "full_r"): + r = pt.sum(pt.abs(pt.sin(r) / self.ls), 2) + K = self.base_kernel.full_r(r) + else: + r2 = pt.sum(pt.square(pt.sin(r) / self.ls), 2) + K = self.base_kernel.full_r2(r2) + return K -class ExpQuad(Stationary): +class ExpQuad(IsotropicStationary): r""" The Exponentiated Quadratic kernel. Also referred to as the Squared Exponential, or Radial Basis Function kernel. @@ -559,9 +620,8 @@ class ExpQuad(Stationary): """ - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) - return pt.exp(-0.5 * self.square_dist(X, Xs)) + def full_r2(self, r2): + return pt.exp(-0.5 * r2) def power_spectral_density(self, omega): r""" @@ -579,7 +639,7 @@ def power_spectral_density(self, omega): return c * pt.prod(ls) * exp -class RatQuad(Stationary): +class RatQuad(IsotropicStationary): r""" The Rational Quadratic kernel.
@@ -592,15 +652,14 @@ def __init__(self, input_dim, alpha, ls=None, ls_inv=None, active_dims=None): super().__init__(input_dim, ls, ls_inv, active_dims) self.alpha = alpha - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) + def full_r2(self, r2): return pt.power( - (1.0 + 0.5 * self.square_dist(X, Xs) * (1.0 / self.alpha)), + (1.0 + 0.5 * r2 * (1.0 / self.alpha)), -1.0 * self.alpha, ) -class Matern52(Stationary): +class Matern52(IsotropicStationary): r""" The Matern kernel with nu = 5/2. @@ -611,9 +670,7 @@ class Matern52(Stationary): \mathrm{exp}\left[ - \frac{\sqrt{5(x - x')^2}}{\ell} \right] """ - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) - r = self.euclidean_dist(X, Xs) + def full_r(self, r): return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * pt.square(r)) * pt.exp(-1.0 * np.sqrt(5.0) * r) def power_spectral_density(self, omega): @@ -641,7 +698,7 @@ def power_spectral_density(self, omega): return (num / den) * pt.prod(ls) * pow -class Matern32(Stationary): +class Matern32(IsotropicStationary): r""" The Matern kernel with nu = 3/2. @@ -651,9 +708,7 @@ class Matern32(Stationary): \mathrm{exp}\left[ - \frac{\sqrt{3(x - x')^2}}{\ell} \right] """ - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) - r = self.euclidean_dist(X, Xs) + def full_r(self, r): return (1.0 + np.sqrt(3.0) * r) * pt.exp(-np.sqrt(3.0) * r) def power_spectral_density(self, omega): @@ -681,7 +736,7 @@ def power_spectral_density(self, omega): return (num / den) * pt.prod(ls) * pow -class Matern12(Stationary): +class Matern12(IsotropicStationary): r""" The Matern kernel with nu = 1/2 @@ -690,13 +745,11 @@ class Matern12(Stationary): k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{\ell} \right] """ - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) - r = self.euclidean_dist(X, Xs) + def full_r(self, r): return pt.exp(-r) -class Exponential(Stationary): +class Exponential(IsotropicStationary): r""" The Exponential kernel. 
@@ -705,9 +758,8 @@ class Exponential: k(x, x') = \mathrm{exp}\left[ -\frac{||x - x'||}{2\ell} \right] """ - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) - return pt.exp(-0.5 * self.euclidean_dist(X, Xs)) + def full_r(self, r): + return pt.exp(-0.5 * r) class Cosine(Stationary): From 6d93a2d7f638f016157b3aaa9b416fda90a6afe3 Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Fri, 19 May 2023 16:38:37 +0100 Subject: [PATCH 02/13] Introduce isotropic stationary kernels along with a generalized periodic kernel --- pymc/gp/cov.py | 10 +++++----- tests/gp/test_cov.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 1357e5c331..1a52f0f22b 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -503,7 +503,7 @@ def square_dist(self, X, Xs): def euclidean_dist(self, X, Xs): r2 = self.square_dist(X, Xs) return self._sqrt(r2) - + def _sqrt(self, r2): return pt.sqrt(r2 + 1e-12) @@ -515,7 +515,7 @@ def full(self, X, Xs=None): def power_spectral_density(self, omega): raise NotImplementedError - + class IsotropicStationary(Stationary): r""" @@ -527,7 +527,7 @@ def full(self, X, Xs=None): X, Xs = self._slice(X, Xs) r2 = self.square_dist(X, Xs) return self.full_r2(r2) - + def full_r2(self, r2): if hasattr(self, "full_r"): return self.full_r(self._sqrt(r2)) @@ -566,7 +566,7 @@ def full(self, X, Xs=None): r = np.pi * (f1 - f2) / self.period r = pt.sum(pt.square(pt.sin(r) / self.ls), 2) return pt.exp(-0.5 * r) - + class GeneralizedPeriodic(Stationary): r""" @@ -588,7 +588,7 @@ def input_dim(self): @property def active_dims(self): return self.base_kernel.active_dims - + @property def ls(self): return self.base_kernel.ls diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py index ba8d41962c..aceff76b13 100644 --- a/tests/gp/test_cov.py +++ b/tests/gp/test_cov.py @@ -617,6 +617,21 @@ def test_1d(self): npt.assert_allclose(np.diag(K), Kd, atol=1e-5) +class TestGeneralizedPeriodic: + def test_1d(self): + X = np.linspace(0, 1, 10)[:, None] + with pm.Model() as model: + base = pm.gp.cov.ExpQuad(1, 0.1) + cov = pm.gp.cov.GeneralizedPeriodic(base, 0.1) + K = cov(X).eval() + npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) + K = cov(X, X).eval() + npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) + # check diagonal + Kd = cov(X, diag=True).eval() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + + class TestLinear: def test_1d(self): X = np.linspace(0, 1, 10)[:, None] From ed6c95281b352b659fa3ce5e5806f476c2b317fa Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Mon, 22 May 2023 14:03:42 +0100 Subject: [PATCH 03/13] Refactor code to extend the existing periodic kernel instead of creating a new one, and to pass the base kernel by class rather than by instance --- pymc/gp/cov.py | 166 ++++++++++++++++++++++--------------------- tests/gp/test_cov.py | 11 ++- 2 files changed, 90 insertions(+), 87 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 1a52f0f22b..77abcc0cab 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -18,7 +18,7 @@ from collections import Counter from functools import reduce from operator import add, mul -from typing import Optional, Sequence +from typing import Optional, Sequence, Type import numpy as np import pytensor.tensor as pt @@ -440,7 +440,7 @@ class Circular(Covariance): https://hal.archives-ouvertes.fr/hal-01119942v1/document """ - def __init__(self, input_dim, period, tau=4, active_dims=None): + def __init__(self, input_dim: int, period, tau=4, active_dims:
Optional[Sequence[int]] = None): super().__init__(input_dim, active_dims) self.c = pt.as_tensor_variable(period / 2) self.tau = tau @@ -470,11 +470,11 @@ class Stationary(Covariance): Parameters ---------- ls: Lengthscale. If input_dim > 1, a list or array of scalars or PyMC random - variables. If input_dim == 1, a scalar or PyMC random variable. + variables. If input_dim == 1, a scalar or PyMC random variable. ls_inv: Inverse lengthscale. 1 / ls. One of ls or ls_inv must be provided. """ - def __init__(self, input_dim, ls=None, ls_inv=None, active_dims=None): + def __init__(self, input_dim: int, ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None): super().__init__(input_dim, active_dims) if (ls is None and ls_inv is None) or (ls is not None and ls_inv is not None): raise ValueError("Only one of 'ls' or 'ls_inv' must be provided") @@ -534,81 +534,6 @@ def full_r2(self, r2): raise NotImplementedError -class Periodic(Stationary): - r""" - The Periodic kernel. - - .. math:: - k(x, x') = \mathrm{exp}\left( -\frac{\mathrm{sin}^2(\pi |x-x'| \frac{1}{T})}{2\ell^2} \right) - - Notes - ----- - Note that the scaling factor for this kernel is different compared to the more common - definition (see [1]_). Here, 0.5 is in the exponent instead of the more common value, 2. - Divide the length-scale by 2 when initializing the kernel to recover the standard definition. - - References - ---------- - .. [1] David Duvenaud, "The Kernel Cookbook" - https://www.cs.toronto.edu/~duvenaud/cookbook/ - """ - - def __init__(self, input_dim, period, ls=None, ls_inv=None, active_dims=None): - super().__init__(input_dim, ls, ls_inv, active_dims) - self.period = period - - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) - if Xs is None: - Xs = X - f1 = X.dimshuffle(0, "x", 1) - f2 = Xs.dimshuffle("x", 0, 1) - r = np.pi * (f1 - f2) / self.period - r = pt.sum(pt.square(pt.sin(r) / self.ls), 2) - return pt.exp(-0.5 * r) - - -class GeneralizedPeriodic(Stationary): - r""" - The Generalized Periodic kernel. - - Can be used to wrap any isotropic stationary kernel to transform it - into a periodic version. The derivation can be achieved by mapping the - original inputs through the transformation u = (cos(x), sin(x)). - """ - - def __init__(self, base_kernel: IsotropicStationary, period): - self.base_kernel = base_kernel - self.period = period - - @property - def input_dim(self): - return self.base_kernel.input_dim - - @property - def active_dims(self): - return self.base_kernel.active_dims - - @property - def ls(self): - return self.base_kernel.ls - - def full(self, X, Xs=None): - X, Xs = self._slice(X, Xs) - if Xs is None: - Xs = X - f1 = X.dimshuffle(0, "x", 1) - f2 = Xs.dimshuffle("x", 0, 1) - r = np.pi * (f1 - f2) / self.period - if hasattr(self.base_kernel, "full_r"): - r = pt.sum(pt.abs(pt.sin(r) / self.ls), 2) - K = self.base_kernel.full_r(r) - else: - r2 = pt.sum(pt.square(pt.sin(r) / self.ls), 2) - K = self.base_kernel.full_r2(r2) - return K - - class ExpQuad(IsotropicStationary): r""" The Exponentiated Quadratic kernel. 
Also referred to as the Squared @@ -648,7 +573,7 @@ class RatQuad(IsotropicStationary): k(x, x') = \left(1 + \frac{(x - x')^2}{2\alpha\ell^2} \right)^{-\alpha} """ - def __init__(self, input_dim, alpha, ls=None, ls_inv=None, active_dims=None): + def __init__(self, input_dim: int, alpha, ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None): super().__init__(input_dim, ls, ls_inv, active_dims) self.alpha = alpha @@ -762,6 +687,87 @@ def full_r(self, r): return pt.exp(-0.5 * r) +class Periodic(Stationary): + r""" + The Periodic kernel. + + This can be used to wrap any `IsotropicStationary` kernel to transform it into a periodic + version. The canonical form (based on the `ExpQuad` kernel) is given by: + + .. math:: + + k(x, x') = \mathrm{exp}\left( -\frac{\mathrm{sin}^2(\pi |x-x'| \frac{1}{T})}{2\ell^2} \right) + + The derivation can be achieved by mapping the original inputs through the transformation + u = (cos(x), sin(x)). + + Parameters + ---------- + period: Period. The period of the kernel. + ls: Lengthscale. If input_dim > 1, a list or array of scalars or PyMC random + variables. If input_dim == 1, a scalar or PyMC random variable. + ls_inv: Inverse lengthscale. 1 / ls. One of ls or ls_inv must be provided. + base_kernel_class: Base kernel class. The `IsotropicStationary` kernel to use + for the base behaviour, defaults to `ExpQuad`. + **base_kernel_kwargs: Additional kwargs passed to the base kernel for initialization. + For example, used to pass `alpha` to the `RatQuad` kernel. + + Notes + ----- + Note that the scaling factor for this kernel is different compared to the more common + definition (see [1]_). Here, 0.5 is in the exponent instead of the more common value, 2. + Divide the length-scale by 2 when initializing the kernel to recover the standard definition. + + References + ---------- + .. [1] David Duvenaud, "The Kernel Cookbook" + https://www.cs.toronto.edu/~duvenaud/cookbook/ + """ + + def __init__( + self, + input_dim: int, + period, + ls=None, + ls_inv=None, + active_dims: Optional[Sequence[int]] = None, + base_kernel_class: Type[IsotropicStationary] = ExpQuad, + **base_kernel_kwargs, + ): + super().__init__(input_dim, ls, ls_inv, active_dims) + + if period <= 0: + raise ValueError("Period parameter must be positive") + self.period = period + self.base_kernel_class = base_kernel_class + self.base_kernel_kwargs = base_kernel_kwargs + + @property + def base_kernel(self) -> IsotropicStationary: + """Instantiation of the base kernel.""" + return self.base_kernel_class( + self.input_dim, + ls=self.ls, + active_dims=self.active_dims, + **self.base_kernel_kwargs, + ) + + def full(self, X, Xs=None): + X, Xs = self._slice(X, Xs) + if Xs is None: + Xs = X + f1 = pt.expand_dims(X, axis=(0,)) + f2 = pt.expand_dims(X, axis=(1,)) + r = np.pi * (f1 - f2) / self.period + if hasattr(self.base_kernel, "full_r"): + r = pt.sum(pt.abs(pt.sin(r) / self.ls), 2) + K = self.base_kernel.full_r(r) + else: + r2 = pt.sum(pt.square(pt.sin(r) / self.ls), 2) + K = self.base_kernel.full_r2(r2) + return K + + class Cosine(Stationary): r""" The Cosine kernel. 
diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py index aceff76b13..b9a7e0d231 100644 --- a/tests/gp/test_cov.py +++ b/tests/gp/test_cov.py @@ -616,17 +616,14 @@ def test_1d(self): Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) - -class TestGeneralizedPeriodic: - def test_1d(self): + def test_1d_ratquad(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: - base = pm.gp.cov.ExpQuad(1, 0.1) - cov = pm.gp.cov.GeneralizedPeriodic(base, 0.1) + cov = pm.gp.cov.Periodic(1, 0.1, 0.1, base_kernel_class=pm.gp.cov.RatQuad, alpha=0.5) K = cov(X).eval() - npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) + npt.assert_allclose(K[0, 1], 0.28063, atol=1e-3) K = cov(X, X).eval() - npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) + npt.assert_allclose(K[0, 1], 0.28063, atol=1e-3) # check diagonal Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) From 1630ee9710a3838d111193e2e69c7f23c58b688a Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Mon, 22 May 2023 14:04:43 +0100 Subject: [PATCH 04/13] Refactor code to extend the existing periodic kernel instead of creating a new one, and to pass the base kernel by class rather than by instance --- pymc/gp/cov.py | 31 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 77abcc0cab..2df8c29598 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -474,7 +474,9 @@ class Stationary(Covariance): ls_inv: Inverse lengthscale. 1 / ls. One of ls or ls_inv must be provided. """ - def __init__(self, input_dim: int, ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None): + def __init__( + self, input_dim: int, ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None + ): super().__init__(input_dim, active_dims) if (ls is None and ls_inv is None) or (ls is not None and ls_inv is not None): raise ValueError("Only one of 'ls' or 'ls_inv' must be provided") @@ -573,7 +575,14 @@ class RatQuad(IsotropicStationary): k(x, x') = \left(1 + \frac{(x - x')^2}{2\alpha\ell^2} \right)^{-\alpha} """ - def __init__(self, input_dim: int, alpha, ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None): + def __init__( + self, + input_dim: int, + alpha, + ls=None, + ls_inv=None, + active_dims: Optional[Sequence[int]] = None, + ): super().__init__(input_dim, ls, ls_inv, active_dims) self.alpha = alpha @@ -725,15 +734,15 @@ class Periodic(Stationary): """ def __init__( - self, - input_dim: int, - period, - ls=None, - ls_inv=None, - active_dims: Optional[Sequence[int]] = None, - base_kernel_class: Type[IsotropicStationary] = ExpQuad, - **base_kernel_kwargs, - ): + self, + input_dim: int, + period, + ls=None, + ls_inv=None, + active_dims: Optional[Sequence[int]] = None, + base_kernel_class: Type[IsotropicStationary] = ExpQuad, + **base_kernel_kwargs, + ): super().__init__(input_dim, ls, ls_inv, active_dims) From 45356bb4135cb22077c3c99167a5d21b898ac34e Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Mon, 22 May 2023 16:23:59 +0100 Subject: [PATCH 05/13] Increase type hint coverage for cov funcs --- pymc/gp/cov.py | 153 +++++++++++++++++++++++++++++++------------ 1 file changed, 98 insertions(+), 55 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 2df8c29598..86457549a5 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -18,7 +18,7 @@ from collections import Counter from functools import reduce from operator import add, mul -from typing import
Optional, Sequence, Type +from typing import Optional, Sequence, Type, Union import numpy as np import pytensor.tensor as pt @@ -47,13 +47,17 @@ "Kron", ] +TensorLike = Union[np.ndarray, TensorVariable] + class BaseCovariance: """ Base class for kernels/covariance functions. """ - def __call__(self, X, Xs=None, diag: bool = False): + def __call__( + self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + ) -> TensorVariable: r""" Evaluate the kernel/covariance function. @@ -71,10 +75,10 @@ def __call__(self, X, Xs=None, diag: bool = False): else: return self.full(X, Xs) - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: raise NotImplementedError - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: raise NotImplementedError def __add__(self, other): @@ -145,7 +149,7 @@ class Covariance(BaseCovariance): function operates on. """ - def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None): + def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None) -> None: self.input_dim = input_dim if active_dims is None: self.active_dims = np.arange(input_dim) @@ -156,7 +160,7 @@ def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None): raise ValueError("Values in `active_dims` can't be larger than `input_dim`.") @property - def n_dims(self): + def n_dims(self) -> int: """The dimensionality of the input, as taken from the `active_dims`. """ @@ -182,7 +186,7 @@ def _slice(self, X, Xs=None): class Combination(Covariance): - def __init__(self, factor_list): + def __init__(self, factor_list) -> None: """Use constituent factors to get input_dim and active_dims for the Combination covariance.""" # Check if all input_dim are the same in factor_list @@ -295,7 +299,9 @@ def _merge_factors_psd(self, omega): class Add(Combination): - def __call__(self, X, Xs=None, diag=False): + def __call__( + self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + ) -> TensorVariable: return reduce(add, self._merge_factors_cov(X, Xs, diag)) def power_spectral_density(self, omega): @@ -303,7 +309,9 @@ def power_spectral_density(self, omega): class Prod(Combination): - def __call__(self, X, Xs=None, diag=False): + def __call__( + self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + ) -> TensorVariable: return reduce(mul, self._merge_factors_cov(X, Xs, diag)) def power_spectral_density(self, omega): @@ -318,12 +326,14 @@ def power_spectral_density(self, omega): class Exponentiated(Covariance): - def __init__(self, kernel, power): + def __init__(self, kernel: Covariance, power) -> None: self.kernel = kernel self.power = power super().__init__(input_dim=self.kernel.input_dim, active_dims=self.kernel.active_dims) - def __call__(self, X, Xs=None, diag=False): + def __call__( + self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + ) -> TensorVariable: return self.kernel(X, Xs, diag=diag) ** self.power @@ -343,7 +353,7 @@ class Kron(Covariance): implementations. 
""" - def __init__(self, factor_list): + def __init__(self, factor_list) -> None: self.input_dims = [factor.input_dim for factor in factor_list] input_dim = sum(self.input_dims) super().__init__(input_dim=input_dim) @@ -358,7 +368,9 @@ def _split(self, X, Xs): Xs_split = [None] * len(X_split) return X_split, Xs_split - def __call__(self, X, Xs=None, diag=False): + def __call__( + self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + ) -> TensorVariable: X_split, Xs_split = self._split(X, Xs) covs = [cov(x, xs, diag) for cov, x, xs in zip(self._factor_list, X_split, Xs_split)] return reduce(mul, covs) @@ -373,13 +385,13 @@ class Constant(BaseCovariance): k(x, x') = c """ - def __init__(self, c): + def __init__(self, c) -> None: self.c = c - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: return pt.alloc(self.c, X.shape[0]) - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: if Xs is None: return pt.alloc(self.c, X.shape[0], X.shape[0]) else: @@ -395,13 +407,13 @@ class WhiteNoise(BaseCovariance): k(x, x') = \sigma^2 \mathrm{I} """ - def __init__(self, sigma): + def __init__(self, sigma) -> None: self.sigma = sigma - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: return pt.alloc(pt.square(self.sigma), X.shape[0]) - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: if Xs is None: return pt.diag(self.diag(X)) else: @@ -440,7 +452,9 @@ class Circular(Covariance): https://hal.archives-ouvertes.fr/hal-01119942v1/document """ - def __init__(self, input_dim: int, period, tau=4, active_dims: Optional[Sequence[int]] = None): + def __init__( + self, input_dim: int, period, tau=4, active_dims: Optional[Sequence[int]] = None + ) -> None: super().__init__(input_dim, active_dims) self.c = pt.as_tensor_variable(period / 2) self.tau = tau @@ -455,11 +469,11 @@ def dist(self, X, Xs): def weinland(self, t): return (1 + self.tau * t / self.c) * pt.clip(1 - t / self.c, 0, np.inf) ** self.tau - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) return self.weinland(self.dist(X, Xs)) - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: return pt.alloc(1.0, X.shape[0]) @@ -476,7 +490,7 @@ class Stationary(Covariance): def __init__( self, input_dim: int, ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None - ): + ) -> None: super().__init__(input_dim, active_dims) if (ls is None and ls_inv is None) or (ls is not None and ls_inv is not None): raise ValueError("Only one of 'ls' or 'ls_inv' must be provided") @@ -509,10 +523,10 @@ def euclidean_dist(self, X, Xs): def _sqrt(self, r2): return pt.sqrt(r2 + 1e-12) - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: return pt.alloc(1.0, X.shape[0]) - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: raise NotImplementedError def power_spectral_density(self, omega): @@ -525,12 +539,12 @@ class IsotropicStationary(Stationary): that only depend on ‖x - x'‖. 
""" - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) r2 = self.square_dist(X, Xs) return self.full_r2(r2) - def full_r2(self, r2): + def full_r2(self, r2: TensorLike) -> TensorVariable: if hasattr(self, "full_r"): return self.full_r(self._sqrt(r2)) raise NotImplementedError @@ -547,7 +561,7 @@ class ExpQuad(IsotropicStationary): """ - def full_r2(self, r2): + def full_r2(self, r2: TensorLike) -> TensorVariable: return pt.exp(-0.5 * r2) def power_spectral_density(self, omega): @@ -582,11 +596,11 @@ def __init__( ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None, - ): + ) -> None: super().__init__(input_dim, ls, ls_inv, active_dims) self.alpha = alpha - def full_r2(self, r2): + def full_r2(self, r2: TensorLike) -> TensorVariable: return pt.power( (1.0 + 0.5 * r2 * (1.0 / self.alpha)), -1.0 * self.alpha, @@ -604,7 +618,7 @@ class Matern52(IsotropicStationary): \mathrm{exp}\left[ - \frac{\sqrt{5(x - x')^2}}{\ell} \right] """ - def full_r(self, r): + def full_r(self, r: TensorLike) -> TensorVariable: return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * pt.square(r)) * pt.exp(-1.0 * np.sqrt(5.0) * r) def power_spectral_density(self, omega): @@ -642,7 +656,7 @@ class Matern32(IsotropicStationary): \mathrm{exp}\left[ - \frac{\sqrt{3(x - x')^2}}{\ell} \right] """ - def full_r(self, r): + def full_r(self, r: TensorLike) -> TensorVariable: return (1.0 + np.sqrt(3.0) * r) * pt.exp(-np.sqrt(3.0) * r) def power_spectral_density(self, omega): @@ -679,7 +693,7 @@ class Matern12(IsotropicStationary): k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{\ell} \right] """ - def full_r(self, r): + def full_r(self, r: TensorLike) -> TensorVariable: return pt.exp(-r) @@ -692,7 +706,7 @@ class Exponential(IsotropicStationary): k(x, x') = \mathrm{exp}\left[ -\frac{||x - x'||}{2\ell} \right] """ - def full_r(self, r): + def full_r(self, r: TensorLike) -> TensorVariable: return pt.exp(-0.5 * r) @@ -742,7 +756,7 @@ def __init__( active_dims: Optional[Sequence[int]] = None, base_kernel_class: Type[IsotropicStationary] = ExpQuad, **base_kernel_kwargs, - ): + ) -> None: super().__init__(input_dim, ls, ls_inv, active_dims) if period <= 0: @@ -761,7 +775,7 @@ def base_kernel(self) -> IsotropicStationary: **self.base_kernel_kwargs, ) - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) if Xs is None: Xs = X @@ -785,7 +799,7 @@ class Cosine(Stationary): k(x, x') = \mathrm{cos}\left( 2 \pi \frac{||x - x'||}{ \ell^2} \right) """ - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) return pt.cos(2.0 * np.pi * self.euclidean_dist(X, Xs)) @@ -798,7 +812,7 @@ class Linear(Covariance): k(x, x') = (x - c)(x' - c) """ - def __init__(self, input_dim, c, active_dims=None): + def __init__(self, input_dim: int, c, active_dims: Optional[Sequence[int]] = None) -> None: super().__init__(input_dim, active_dims) self.c = c @@ -807,7 +821,7 @@ def _common(self, X, Xs=None): Xc = pt.sub(X, self.c) return X, Xc, Xs - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xc, Xs = self._common(X, Xs) if Xs is None: return pt.dot(Xc, pt.transpose(Xc)) @@ -815,7 +829,7 @@ def full(self, X, Xs=None): Xsc = pt.sub(Xs, self.c) return pt.dot(Xc, pt.transpose(Xsc)) - def diag(self, X): + def diag(self, X: TensorLike) -> 
TensorVariable: X, Xc, _ = self._common(X, None) return pt.sum(pt.square(Xc), 1) @@ -828,16 +842,18 @@ class Polynomial(Linear): k(x, x') = [(x - c)(x' - c) + \mathrm{offset}]^{d} """ - def __init__(self, input_dim, c, d, offset, active_dims=None): + def __init__( + self, input_dim: int, c, d, offset, active_dims: Optional[Sequence[int]] = None + ) -> None: super().__init__(input_dim, c, active_dims) self.d = d self.offset = offset - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: linear = super().full(X, Xs) return pt.power(linear + self.offset, self.d) - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: linear = super().diag(X) return pt.power(linear + self.offset, self.d) @@ -859,7 +875,14 @@ class WarpedInput(Covariance): Additional inputs (besides X or Xs) to warp_func. """ - def __init__(self, input_dim, cov_func, warp_func, args=None, active_dims=None): + def __init__( + self, + input_dim: int, + cov_func, + warp_func, + args=None, + active_dims: Optional[Sequence[int]] = None, + ) -> None: super().__init__(input_dim, active_dims) if not callable(warp_func): raise TypeError("warp_func must be callable") @@ -869,14 +892,14 @@ def __init__(self, input_dim, cov_func, warp_func, args=None, active_dims=None): self.args = args self.cov_func = cov_func - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) if Xs is None: return self.cov_func(self.w(X, self.args), Xs) else: return self.cov_func(self.w(X, self.args), self.w(Xs, self.args)) - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: X, _ = self._slice(X, None) return self.cov_func(self.w(X, self.args), diag=True) @@ -899,7 +922,13 @@ class Gibbs(Covariance): Additional inputs (besides X or Xs) to lengthscale_func. """ - def __init__(self, input_dim, lengthscale_func, args=None, active_dims=None): + def __init__( + self, + input_dim: int, + lengthscale_func, + args=None, + active_dims: Optional[Sequence[int]] = None, + ) -> None: super().__init__(input_dim, active_dims) if active_dims is not None: if len(active_dims) > 1: @@ -925,7 +954,7 @@ def square_dist(self, X, Xs=None): ) return pt.clip(sqd, 0.0, np.inf) - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) rx = self.lfunc(pt.as_tensor_variable(X), self.args) if Xs is None: @@ -938,7 +967,7 @@ def full(self, X, Xs=None): rz2 = pt.reshape(pt.square(rz), (1, -1)) return pt.sqrt((2.0 * pt.outer(rx, rz)) / (rx2 + rz2)) * pt.exp(-1.0 * r2 / (rx2 + rz2)) - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: return pt.alloc(1.0, X.shape[0]) @@ -961,7 +990,14 @@ class ScaledCov(Covariance): Additional inputs (besides X or Xs) to lengthscale_func. 
""" - def __init__(self, input_dim, cov_func, scaling_func, args=None, active_dims=None): + def __init__( + self, + input_dim: int, + cov_func, + scaling_func, + args=None, + active_dims: Optional[Sequence[int]] = None, + ) -> None: super().__init__(input_dim, active_dims) if not callable(scaling_func): raise TypeError("scaling_func must be callable") @@ -971,13 +1007,13 @@ def __init__(self, input_dim, cov_func, scaling_func, args=None, active_dims=Non self.scaling_func = handle_args(scaling_func, args) self.args = args - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: X, _ = self._slice(X, None) cov_diag = self.cov_func(X, diag=True) scf_diag = pt.square(pt.flatten(self.scaling_func(X, self.args))) return cov_diag * scf_diag - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) scf_x = self.scaling_func(X, self.args) if Xs is None: @@ -1020,7 +1056,14 @@ class Coregion(Covariance): `input_dim != 1`, then `active_dims` must have a length of one. """ - def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): + def __init__( + self, + input_dim: int, + W=None, + kappa=None, + B=None, + active_dims: Optional[Sequence[int]] = None, + ) -> None: super().__init__(input_dim, active_dims) if len(self.active_dims) != 1: raise ValueError("Coregion requires exactly one dimension to be active") @@ -1036,7 +1079,7 @@ def __init__(self, input_dim, W=None, kappa=None, B=None, active_dims=None): else: raise ValueError("Exactly one of (W, kappa) and B must be provided to Coregion") - def full(self, X, Xs=None): + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) index = pt.cast(X, "int32") if Xs is None: @@ -1045,7 +1088,7 @@ def full(self, X, Xs=None): index2 = pt.cast(Xs, "int32").T return self.B[index, index2] - def diag(self, X): + def diag(self, X: TensorLike) -> TensorVariable: X, _ = self._slice(X, None) index = pt.cast(X, "int32") return pt.diag(self.B)[index.ravel()] From 6d9db6b6a32e3d778900242fd332189da4dcf3dc Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Tue, 23 May 2023 11:05:16 +0100 Subject: [PATCH 06/13] Move isotropic stationary functionality into stationary kernel --- pymc/gp/cov.py | 59 ++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 86457549a5..05bc35457d 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -526,19 +526,6 @@ def _sqrt(self, r2): def diag(self, X: TensorLike) -> TensorVariable: return pt.alloc(1.0, X.shape[0]) - def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: - raise NotImplementedError - - def power_spectral_density(self, omega): - raise NotImplementedError - - -class IsotropicStationary(Stationary): - r""" - Base class for isotropic stationary kernels/covariance functions i.e. kernels - that only depend on ‖x - x'‖. - """ - def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) r2 = self.square_dist(X, Xs) @@ -549,8 +536,11 @@ def full_r2(self, r2: TensorLike) -> TensorVariable: return self.full_r(self._sqrt(r2)) raise NotImplementedError + def power_spectral_density(self, omega): + raise NotImplementedError -class ExpQuad(IsotropicStationary): + +class ExpQuad(Stationary): r""" The Exponentiated Quadratic kernel. Also referred to as the Squared Exponential, or Radial Basis Function kernel. 
@@ -580,7 +570,7 @@ def power_spectral_density(self, omega): return c * pt.prod(ls) * exp -class RatQuad(IsotropicStationary): +class RatQuad(Stationary): r""" The Rational Quadratic kernel. @@ -607,7 +597,7 @@ def full_r2(self, r2: TensorLike) -> TensorVariable: ) -class Matern52(IsotropicStationary): +class Matern52(Stationary): r""" The Matern kernel with nu = 5/2. @@ -646,7 +636,7 @@ def power_spectral_density(self, omega): return (num / den) * pt.prod(ls) * pow -class Matern32(IsotropicStationary): +class Matern32(Stationary): r""" The Matern kernel with nu = 3/2. @@ -684,7 +674,7 @@ def power_spectral_density(self, omega): return (num / den) * pt.prod(ls) * pow -class Matern12(IsotropicStationary): +class Matern12(Stationary): r""" The Matern kernel with nu = 1/2 @@ -697,7 +687,7 @@ def full_r(self, r: TensorLike) -> TensorVariable: return pt.exp(-r) -class Exponential(IsotropicStationary): +class Exponential(Stationary): r""" The Exponential kernel. @@ -710,11 +700,23 @@ def full_r(self, r: TensorLike) -> TensorVariable: return pt.exp(-0.5 * r) +class Cosine(Stationary): + r""" + The Cosine kernel. + + .. math:: + k(x, x') = \mathrm{cos}\left( 2 \pi \frac{||x - x'||}{ \ell^2} \right) + """ + + def full_r(self, r: TensorLike) -> TensorVariable: + return pt.cos(2.0 * np.pi * r) + + class Periodic(Stationary): r""" The Periodic kernel. - This can be used to wrap any `IsotropicStationary` kernel to transform it into a periodic + This can be used to wrap any `Stationary` kernel to transform it into a periodic version. The canonical form (based on the `ExpQuad` kernel) is given by: .. math:: @@ -754,7 +756,7 @@ def __init__( ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None, - base_kernel_class: Type[IsotropicStationary] = ExpQuad, + base_kernel_class: Type[Stationary] = ExpQuad, **base_kernel_kwargs, ) -> None: super().__init__(input_dim, ls, ls_inv, active_dims) @@ -766,7 +768,7 @@ def __init__( self.base_kernel_kwargs = base_kernel_kwargs @property - def base_kernel(self) -> IsotropicStationary: + def base_kernel(self) -> Stationary: """Instantiation of the base kernel.""" return self.base_kernel_class( self.input_dim, @@ -791,19 +793,6 @@ def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable return K -class Cosine(Stationary): - r""" - The Cosine kernel. - - .. math:: - k(x, x') = \mathrm{cos}\left( 2 \pi \frac{||x - x'||}{ \ell^2} \right) - """ - - def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: - X, Xs = self._slice(X, Xs) - return pt.cos(2.0 * np.pi * self.euclidean_dist(X, Xs)) - - class Linear(Covariance): r""" The Linear kernel. 
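As of this patch, the extension point for new kernels is minimal: a subclass implements full_r (or full_r2), and Stationary.full handles slicing, lengthscale scaling, and dispatch. A short sketch of both uses, assuming the series is applied up to this patch; the GammaExp kernel below is a hypothetical illustration rather than part of the PR, and since PATCH 08 further down reverts these functional changes, this reflects an intermediate state of the branch only.

import numpy as np
import pymc as pm
import pytensor.tensor as pt


class GammaExp(pm.gp.cov.Stationary):
    # Hypothetical isotropic kernel k(r) = exp(-r**gamma), valid for 0 < gamma <= 2.
    def __init__(self, input_dim, gamma, ls=None, ls_inv=None, active_dims=None):
        super().__init__(input_dim, ls, ls_inv, active_dims)
        self.gamma = gamma

    def full_r(self, r):
        # Stationary.full computes r2 = ||(x - x') / ls||**2 and routes it through
        # full_r2, which falls back to this hook via _sqrt(r2).
        return pt.exp(-pt.power(r, self.gamma))


X = np.linspace(0, 1, 10)[:, None]
K = GammaExp(1, gamma=1.5, ls=0.1)(X).eval()  # used directly as an isotropic kernel

# The same class drops in as the base of the wrapped periodic kernel, with
# gamma forwarded through **base_kernel_kwargs:
Kp = pm.gp.cov.Periodic(1, 0.5, ls=0.1, base_kernel_class=GammaExp, gamma=1.5)(X).eval()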
From e5ec36145c21478b9857d8c0b4a1b3e0df376bf0 Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Wed, 24 May 2023 11:33:57 +0100 Subject: [PATCH 07/13] Remove -> None from init type annotations, and minor cleanup --- pymc/gp/cov.py | 85 +++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 05bc35457d..16c5dd1266 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -18,7 +18,7 @@ from collections import Counter from functools import reduce from operator import add, mul -from typing import Optional, Sequence, Type, Union +from typing import Callable, Optional, Sequence, Type, Union import numpy as np import pytensor.tensor as pt @@ -56,7 +56,10 @@ class BaseCovariance: """ def __call__( - self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + self, + X: TensorLike, + Xs: Optional[TensorLike] = None, + diag: bool = False, ) -> TensorVariable: r""" Evaluate the kernel/covariance function. @@ -149,7 +152,7 @@ class Covariance(BaseCovariance): function operates on. """ - def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None) -> None: + def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None): self.input_dim = input_dim if active_dims is None: self.active_dims = np.arange(input_dim) @@ -186,7 +189,7 @@ def _slice(self, X, Xs=None): class Combination(Covariance): - def __init__(self, factor_list) -> None: + def __init__(self, factor_list): """Use constituent factors to get input_dim and active_dims for the Combination covariance.""" # Check if all input_dim are the same in factor_list @@ -300,21 +303,27 @@ def _merge_factors_psd(self, omega): class Add(Combination): def __call__( - self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + self, + X: TensorLike, + Xs: Optional[TensorLike] = None, + diag: bool = False, ) -> TensorVariable: return reduce(add, self._merge_factors_cov(X, Xs, diag)) - def power_spectral_density(self, omega): + def power_spectral_density(self, omega) -> TensorVariable: return reduce(add, self._merge_factors_psd(omega)) class Prod(Combination): def __call__( - self, X: TensorLike, Xs: Optional[TensorLike] = None, diag: bool = False + self, + X: TensorLike, + Xs: Optional[TensorLike] = None, + diag: bool = False, ) -> TensorVariable: return reduce(mul, self._merge_factors_cov(X, Xs, diag)) - def power_spectral_density(self, omega): + def power_spectral_density(self, omega) -> TensorVariable: check = Counter([isinstance(factor, Covariance) for factor in self._factor_list]) if check.get(True) >= 2: raise NotImplementedError( @@ -326,7 +335,7 @@ def power_spectral_density(self, omega): class Exponentiated(Covariance): - def __init__(self, kernel: Covariance, power) -> None: + def __init__(self, kernel: Covariance, power): self.kernel = kernel self.power = power super().__init__(input_dim=self.kernel.input_dim, active_dims=self.kernel.active_dims) @@ -353,7 +362,7 @@ class Kron(Covariance): implementations. 
""" - def __init__(self, factor_list) -> None: + def __init__(self, factor_list: Sequence[Covariance]): self.input_dims = [factor.input_dim for factor in factor_list] input_dim = sum(self.input_dims) super().__init__(input_dim=input_dim) @@ -385,7 +394,7 @@ class Constant(BaseCovariance): k(x, x') = c """ - def __init__(self, c) -> None: + def __init__(self, c): self.c = c def diag(self, X: TensorLike) -> TensorVariable: @@ -407,7 +416,7 @@ class WhiteNoise(BaseCovariance): k(x, x') = \sigma^2 \mathrm{I} """ - def __init__(self, sigma) -> None: + def __init__(self, sigma): self.sigma = sigma def diag(self, X: TensorLike) -> TensorVariable: @@ -453,8 +462,12 @@ class Circular(Covariance): """ def __init__( - self, input_dim: int, period, tau=4, active_dims: Optional[Sequence[int]] = None - ) -> None: + self, + input_dim: int, + period, + tau=4, + active_dims: Optional[Sequence[int]] = None, + ): super().__init__(input_dim, active_dims) self.c = pt.as_tensor_variable(period / 2) self.tau = tau @@ -489,8 +502,12 @@ class Stationary(Covariance): """ def __init__( - self, input_dim: int, ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None - ) -> None: + self, + input_dim: int, + ls=None, + ls_inv=None, + active_dims: Optional[Sequence[int]] = None, + ): super().__init__(input_dim, active_dims) if (ls is None and ls_inv is None) or (ls is not None and ls_inv is not None): raise ValueError("Only one of 'ls' or 'ls_inv' must be provided") @@ -536,7 +553,7 @@ def full_r2(self, r2: TensorLike) -> TensorVariable: return self.full_r(self._sqrt(r2)) raise NotImplementedError - def power_spectral_density(self, omega): + def power_spectral_density(self, omega) -> TensorVariable: raise NotImplementedError @@ -554,7 +571,7 @@ class ExpQuad(Stationary): def full_r2(self, r2: TensorLike) -> TensorVariable: return pt.exp(-0.5 * r2) - def power_spectral_density(self, omega): + def power_spectral_density(self, omega) -> TensorVariable: r""" The power spectral density for the ExpQuad kernel is: @@ -586,7 +603,7 @@ def __init__( ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None, - ) -> None: + ): super().__init__(input_dim, ls, ls_inv, active_dims) self.alpha = alpha @@ -611,7 +628,7 @@ class Matern52(Stationary): def full_r(self, r: TensorLike) -> TensorVariable: return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * pt.square(r)) * pt.exp(-1.0 * np.sqrt(5.0) * r) - def power_spectral_density(self, omega): + def power_spectral_density(self, omega) -> TensorVariable: r""" The power spectral density for the Matern52 kernel is: @@ -649,7 +666,7 @@ class Matern32(Stationary): def full_r(self, r: TensorLike) -> TensorVariable: return (1.0 + np.sqrt(3.0) * r) * pt.exp(-np.sqrt(3.0) * r) - def power_spectral_density(self, omega): + def power_spectral_density(self, omega) -> TensorVariable: r""" The power spectral density for the Matern32 kernel is: @@ -758,7 +775,7 @@ def __init__( active_dims: Optional[Sequence[int]] = None, base_kernel_class: Type[Stationary] = ExpQuad, **base_kernel_kwargs, - ) -> None: + ): super().__init__(input_dim, ls, ls_inv, active_dims) if period <= 0: @@ -801,7 +818,7 @@ class Linear(Covariance): k(x, x') = (x - c)(x' - c) """ - def __init__(self, input_dim: int, c, active_dims: Optional[Sequence[int]] = None) -> None: + def __init__(self, input_dim: int, c, active_dims: Optional[Sequence[int]] = None): super().__init__(input_dim, active_dims) self.c = c @@ -831,9 +848,7 @@ class Polynomial(Linear): k(x, x') = [(x - c)(x' - c) + \mathrm{offset}]^{d} """ - def __init__( - 
self, input_dim: int, c, d, offset, active_dims: Optional[Sequence[int]] = None - ) -> None: + def __init__(self, input_dim: int, c, d, offset, active_dims: Optional[Sequence[int]] = None): super().__init__(input_dim, c, active_dims) self.d = d self.offset = offset @@ -867,11 +882,11 @@ class WarpedInput(Covariance): def __init__( self, input_dim: int, - cov_func, - warp_func, + cov_func: Covariance, + warp_func: Callable, args=None, active_dims: Optional[Sequence[int]] = None, - ) -> None: + ): super().__init__(input_dim, active_dims) if not callable(warp_func): raise TypeError("warp_func must be callable") @@ -917,7 +932,7 @@ def __init__( lengthscale_func, args=None, active_dims: Optional[Sequence[int]] = None, - ) -> None: + ): super().__init__(input_dim, active_dims) if active_dims is not None: if len(active_dims) > 1: @@ -982,11 +997,11 @@ class ScaledCov(Covariance): def __init__( self, input_dim: int, - cov_func, - scaling_func, + cov_func: Covariance, + scaling_func: Callable, args=None, active_dims: Optional[Sequence[int]] = None, - ) -> None: + ): super().__init__(input_dim, active_dims) if not callable(scaling_func): raise TypeError("scaling_func must be callable") @@ -1052,7 +1067,7 @@ def __init__( kappa=None, B=None, active_dims: Optional[Sequence[int]] = None, - ) -> None: + ): super().__init__(input_dim, active_dims) if len(self.active_dims) != 1: raise ValueError("Coregion requires exactly one dimension to be active") @@ -1083,7 +1098,7 @@ def diag(self, X: TensorLike) -> TensorVariable: return pt.diag(self.B)[index.ravel()] -def handle_args(func, args): +def handle_args(func, args) -> Callable: def f(x, args): if args is None: return func(x) From e69b27fc0b29d633684cd25699e13f238e567bfd Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Mon, 29 May 2023 11:07:18 +0100 Subject: [PATCH 08/13] Restricting changes to type-hints only --- .pre-commit-config.yaml | 1 + pymc/gp/cov.py | 106 ++++++++++++++-------------------------- 2 files changed, 37 insertions(+), 70 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4599ae17e..35ac4723b3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,6 +49,7 @@ repos: rev: 6.3.0 hooks: - id: pydocstyle + additional_dependencies: [tomli] args: - --ignore=D100,D101,D102,D103,D104,D105,D107,D200,D202,D203,D204,D205,D209,D212,D213,D301,D400,D401,D403,D413,D415,D417 files: ^pymc/ diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 16c5dd1266..5d398f15b2 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -18,7 +18,7 @@ from collections import Counter from functools import reduce from operator import add, mul -from typing import Callable, Optional, Sequence, Type, Union +from typing import Callable, Optional, Sequence, Union import numpy as np import pytensor.tensor as pt @@ -84,23 +84,23 @@ def diag(self, X: TensorLike) -> TensorVariable: def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: raise NotImplementedError - def __add__(self, other): + def __add__(self, other) -> "Add": # If it's a scalar, cast as Constant covariance. This allows validation for power spectral # density calc. 
if isinstance(other, numbers.Real): other = Constant(c=other) return Add([self, other]) - def __mul__(self, other): + def __mul__(self, other) -> "Prod": return Prod([self, other]) - def __radd__(self, other): + def __radd__(self, other) -> "Add": return self.__add__(other) - def __rmul__(self, other): + def __rmul__(self, other) -> "Prod": return self.__mul__(other) - def __pow__(self, other): + def __pow__(self, other) -> "Exponentiated": other = pt.as_tensor_variable(other).squeeze() if not other.ndim == 0: raise ValueError("A covariance function can only be exponentiated by a scalar value") @@ -189,7 +189,7 @@ def _slice(self, X, Xs=None): class Combination(Covariance): - def __init__(self, factor_list): + def __init__(self, factor_list: Sequence): """Use constituent factors to get input_dim and active_dims for the Combination covariance.""" # Check if all input_dim are the same in factor_list @@ -330,7 +330,6 @@ def power_spectral_density(self, omega) -> TensorVariable: "The power spectral density of products of covariance " "functions is not implemented." ) - return reduce(mul, self._merge_factors_psd(omega)) @@ -544,13 +543,6 @@ def diag(self, X: TensorLike) -> TensorVariable: return pt.alloc(1.0, X.shape[0]) def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: - X, Xs = self._slice(X, Xs) - r2 = self.square_dist(X, Xs) - return self.full_r2(r2) - - def full_r2(self, r2: TensorLike) -> TensorVariable: - if hasattr(self, "full_r"): - return self.full_r(self._sqrt(r2)) raise NotImplementedError def power_spectral_density(self, omega) -> TensorVariable: @@ -568,7 +560,9 @@ class ExpQuad(Stationary): """ - def full_r2(self, r2: TensorLike) -> TensorVariable: + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: + X, Xs = self._slice(X, Xs) + r2 = self.square_dist(X, Xs) return pt.exp(-0.5 * r2) def power_spectral_density(self, omega) -> TensorVariable: @@ -607,7 +601,9 @@ def __init__( super().__init__(input_dim, ls, ls_inv, active_dims) self.alpha = alpha - def full_r2(self, r2: TensorLike) -> TensorVariable: + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: + X, Xs = self._slice(X, Xs) + r2 = self.square_dist(X, Xs) return pt.power( (1.0 + 0.5 * r2 * (1.0 / self.alpha)), -1.0 * self.alpha, @@ -625,7 +621,9 @@ class Matern52(Stationary): \mathrm{exp}\left[ - \frac{\sqrt{5(x - x')^2}}{\ell} \right] """ - def full_r(self, r: TensorLike) -> TensorVariable: + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: + X, Xs = self._slice(X, Xs) + r = self.euclidean_dist(X, Xs) return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * pt.square(r)) * pt.exp(-1.0 * np.sqrt(5.0) * r) def power_spectral_density(self, omega) -> TensorVariable: @@ -663,7 +661,9 @@ class Matern32(Stationary): \mathrm{exp}\left[ - \frac{\sqrt{3(x - x')^2}}{\ell} \right] """ - def full_r(self, r: TensorLike) -> TensorVariable: + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: + X, Xs = self._slice(X, Xs) + r = self.euclidean_dist(X, Xs) return (1.0 + np.sqrt(3.0) * r) * pt.exp(-np.sqrt(3.0) * r) def power_spectral_density(self, omega) -> TensorVariable: @@ -700,7 +700,9 @@ class Matern12(Stationary): k(x, x') = \mathrm{exp}\left[ -\frac{(x - x')^2}{\ell} \right] """ - def full_r(self, r: TensorLike) -> TensorVariable: + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: + X, Xs = self._slice(X, Xs) + r = self.euclidean_dist(X, Xs) return pt.exp(-r) 
@@ -713,7 +715,9 @@ class Exponential(Stationary): k(x, x') = \mathrm{exp}\left[ -\frac{||x - x'||}{2\ell} \right] """ - def full_r(self, r: TensorLike) -> TensorVariable: + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: + X, Xs = self._slice(X, Xs) + r = self.euclidean_dist(X, Xs) return pt.exp(-0.5 * r) @@ -725,7 +729,9 @@ class Cosine(Stationary): k(x, x') = \mathrm{cos}\left( 2 \pi \frac{||x - x'||}{ \ell^2} \right) """ - def full_r(self, r: TensorLike) -> TensorVariable: + def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: + X, Xs = self._slice(X, Xs) + r = self.euclidean_dist(X, Xs) return pt.cos(2.0 * np.pi * r) @@ -733,27 +739,9 @@ class Periodic(Stationary): r""" The Periodic kernel. - This can be used to wrap any `Stationary` kernel to transform it into a periodic - version. The canonical form (based on the `ExpQuad` kernel) is given by: - .. math:: - k(x, x') = \mathrm{exp}\left( -\frac{\mathrm{sin}^2(\pi |x-x'| \frac{1}{T})}{2\ell^2} \right) - The derivation can be achieved by mapping the original inputs through the transformation - u = (cos(x), sin(x)). - - Parameters - ---------- - period: Period. The period of the kernel. - ls: Lengthscale. If input_dim > 1, a list or array of scalars or PyMC random - variables. If input_dim == 1, a scalar or PyMC random variable. - ls_inv: Inverse lengthscale. 1 / ls. One of ls or ls_inv must be provided. - base_kernel_class: Base kernel class. The `IsotropicStationary` kernel to use - for the base behaviour, defaults to `ExpQuad`. - **base_kernel_kwargs: Additional kwargs passed to the base kernel for initialization. - For example, used to pass `alpha` to the `RatQuad` kernel. - Notes ----- Note that the scaling factor for this kernel is different compared to the more common @@ -773,41 +761,19 @@ def __init__( ls=None, ls_inv=None, active_dims: Optional[Sequence[int]] = None, - base_kernel_class: Type[Stationary] = ExpQuad, - **base_kernel_kwargs, ): super().__init__(input_dim, ls, ls_inv, active_dims) - - if period <= 0: - raise ValueError("Period parameter must be positive") self.period = period - self.base_kernel_class = base_kernel_class - self.base_kernel_kwargs = base_kernel_kwargs - - @property - def base_kernel(self) -> Stationary: - """Instantiation of the base kernel.""" - return self.base_kernel_class( - self.input_dim, - ls=self.ls, - active_dims=self.active_dims, - **self.base_kernel_kwargs, - ) def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: X, Xs = self._slice(X, Xs) if Xs is None: Xs = X f1 = pt.expand_dims(X, axis=(0,)) - f2 = pt.expand_dims(X, axis=(1,)) + f2 = pt.expand_dims(Xs, axis=(1,)) r = np.pi * (f1 - f2) / self.period - if hasattr(self.base_kernel, "full_r"): - r = pt.sum(pt.abs(pt.sin(r) / self.ls), 2) - K = self.base_kernel.full_r(r) - else: - r2 = pt.sum(pt.square(pt.sin(r) / self.ls), 2) - K = self.base_kernel.full_r2(r2) - return K + r2 = pt.sum(pt.square(pt.sin(r) / self.ls), 2) + return pt.exp(-0.5 * r2) class Linear(Covariance): @@ -892,7 +858,7 @@ def __init__( raise TypeError("warp_func must be callable") if not isinstance(cov_func, Covariance): raise TypeError("Must be or inherit from the Covariance class") - self.w = handle_args(warp_func, args) + self.w = handle_args(warp_func) self.args = args self.cov_func = cov_func @@ -929,7 +895,7 @@ class Gibbs(Covariance): def __init__( self, input_dim: int, - lengthscale_func, + lengthscale_func: Callable, args=None, active_dims: Optional[Sequence[int]] = None, 
): @@ -942,7 +908,7 @@ def __init__( raise NotImplementedError(("Higher dimensional inputs ", "are untested")) if not callable(lengthscale_func): raise TypeError("lengthscale_func must be callable") - self.lfunc = handle_args(lengthscale_func, args) + self.lfunc = handle_args(lengthscale_func) self.args = args def square_dist(self, X, Xs=None): @@ -1008,7 +974,7 @@ def __init__( if not isinstance(cov_func, Covariance): raise TypeError("Must be or inherit from the Covariance class") self.cov_func = cov_func - self.scaling_func = handle_args(scaling_func, args) + self.scaling_func = handle_args(scaling_func) self.args = args def diag(self, X: TensorLike) -> TensorVariable: @@ -1098,7 +1064,7 @@ def diag(self, X: TensorLike) -> TensorVariable: return pt.diag(self.B)[index.ravel()] -def handle_args(func, args) -> Callable: +def handle_args(func: Callable) -> Callable: def f(x, args): if args is None: return func(x) From eaf9c00edc2ecd557bd55d7d38f29dfb33831dd1 Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Mon, 29 May 2023 11:08:54 +0100 Subject: [PATCH 09/13] Remove tests --- tests/gp/test_cov.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py index b9a7e0d231..ba8d41962c 100644 --- a/tests/gp/test_cov.py +++ b/tests/gp/test_cov.py @@ -616,18 +616,6 @@ def test_1d(self): Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) - def test_1d_ratquad(self): - X = np.linspace(0, 1, 10)[:, None] - with pm.Model() as model: - cov = pm.gp.cov.Periodic(1, 0.1, 0.1, base_kernel_class=pm.gp.cov.RatQuad, alpha=0.5) - K = cov(X).eval() - npt.assert_allclose(K[0, 1], 0.28063, atol=1e-3) - K = cov(X, X).eval() - npt.assert_allclose(K[0, 1], 0.28063, atol=1e-3) - # check diagonal - Kd = cov(X, diag=True).eval() - npt.assert_allclose(np.diag(K), Kd, atol=1e-5) - class TestLinear: def test_1d(self): From 2b3cf551b5d9d15b063f72254b0c5a9252e1cbbc Mon Sep 17 00:00:00 2001 From: Joseph Hall Date: Fri, 2 Jun 2023 10:00:19 +0100 Subject: [PATCH 10/13] Add type hints for omega --- pymc/gp/cov.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 5d398f15b2..fdbe6c6976 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -214,7 +214,6 @@ def __init__(self, factor_list: Sequence): dtype=int, ) ) - super().__init__(input_dim=input_dim, active_dims=active_dims) # Set up combination kernel, flatten out factor_list so that @@ -310,7 +309,7 @@ def __call__( ) -> TensorVariable: return reduce(add, self._merge_factors_cov(X, Xs, diag)) - def power_spectral_density(self, omega) -> TensorVariable: + def power_spectral_density(self, omega: TensorLike) -> TensorVariable: return reduce(add, self._merge_factors_psd(omega)) @@ -323,7 +322,7 @@ def __call__( ) -> TensorVariable: return reduce(mul, self._merge_factors_cov(X, Xs, diag)) - def power_spectral_density(self, omega) -> TensorVariable: + def power_spectral_density(self, omega: TensorLike) -> TensorVariable: check = Counter([isinstance(factor, Covariance) for factor in self._factor_list]) if check.get(True) >= 2: raise NotImplementedError( @@ -545,7 +544,7 @@ def diag(self, X: TensorLike) -> TensorVariable: def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: raise NotImplementedError - def power_spectral_density(self, omega) -> TensorVariable: + def power_spectral_density(self, omega: TensorLike) -> TensorVariable: raise NotImplementedError @@ -565,7 +564,7 @@ def full(self, X: TensorLike, Xs: 

From eaf9c00edc2ecd557bd55d7d38f29dfb33831dd1 Mon Sep 17 00:00:00 2001
From: Joseph Hall
Date: Mon, 29 May 2023 11:08:54 +0100
Subject: [PATCH 09/13] Remove tests

---
 tests/gp/test_cov.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py
index b9a7e0d231..ba8d41962c 100644
--- a/tests/gp/test_cov.py
+++ b/tests/gp/test_cov.py
@@ -616,18 +616,6 @@ def test_1d(self):
         Kd = cov(X, diag=True).eval()
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
-    def test_1d_ratquad(self):
-        X = np.linspace(0, 1, 10)[:, None]
-        with pm.Model() as model:
-            cov = pm.gp.cov.Periodic(1, 0.1, 0.1, base_kernel_class=pm.gp.cov.RatQuad, alpha=0.5)
-            K = cov(X).eval()
-            npt.assert_allclose(K[0, 1], 0.28063, atol=1e-3)
-            K = cov(X, X).eval()
-            npt.assert_allclose(K[0, 1], 0.28063, atol=1e-3)
-            # check diagonal
-            Kd = cov(X, diag=True).eval()
-            npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
-
 
 class TestLinear:
     def test_1d(self):

From 2b3cf551b5d9d15b063f72254b0c5a9252e1cbbc Mon Sep 17 00:00:00 2001
From: Joseph Hall
Date: Fri, 2 Jun 2023 10:00:19 +0100
Subject: [PATCH 10/13] Add type hints for omega

---
 pymc/gp/cov.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index 5d398f15b2..fdbe6c6976 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -214,7 +214,6 @@ def __init__(self, factor_list: Sequence):
                 dtype=int,
             )
         )
-
         super().__init__(input_dim=input_dim, active_dims=active_dims)
 
         # Set up combination kernel, flatten out factor_list so that
@@ -310,7 +309,7 @@ def __call__(
     ) -> TensorVariable:
         return reduce(add, self._merge_factors_cov(X, Xs, diag))
 
-    def power_spectral_density(self, omega) -> TensorVariable:
+    def power_spectral_density(self, omega: TensorLike) -> TensorVariable:
         return reduce(add, self._merge_factors_psd(omega))
@@ -323,7 +322,7 @@ def __call__(
     ) -> TensorVariable:
         return reduce(mul, self._merge_factors_cov(X, Xs, diag))
 
-    def power_spectral_density(self, omega) -> TensorVariable:
+    def power_spectral_density(self, omega: TensorLike) -> TensorVariable:
         check = Counter([isinstance(factor, Covariance) for factor in self._factor_list])
         if check.get(True) >= 2:
             raise NotImplementedError(
                 "The power spectral density of products of covariance "
                 "functions is not implemented."
@@ -545,7 +544,7 @@ def diag(self, X: TensorLike) -> TensorVariable:
     def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         raise NotImplementedError
 
-    def power_spectral_density(self, omega) -> TensorVariable:
+    def power_spectral_density(self, omega: TensorLike) -> TensorVariable:
         raise NotImplementedError
@@ -565,7 +564,7 @@ def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         r2 = self.square_dist(X, Xs)
         return pt.exp(-0.5 * r2)
 
-    def power_spectral_density(self, omega) -> TensorVariable:
+    def power_spectral_density(self, omega: TensorLike) -> TensorVariable:
         r"""
         The power spectral density for the ExpQuad kernel is:
@@ -626,7 +625,7 @@ def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(5.0) * r + 5.0 / 3.0 * pt.square(r)) * pt.exp(-1.0 * np.sqrt(5.0) * r)
 
-    def power_spectral_density(self, omega) -> TensorVariable:
+    def power_spectral_density(self, omega: TensorLike) -> TensorVariable:
         r"""
         The power spectral density for the Matern52 kernel is:
@@ -666,7 +665,7 @@ def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         r = self.euclidean_dist(X, Xs)
         return (1.0 + np.sqrt(3.0) * r) * pt.exp(-np.sqrt(3.0) * r)
 
-    def power_spectral_density(self, omega) -> TensorVariable:
+    def power_spectral_density(self, omega: TensorLike) -> TensorVariable:
         r"""
         The power spectral density for the Matern32 kernel is:

From 46b92270a0d81b462fcbd77a249eb567f8e7dded Mon Sep 17 00:00:00 2001
From: Joseph Hall
Date: Mon, 5 Jun 2023 12:42:45 +0100
Subject: [PATCH 11/13] Remove pre-commit change

---
 .pre-commit-config.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 35ac4723b3..d4599ae17e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -49,7 +49,6 @@ repos:
     rev: 6.3.0
     hooks:
       - id: pydocstyle
-        additional_dependencies: [tomli]
         args:
         - --ignore=D100,D101,D102,D103,D104,D105,D107,D200,D202,D203,D204,D205,D209,D212,D213,D301,D400,D401,D403,D413,D415,D417
         files: ^pymc/

From 5bb288cd366a54bba97c248c79a15694170df15a Mon Sep 17 00:00:00 2001
From: Joseph Hall
Date: Wed, 14 Jun 2023 07:21:03 +0100
Subject: [PATCH 12/13] Fix mypy and test errors

---
 pymc/gp/cov.py       | 53 ++++++++++++++++++++++++++------------------
 tests/gp/test_cov.py | 19 +++++++++++-----
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index fdbe6c6976..3977033086 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -18,7 +18,7 @@
 from collections import Counter
 from functools import reduce
 from operator import add, mul
-from typing import Callable, Optional, Sequence, Union
+from typing import Any, Callable, List, Optional, Sequence, Union
 
 import numpy as np
 import pytensor.tensor as pt
@@ -48,6 +48,7 @@
 ]
 
 TensorLike = Union[np.ndarray, TensorVariable]
+IntSequence = Union[np.ndarray, Sequence[int]]
 
 
 class BaseCovariance:
@@ -104,6 +105,10 @@ def __pow__(self, other) -> "Exponentiated":
         other = pt.as_tensor_variable(other).squeeze()
         if not other.ndim == 0:
             raise ValueError("A covariance function can only be exponentiated by a scalar value")
+        if not isinstance(self, Covariance):
+            raise TypeError(
+                "Can only exponentiate covariance functions which inherit from `Covariance`"
+            )
         return Exponentiated(self, other)
 
     def __array_wrap__(self, result):
@@ -136,6 +141,10 @@ def __array_wrap__(self, result):
                 "Known types are `Add` or `Prod`."
             )
 
+    @staticmethod
+    def _alloc(X, *shape: int) -> TensorVariable:
+        return pt.alloc(X, *shape)  # type: ignore
+
 
 class Covariance(BaseCovariance):
     """
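The `__pow__` guard added above means only kernels that inherit from `Covariance` can be exponentiated; `BaseCovariance`-only kernels such as `Constant` now fail fast with a `TypeError`. A short usage sketch (the lengthscale and exponent values are illustrative):

    import pymc as pm

    k_ok = pm.gp.cov.ExpQuad(1, ls=0.1) ** 2  # fine: ExpQuad inherits from Covariance

    try:
        pm.gp.cov.Constant(2) ** 2            # Constant derives from BaseCovariance only
    except TypeError as err:
        print(err)  # Can only exponentiate covariance functions which inherit from `Covariance`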
""" - def __init__(self, input_dim: int, active_dims: Optional[Sequence[int]] = None): + def __init__(self, input_dim: int, active_dims: Optional[IntSequence] = None): self.input_dim = input_dim if active_dims is None: self.active_dims = np.arange(input_dim) @@ -217,7 +226,7 @@ def __init__(self, factor_list: Sequence): super().__init__(input_dim=input_dim, active_dims=active_dims) # Set up combination kernel, flatten out factor_list so that - self._factor_list = [] + self._factor_list: List[Any] = [] for factor in factor_list: if isinstance(factor, self.__class__): self._factor_list.extend(factor._factor_list) @@ -324,7 +333,7 @@ def __call__( def power_spectral_density(self, omega: TensorLike) -> TensorVariable: check = Counter([isinstance(factor, Covariance) for factor in self._factor_list]) - if check.get(True) >= 2: + if check.get(True, 0) >= 2: raise NotImplementedError( "The power spectral density of products of covariance " "functions is not implemented." @@ -396,13 +405,13 @@ def __init__(self, c): self.c = c def diag(self, X: TensorLike) -> TensorVariable: - return pt.alloc(self.c, X.shape[0]) + return self._alloc(self.c, X.shape[0]) def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: if Xs is None: - return pt.alloc(self.c, X.shape[0], X.shape[0]) + return self._alloc(self.c, X.shape[0], X.shape[0]) else: - return pt.alloc(self.c, X.shape[0], Xs.shape[0]) + return self._alloc(self.c, X.shape[0], Xs.shape[0]) class WhiteNoise(BaseCovariance): @@ -418,13 +427,13 @@ def __init__(self, sigma): self.sigma = sigma def diag(self, X: TensorLike) -> TensorVariable: - return pt.alloc(pt.square(self.sigma), X.shape[0]) + return self._alloc(pt.square(self.sigma), X.shape[0]) def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: if Xs is None: return pt.diag(self.diag(X)) else: - return pt.alloc(0.0, X.shape[0], Xs.shape[0]) + return self._alloc(0.0, X.shape[0], Xs.shape[0]) class Circular(Covariance): @@ -464,7 +473,7 @@ def __init__( input_dim: int, period, tau=4, - active_dims: Optional[Sequence[int]] = None, + active_dims: Optional[IntSequence] = None, ): super().__init__(input_dim, active_dims) self.c = pt.as_tensor_variable(period / 2) @@ -485,7 +494,7 @@ def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable return self.weinland(self.dist(X, Xs)) def diag(self, X: TensorLike) -> TensorVariable: - return pt.alloc(1.0, X.shape[0]) + return self._alloc(1.0, X.shape[0]) class Stationary(Covariance): @@ -504,7 +513,7 @@ def __init__( input_dim: int, ls=None, ls_inv=None, - active_dims: Optional[Sequence[int]] = None, + active_dims: Optional[IntSequence] = None, ): super().__init__(input_dim, active_dims) if (ls is None and ls_inv is None) or (ls is not None and ls_inv is not None): @@ -539,7 +548,7 @@ def _sqrt(self, r2): return pt.sqrt(r2 + 1e-12) def diag(self, X: TensorLike) -> TensorVariable: - return pt.alloc(1.0, X.shape[0]) + return self._alloc(1.0, X.shape[0]) def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable: raise NotImplementedError @@ -595,7 +604,7 @@ def __init__( alpha, ls=None, ls_inv=None, - active_dims: Optional[Sequence[int]] = None, + active_dims: Optional[IntSequence] = None, ): super().__init__(input_dim, ls, ls_inv, active_dims) self.alpha = alpha @@ -759,7 +768,7 @@ def __init__( period, ls=None, ls_inv=None, - active_dims: Optional[Sequence[int]] = None, + active_dims: Optional[IntSequence] = None, ): super().__init__(input_dim, ls, ls_inv, active_dims) 
@@ -396,13 +405,13 @@ def __init__(self, c):
         self.c = c
 
     def diag(self, X: TensorLike) -> TensorVariable:
-        return pt.alloc(self.c, X.shape[0])
+        return self._alloc(self.c, X.shape[0])
 
     def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         if Xs is None:
-            return pt.alloc(self.c, X.shape[0], X.shape[0])
+            return self._alloc(self.c, X.shape[0], X.shape[0])
         else:
-            return pt.alloc(self.c, X.shape[0], Xs.shape[0])
+            return self._alloc(self.c, X.shape[0], Xs.shape[0])
 
 
 class WhiteNoise(BaseCovariance):
@@ -418,13 +427,13 @@ def __init__(self, sigma):
         self.sigma = sigma
 
     def diag(self, X: TensorLike) -> TensorVariable:
-        return pt.alloc(pt.square(self.sigma), X.shape[0])
+        return self._alloc(pt.square(self.sigma), X.shape[0])
 
     def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         if Xs is None:
             return pt.diag(self.diag(X))
         else:
-            return pt.alloc(0.0, X.shape[0], Xs.shape[0])
+            return self._alloc(0.0, X.shape[0], Xs.shape[0])
 
 
 class Circular(Covariance):
@@ -464,7 +473,7 @@ def __init__(
         input_dim: int,
         period,
         tau=4,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, active_dims)
         self.c = pt.as_tensor_variable(period / 2)
@@ -485,7 +494,7 @@ def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         return self.weinland(self.dist(X, Xs))
 
     def diag(self, X: TensorLike) -> TensorVariable:
-        return pt.alloc(1.0, X.shape[0])
+        return self._alloc(1.0, X.shape[0])
 
 
 class Stationary(Covariance):
@@ -504,7 +513,7 @@ def __init__(
         input_dim: int,
         ls=None,
         ls_inv=None,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, active_dims)
         if (ls is None and ls_inv is None) or (ls is not None and ls_inv is not None):
@@ -539,7 +548,7 @@ def _sqrt(self, r2):
         return pt.sqrt(r2 + 1e-12)
 
     def diag(self, X: TensorLike) -> TensorVariable:
-        return pt.alloc(1.0, X.shape[0])
+        return self._alloc(1.0, X.shape[0])
 
     def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         raise NotImplementedError
@@ -595,7 +604,7 @@ def __init__(
         alpha,
         ls=None,
         ls_inv=None,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, ls, ls_inv, active_dims)
         self.alpha = alpha
@@ -759,7 +768,7 @@ def __init__(
         period,
         ls=None,
         ls_inv=None,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, ls, ls_inv, active_dims)
         self.period = period
@@ -783,7 +792,7 @@ class Linear(Covariance):
        k(x, x') = (x - c)(x' - c)
     """
 
-    def __init__(self, input_dim: int, c, active_dims: Optional[Sequence[int]] = None):
+    def __init__(self, input_dim: int, c, active_dims: Optional[IntSequence] = None):
         super().__init__(input_dim, active_dims)
         self.c = c
@@ -813,7 +822,7 @@ class Polynomial(Linear):
        k(x, x') = [(x - c)(x' - c) + \mathrm{offset}]^{d}
     """
 
-    def __init__(self, input_dim: int, c, d, offset, active_dims: Optional[Sequence[int]] = None):
+    def __init__(self, input_dim: int, c, d, offset, active_dims: Optional[IntSequence] = None):
         super().__init__(input_dim, c, active_dims)
         self.d = d
         self.offset = offset
@@ -850,7 +859,7 @@ def __init__(
         cov_func: Covariance,
         warp_func: Callable,
         args=None,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, active_dims)
         if not callable(warp_func):
@@ -896,7 +905,7 @@ def __init__(
         input_dim: int,
         lengthscale_func: Callable,
         args=None,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, active_dims)
         if active_dims is not None:
@@ -937,7 +946,7 @@ def full(self, X: TensorLike, Xs: Optional[TensorLike] = None) -> TensorVariable:
         return pt.sqrt((2.0 * pt.outer(rx, rz)) / (rx2 + rz2)) * pt.exp(-1.0 * r2 / (rx2 + rz2))
 
     def diag(self, X: TensorLike) -> TensorVariable:
-        return pt.alloc(1.0, X.shape[0])
+        return self._alloc(1.0, X.shape[0])
 
 
 class ScaledCov(Covariance):
@@ -965,7 +974,7 @@ def __init__(
         cov_func: Covariance,
         scaling_func: Callable,
         args=None,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, active_dims)
         if not callable(scaling_func):
@@ -1031,7 +1040,7 @@ def __init__(
         W=None,
         kappa=None,
         B=None,
-        active_dims: Optional[Sequence[int]] = None,
+        active_dims: Optional[IntSequence] = None,
     ):
         super().__init__(input_dim, active_dims)
         if len(self.active_dims) != 1:
diff --git a/tests/gp/test_cov.py b/tests/gp/test_cov.py
index ba8d41962c..feef63ab80 100644
--- a/tests/gp/test_cov.py
+++ b/tests/gp/test_cov.py
@@ -302,13 +302,20 @@ def test_covexp_shared(self):
         npt.assert_allclose(np.diag(K), Kd, atol=1e-5)
 
     def test_invalid_covexp(self):
-        X = np.linspace(0, 1, 10)[:, None]
         with pytest.raises(
             ValueError, match=r"A covariance function can only be exponentiated by a scalar value"
         ):
-            with pm.Model() as model:
+            with pm.Model():
                 a = np.array([[1.0, 2.0]])
-                cov = pm.gp.cov.ExpQuad(1, 0.1) ** a
+                pm.gp.cov.ExpQuad(1, 0.1) ** a
+
+    def test_invalid_covexp_noncov(self):
+        with pytest.raises(
+            TypeError,
+            match=r"Can only exponentiate covariance functions which inherit from `Covariance`",
+        ):
+            with pm.Model():
+                pm.gp.cov.Constant(2) ** 2
 
 
 class TestCovKron:
@@ -765,9 +772,9 @@ def func_twoarg(x, a, b):
         x = 100
         a = 2
         b = 3
-        func_noargs2 = pm.gp.cov.handle_args(func_noargs, None)
-        func_onearg2 = pm.gp.cov.handle_args(func_onearg, a)
-        func_twoarg2 = pm.gp.cov.handle_args(func_twoarg, args=(a, b))
+        func_noargs2 = pm.gp.cov.handle_args(func_noargs)
+        func_onearg2 = pm.gp.cov.handle_args(func_onearg)
+        func_twoarg2 = pm.gp.cov.handle_args(func_twoarg)
         assert func_noargs(x) == func_noargs2(x, args=None)
         assert func_onearg(x, a) == func_onearg2(x, args=a)
         assert func_twoarg(x, a, b) == func_twoarg2(x, args=(a, b))
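With `active_dims` now typed as `IntSequence = Union[np.ndarray, Sequence[int]]` throughout, both container spellings type-check as well as run. A hedged sketch (the kernel hyperparameters are illustrative):

    import numpy as np
    import pymc as pm

    # Equivalent ways to restrict a 3-input kernel to its first and last columns:
    k1 = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.2], active_dims=[0, 2])
    k2 = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.2], active_dims=np.array([0, 2]))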

From 9bf270dc3b74d51fe65d8da0414639fe47084201 Mon Sep 17 00:00:00 2001
From: Joseph Hall
Date: Wed, 21 Jun 2023 10:59:35 +0100
Subject: [PATCH 13/13] Seems 'product' has now been deprecated from numpy

---
 pymc/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc/util.py b/pymc/util.py
index 1885c25d90..8bb2e35ec8 100644
--- a/pymc/util.py
+++ b/pymc/util.py
@@ -250,7 +250,7 @@ def dataset_to_point_list(
     }
     points = [
         {vn: stacked_dict[vn][i, ...] for vn in var_names}
-        for i in range(np.product([len(coords) for coords in stacked_dims.values()]))
+        for i in range(np.prod([len(coords) for coords in stacked_dims.values()]))
     ]
     # use the list of points
     return cast(List[Dict[str, np.ndarray]], points), stacked_dims
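For reference, `np.product` is a deprecated alias of `np.prod` (it emits a deprecation warning as of NumPy 1.25 and was removed later), so this replacement is behaviour-preserving. A small sketch with made-up stacked dimensions:

    import numpy as np

    stacked_dims = {"chain": range(4), "draw": range(250)}
    n_points = int(np.prod([len(coords) for coords in stacked_dims.values()]))
    print(n_points)  # 1000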