stumpy-dev · seanlaw · Dec 1, 2020 · Nov 4, 2020 · Nov 4, 2020 · Nov 11, 2020
diff --git a/docs/Tutorial_Consensus_Motif.ipynb b/docs/Tutorial_Consensus_Motif.ipynb
diff --git a/stumpy/__init__.py b/stumpy/__init__.py
@@ -19,6 +19,7 @@
 from .aampi import aampi  # noqa: F401
 from .chains import atsc, allc  # noqa: F401
 from .floss import floss, fluss, _nnmark, _iac, _cac, _rea  # noqa: F401
+from .ostinato import ostinato  # noqa: F401
 from .scrump import scrump  # noqa: F401
 from .stumpi import stumpi  # noqa: F401
 from numba import cuda

diff --git a/stumpy/core.py b/stumpy/core.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 from numba import njit, prange
+from scipy.signal import convolve
 import tempfile
 import math
 
@@ -265,7 +266,7 @@ def sliding_dot_product(Q, T):
     n = T.shape[0]
     m = Q.shape[0]
     Qr = np.flipud(Q)  # Reverse/flip Q
-    QT = np.convolve(Qr, T)
+    QT = convolve(Qr, T)
 
     return QT.real[m - 1 : n]
 

diff --git a/stumpy/ostinato.py b/stumpy/ostinato.py
@@ -0,0 +1,258 @@
+import numpy as np
+from . import core, stump
+
+
+def ostinato(tss, m):
+    """
+    Find the consensus motif of multiple time series
+
+    This is a wrapper around the vanilla version of the ostinato algorithm
+    which finds the best radius and a helper function that finds the most
+    central conserved motif.
+
+    Parameters
+    ----------
+    tss : list
+        List of time series for which to find the consensus motif
+
+    m : int
+        Window size
+
+    Returns
+    -------
+    rad : float
+        Radius of the most central consensus motif
+
+    ts_ind : int
+        Index of time series which contains the most central consensus motif
+
+    ss_ind : int
+        Start index of the most central consensus motif within the time series
+        `ts_ind` that contains it
+
+    Notes
+    -----
+    <https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>
+
+    See Table 2
+
+    The ostinato algorithm proposed in the paper finds the best radius
+    in `tss`. Intuitively, the radius is the minimum distance of a
+    subsequence to encompass at least one nearest neighbor subsequence
+    from all other time series. The best radius in `tss` is the minimum
+    radius amongst all radii. Some data sets might contain multiple
+    subsequences which have the same optimal radius.
+    The greedy Ostinato algorithm only finds one of them, which might
+    not be the most central motif. The most central motif amongst the
+    subsequences with the best radius is the one with the smallest mean
+    distance to nearest neighbors in all other time series. To find this
+    central motif it is necessary to search the subsequences with the
+    best radius via stumpy.ostinato._get_central_motif
+    """
+    rad, ts_ind, ss_ind = _ostinato(tss, m)
+    return _get_central_motif(tss, rad, ts_ind, ss_ind, m)
+
+
+def _ostinato(tss, m):
+    """
+    Find the consensus motif of multiple time series
+
+    Parameters
+    ----------
+    tss : list
+        List of time series for which to find the consensus motif
+
+    m : int
+        Window size
+
+    Returns
+    -------
+    bsf_rad : float
+        Radius of the consensus motif
+
+    ts_ind : int
+        Index of time series which contains the consensus motif
+
+    ss_ind : int
+        Start index of consensus motif within the time series ts_ind
+        that contains it
+
+    Notes
+    -----
+    <https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>
+
+    See Table 2
+
+    The ostinato algorithm proposed in the paper finds the best radius
+    in `tss`. Intuitively, the radius is the minimum distance of a
+    subsequence to encompass at least one nearest neighbor subsequence
+    from all other time series. The best radius in `tss` is the minimum
+    radius amongst all radii. Some data sets might contain multiple
+    subsequences which have the same optimal radius.
+    The greedy Ostinato algorithm only finds one of them, which might
+    not be the most central motif. The most central motif amongst the
+    subsequences with the best radius is the one with the smallest mean
+    distance to nearest neighbors in all other time series. To find this
+    central motif it is necessary to search the subsequences with the
+    best radius via stumpy.ostinato._get_central_motif
+    """
+    # Preprocess means and stddevs and handle np.nan/np.inf
+    Ts = [None] * len(tss)
+    M_Ts = [None] * len(tss)
+    Σ_Ts = [None] * len(tss)
+    for i, T in enumerate(tss):
+        Ts[i], M_Ts[i], Σ_Ts[i] = core.preprocess(T, m)
+
+    bsf_rad, ts_ind, ss_ind = np.inf, 0, 0
+    k = len(Ts)
+    for j in range(k):
+        if j < (k - 1):
+            h = j + 1
+        else:
+            h = 0
+
+        mp = stump(Ts[j], m, Ts[h], ignore_trivial=False)
+        si = np.argsort(mp[:, 0])
+        for q in si:
+            rad = mp[q, 0]
+            if rad >= bsf_rad:
+                break
+            for i in range(k):
+                if ~np.isin(i, [j, h]):
+                    QT = core.sliding_dot_product(Ts[j][q : q + m], Ts[i])
+                    rad = np.max(
+                        (
+                            rad,
+                            np.min(
+                                core._mass(
+                                    Ts[j][q : q + m],
+                                    Ts[i],
+                                    QT,
+                                    M_Ts[j][q],
+                                    Σ_Ts[j][q],
+                                    M_Ts[i],
+                                    Σ_Ts[i],
+                                )
+                            ),
+                        )
+                    )
+                    if rad >= bsf_rad:
+                        break
+            if rad < bsf_rad:
+                bsf_rad, ts_ind, ss_ind = rad, j, q
+
+    return bsf_rad, ts_ind, ss_ind
+
+
+def _get_central_motif(tss, rad, ts_ind, ss_ind, m):
+    """
+    Compare subsequences with the same radius and return the most central motif
+
+    Parameters
+    ----------
+    tss : list
+        List of time series for which to find the most central motif
+
+    rad : float
+        Best radius found by a consensus search algorithm
+
+    ts_ind : int
+        Index of time series in which `rad` was found first
+
+    ss_ind : int
+        Start index of subsequence in `ts_ind` that has radius `rad`
+
+    m : int
+        Window size
+
+    Returns
+    -------
+    rad : float
+        Radius of the most central consensus motif
+
+    ts_ind : int
+        Index of time series which contains the most central consensus motif
+
+    ss_ind : int
+        Start index of most central consensus motif within the time series `ts_ind`
+        that contains it
+
+    Notes
+    -----
+    <https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>
+
+    See Table 2
+
+    The ostinato algorithm proposed in the paper finds the best radius
+    in `tss`. Intuitively, the radius is the minimum distance of a
+    subsequence to encompass at least one nearest neighbor subsequence
+    from all other time series. The best radius in `tss` is the minimum
+    radius amongst all radii. Some data sets might contain multiple
+    subsequences which have the same optimal radius.
+    The greedy Ostinato algorithm only finds one of them, which might
+    not be the most central motif. The most central motif amongst the
+    subsequences with the best radius is the one with the smallest mean
+    distance to nearest neighbors in all other time series. To find this
+    central motif it is necessary to search the subsequences with the
+    best radius via stumpy.ostinato._get_central_motif
+    """
+    k = len(tss)
+
+    # Ostinato hit: get nearest neighbors' distances and indices
+    q_ost = tss[ts_ind][ss_ind : ss_ind + m]
+    ss_ind_nn_ost, d_ost = _across_series_nearest_neighbors(q_ost, tss)
+
+    # Alternative candidates: Distance to ostinato hit equals best radius
+    ts_ind_alt = np.flatnonzero(np.isclose(d_ost, rad))
+    ss_ind_alt = ss_ind_nn_ost[ts_ind_alt]
+    d_alt = np.zeros((len(ts_ind_alt), k), dtype=float)
+    for i, (tsi, ssi) in enumerate(zip(ts_ind_alt, ss_ind_alt)):
+        q = tss[tsi][ssi : ssi + m]
+        _, d_alt[i] = _across_series_nearest_neighbors(q, tss)
+    rad_alt = np.max(d_alt, axis=1)
+    d_mean_alt = np.mean(d_alt, axis=1)
+
+    # Alternatives with same radius and lower mean distance
+    alt_better = np.logical_and(
+        np.isclose(rad_alt, rad),
+        d_mean_alt < d_ost.mean(),
+    )
+    if np.any(alt_better):
+        ts_ind_alt = ts_ind_alt[alt_better]
+        ss_ind_alt = ss_ind_alt[alt_better]
+        d_mean_alt = d_mean_alt[alt_better]
+        i_alt_best = np.argmin(d_mean_alt)
+        return rad, ts_ind_alt[i_alt_best], ss_ind_alt[i_alt_best]
+    else:
+        return rad, ts_ind, ss_ind
+
+
+def _across_series_nearest_neighbors(q, tss):
+    """
+    For multiple time series find, per individual time series, the subsequences closest
+    to a query.
+
+    Parameters
+    ----------
+    q : ndarray
+        Query array or subsequence
+
+    tss : list
+        List of time series for which to the nearest neighbors to `q`
+
+    Returns
+    -------
+    ss_ind_nn : ndarray
+        Indices to subsequences in `tss` that are closest to `q`
+
+    d : ndarray
+        Distances to subsequences in `tss` that are closest to `q`
+    """
+    k = len(tss)
+    d = np.zeros(k, dtype=float)
+    ss_ind_nn = np.zeros(k, dtype=int)
+    for i in range(k):
+        dp = core.mass(q, tss[i])
+        ss_ind_nn[i] = np.argmin(dp)
+        d[i] = dp[ss_ind_nn[i]]
+    return ss_ind_nn, d
diff --git a/test.sh b/test.sh
@@ -13,7 +13,7 @@ if [ $# -gt 0 ]; then
     else
         echo "Using default test_mode=\"all\""
     fi
-fi 
+fi
 
 ###############
 #  Functions  #
@@ -63,11 +63,11 @@ test_unit()
     check_errs $?
     py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stump.py tests/test_mstump.py tests/test_scrump.py tests/test_stumpi.py
     check_errs $?
-    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped.py 
+    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped.py
     check_errs $?
-    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_one_constant_subsequence.py 
+    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_one_constant_subsequence.py
     check_errs $?
-    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_constant_subsequences.py 
+    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_constant_subsequences.py
     check_errs $?
     py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_constant_subsequences_swap.py
     check_errs $?
@@ -87,7 +87,7 @@ test_unit()
     check_errs $?
     py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_stumped_two_subsequences_nan_inf_A_B_join_swap.py
     check_errs $?
-    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mstumped.py 
+    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mstumped.py
     check_errs $?
     py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_mstumped_one_subsequence_nan_self_join.py
     check_errs $?
@@ -99,9 +99,9 @@ test_unit()
     check_errs $?
     py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped.py
     check_errs $?
-    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_one_constant_subsequence.py 
+    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_one_constant_subsequence.py
     check_errs $?
-    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_constant_subsequences.py 
+    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_constant_subsequences.py
     check_errs $?
     py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_constant_subsequences_swap.py
     check_errs $?
@@ -121,6 +121,8 @@ test_unit()
     check_errs $?
     py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_aamped_two_subsequences_nan_inf_A_B_join_swap.py
     check_errs $?
+    py.test -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning tests/test_ostinato.py
+    check_errs $?
 }
 
 test_coverage()

diff --git a/tests/test_ostinato.py b/tests/test_ostinato.py
@@ -0,0 +1,53 @@
+import numpy as np
+import numpy.testing as npt
+import stumpy
+from stumpy.ostinato import _get_central_motif
+import pytest
+
+
+def naive_consensus_search(tss, m):
+    """
+    Brute force consensus motif from
+    <https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>
+
+    See Table 1
+
+    Note that there is a bug in the pseudocode at line 8 where `i` should be `j`.
+    This implementation fixes it.
+    """
+    k = len(tss)
+
+    rad = np.inf
+    ts_ind = 0
+    ss_ind = 0
+
+    for j in range(k):
+        radii = np.zeros(len(tss[j]) - m + 1)
+        for i in range(k):
+            if i != j:
+                mp = stumpy.stump(tss[j], m, tss[i], ignore_trivial=False)
+                radii = np.max((radii, mp[:, 0]), axis=0)
+        min_rad_index = np.argmin(radii)
+        min_rad = radii[min_rad_index]
+        if min_rad < rad:
+            rad = min_rad
+            ts_ind = j
+            ss_ind = min_rad_index
+
+    return _get_central_motif(tss, rad, ts_ind, ss_ind, m)
+
+
+@pytest.mark.parametrize(
+    "seed", np.random.choice(np.arange(10000), size=100, replace=False)
+)
+def test_ostinato(seed):
+    m = 50
+    np.random.seed(seed)
+    tss = [np.random.rand(n) for n in [64, 128, 256]]
+
+    rad_naive, ts_ind_naive, ss_ind_naive = naive_consensus_search(tss, m)
+    rad_ostinato, ts_ind_ostinato, ss_ind_ostinato = stumpy.ostinato(tss, m)
+
+    npt.assert_almost_equal(rad_naive, rad_ostinato)
+    npt.assert_almost_equal(ts_ind_naive, ts_ind_ostinato)
+    npt.assert_almost_equal(ss_ind_naive, ss_ind_ostinato)