diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 56b785742..73bee1dbd 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -52,7 +52,7 @@ body: label: "PyMC version information:" description: > PyMC/PyMC3 Version: - Aesara/Theano Version: + PyTensor/Aesara Version: Python Version: Operating system: How did you install PyMC/PyMC3: (conda/pip) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 83860b895..72a68cda1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,7 +40,7 @@ jobs: - | pymc/tests/test_util.py pymc/tests/distributions/test_logprob.py - pymc/tests/test_aesaraf.py + pymc/tests/test_pytensorf.py pymc/tests/test_math.py pymc/tests/backends/test_base.py pymc/tests/backends/test_ndarray.py @@ -102,7 +102,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' + PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: shell: bash -l {0} @@ -173,7 +173,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' + PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' defaults: run: shell: cmd @@ -252,7 +252,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' + PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: shell: bash -l {0} @@ -317,7 +317,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' + PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=native' defaults: run: shell: bash -l {0} @@ -387,7 +387,7 @@ jobs: runs-on: ${{ matrix.os }} env: TEST_SUBSET: ${{ matrix.test-subset }} - AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' + PYTENSOR_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2' defaults: run: shell: cmd diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fb4be72fd..083150677 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -84,7 +84,7 @@ repos: entry: > (?x)(arviz-devs.github.io| python.arviz.org| - aesara.readthedocs.io| + pytensor.readthedocs.io| pymc-experimental.readthedocs.io| docs.pymc.io| www.pymc.io| diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 26357c328..89f5e73a6 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -25,7 +25,7 @@ It is easier to start with functionality that is not present in PyMC but rather deferred to outside libraries. 
If seeking to understand any of the topics below refer to that specific library -### Aesara +### PyTensor * Gradient computation * Random number generation * Low level tensor operation definition @@ -62,7 +62,7 @@ In no particular order they are * `ContextMeta`: The context manager that enables the `with pm.Model() as model` syntax * {class}`~pymc.Factor`: Defines the methods for the various logprobs for models -* `ValueGrad` which handles the value and gradient and is the main connection point to Aesara +* `ValueGrad` which handles the value and gradient and is the main connection point to PyTensor * `Deterministic` and `Potential`: Definitions for two pieces of functionality useful in some model definitions ## distributions/ @@ -74,7 +74,7 @@ Important modules to note are a random variable distribution from a likelihood distribution. * `logprob.py`: This contains the log probability logic for the distributions themselves. - The log probability calculation is deferred to Aesara + The log probability calculation is deferred to PyTensor * `dist_math.py`: Various convenience operators for distributions. This includes mathematical operators such as `logpower` or `all_true`methods. diff --git a/README.rst b/README.rst index c8b3c247b..521eaa615 100644 --- a/README.rst +++ b/README.rst @@ -26,7 +26,7 @@ Features - **Variational inference**: `ADVI `__ for fast approximate posterior estimation as well as mini-batch ADVI for large data sets. -- Relies on `Aesara `__ which provides: +- Relies on `PyTensor `__ which provides: * Computation optimization and dynamic C or JAX compilation * NumPy broadcasting and advanced indexing * Linear algebra operators diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py index 9ebe1494c..26abf8e45 100644 --- a/benchmarks/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks/benchmarks.py @@ -14,11 +14,11 @@ import time import timeit -import aesara -import aesara.tensor as at import arviz as az import numpy as np import pandas as pd +import pytensor +import pytensor.tensor as at import pymc as pm @@ -27,7 +27,7 @@ def glm_hierarchical_model(random_seed=123): """Sample glm hierarchical model to use in benchmarks""" np.random.seed(random_seed) data = pd.read_csv(pm.get_data("radon.csv")) - data["log_radon"] = data["log_radon"].astype(aesara.config.floatX) + data["log_radon"] = data["log_radon"].astype(pytensor.config.floatX) county_idx = data.county_code.values n_counties = len(data.county.unique()) diff --git a/conda-envs/environment-dev.yml b/conda-envs/environment-dev.yml index 3bd3e03eb..0600471e7 100644 --- a/conda-envs/environment-dev.yml +++ b/conda-envs/environment-dev.yml @@ -5,7 +5,6 @@ channels: - defaults dependencies: # Base dependencies -- aesara=2.8.8 - arviz>=0.13.0 - blas - cachetools>=4.2.1 @@ -15,6 +14,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip +- pytensor=2.8.10 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/conda-envs/environment-test.yml b/conda-envs/environment-test.yml index c443a5e13..fc1decdec 100644 --- a/conda-envs/environment-test.yml +++ b/conda-envs/environment-test.yml @@ -5,7 +5,6 @@ channels: - defaults dependencies: # Base dependencies -- aesara=2.8.8 - arviz>=0.13.0 - blas - cachetools>=4.2.1 @@ -17,6 +16,8 @@ dependencies: - mkl-service - numpy>=1.15.0 - pandas>=0.24.0 +- pip +- pytensor=2.8.10 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/conda-envs/windows-environment-dev.yml b/conda-envs/windows-environment-dev.yml index 1abeb09bf..0459beb8a 100644 --- 
a/conda-envs/windows-environment-dev.yml +++ b/conda-envs/windows-environment-dev.yml @@ -5,7 +5,6 @@ channels: - defaults dependencies: # Base dependencies (see install guide for Windows) -- aesara=2.8.8 - arviz>=0.13.0 - blas - cachetools>=4.2.1 @@ -15,6 +14,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip +- pytensor=2.8.10 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/conda-envs/windows-environment-test.yml b/conda-envs/windows-environment-test.yml index 2a262ccd9..b637993a3 100644 --- a/conda-envs/windows-environment-test.yml +++ b/conda-envs/windows-environment-test.yml @@ -5,7 +5,6 @@ channels: - defaults dependencies: # Base dependencies (see install guide for Windows) -- aesara=2.8.8 - arviz>=0.13.0 - blas - cachetools>=4.2.1 @@ -18,6 +17,7 @@ dependencies: - numpy>=1.15.0 - pandas>=0.24.0 - pip +- pytensor=2.8.10 - python-graphviz - networkx - scipy>=1.4.1 diff --git a/docs/source/PyMC_and_Aesara.rst b/docs/source/PyMC_and_PyTensor.rst similarity index 81% rename from docs/source/PyMC_and_Aesara.rst rename to docs/source/PyMC_and_PyTensor.rst index 7bf1b5d63..28ffbb542 100644 --- a/docs/source/PyMC_and_Aesara.rst +++ b/docs/source/PyMC_and_PyTensor.rst @@ -3,21 +3,21 @@ .. _href from docs/source/index.rst -=============== -PyMC and Aesara -=============== +================= +PyMC and PyTensor +================= -What is Aesara -============== +What is PyTensor +================ -Aesara is a package that allows us to define functions involving array +PyTensor is a package that allows us to define functions involving array operations and linear algebra. When we define a PyMC model, we implicitly -build up an Aesara function from the space of our parameters to +build up a PyTensor function from the space of our parameters to their posterior probability density up to a constant factor. We then use symbolic manipulations of this function to also get access to its gradient. -For a thorough introduction to Aesara see the -:doc:`aesara docs `, +For a thorough introduction to PyTensor see the +:doc:`pytensor docs `, but for the most part you don't need detailed knowledge about it as long as you are not trying to define new distributions or other extensions of PyMC. But let's look at a simple example to get a rough @@ -33,8 +33,8 @@ arbitrarily chosen) function First, we need to define symbolic variables for our inputs (this is similar to eg SymPy's `Symbol`):: - import aesara - import aesara.tensor as at + import pytensor + import pytensor.tensor as at # We don't specify the dtype of our input variables, so it # defaults to using float64 without any special config. a = at.scalar('a') @@ -56,16 +56,16 @@ do to compute the output:: of the exponential of `inner`. Somewhat surprisingly, it would also have worked if we used `np.exp`. This is because numpy gives objects it operates on a chance to define the results of - operations themselves. Aesara variables do this for a large number - of operations. We usually still prefer the Aesara + operations themselves. PyTensor variables do this for a large number + of operations. We usually still prefer the PyTensor functions instead of the numpy versions, as that makes it clear that we are working with symbolic input instead of plain arrays. -Now we can tell Aesara to build a function that does this computation. -With a typical configuration, Aesara generates C code, compiles it, +Now we can tell PyTensor to build a function that does this computation.
+With a typical configuration, PyTensor generates C code, compiles it, and creates a python function which wraps the C function:: - func = aesara.function([a, x, y], [out]) + func = pytensor.function([a, x, y], [out]) We can call this function with actual arrays as many times as we want:: @@ -75,15 +75,15 @@ We can call this function with actual arrays as many times as we want:: out = func(a_val, x_vals, y_vals) -For the most part the symbolic Aesara variables can be operated on -like NumPy arrays. Most NumPy functions are available in `aesara.tensor` +For the most part the symbolic PyTensor variables can be operated on +like NumPy arrays. Most NumPy functions are available in `pytensor.tensor` (which is typically imported as `at`). A lot of linear algebra operations can be found in `at.nlinalg` and `at.slinalg` (the NumPy and SciPy operations respectively). Some support for sparse matrices is available -in `aesara.sparse`. For a detailed overview of available operations, -see :mod:`the aesara api docs `. +in `pytensor.sparse`. For a detailed overview of available operations, +see :mod:`the pytensor api docs `. -A notable exception where Aesara variables do *not* behave like +A notable exception where PyTensor variables do *not* behave like NumPy arrays are operations involving conditional execution. Code like this won't work as expected:: @@ -123,16 +123,16 @@ Changing elements of an array is possible using `at.set_subtensor`:: a = at.vector('a') b = at.set_subtensor(a[:10], 1) - # is roughly equivalent to this (although aesara avoids + # is roughly equivalent to this (although pytensor avoids # the copy if `a` isn't used anymore) a = np.random.randn(10) b = a.copy() b[:10] = 1 -How PyMC uses Aesara +How PyMC uses PyTensor ==================== -Now that we have a basic understanding of Aesara we can look at what +Now that we have a basic understanding of PyTensor we can look at what happens if we define a PyMC model. Let's look at a simple example:: true_mu = 0.1 @@ -159,7 +159,7 @@ where with the normal likelihood :math:`N(x|μ,σ^2)` To build that function we need to keep track of two things: The parameter space (the *free variables*) and the logp function. For each free variable -we generate an Aesara variable. And for each variable (observed or otherwise) +we generate a PyTensor variable. And for each variable (observed or otherwise) we add a term to the global logp. In the background something similar to this is happening:: @@ -177,7 +177,7 @@ So calling `pm.Normal()` modifies the model: It changes the logp function of the model. If the `observed` keyword isn't set it also creates a new free variable. In contrast, `pm.Normal.dist()` doesn't care about the model, it just creates an object that represents the normal distribution. Calling -`logp` on this object creates an Aesara variable for the logp probability +`logp` on this object creates a PyTensor variable for the logp probability or log probability density of the distribution, but again without changing the model in any way.
@@ -209,8 +209,8 @@ is roughly equivalent to this:: model.add_logp_term(pm.Normal.dist(mu, sigma).logp(data)) The return values of the variable constructors are subclasses -of Aesara variables, so when we define a variable we can use any -Aesara operation on them:: +of PyTensor variables, so when we define a variable we can use any +PyTensor operation on them:: design_matrix = np.array([[...]]) with pm.Model() as model: diff --git a/docs/source/api.rst b/docs/source/api.rst index 06ec5e682..ded5fb0be 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -18,7 +18,7 @@ API api/ode api/tuning api/math - api/aesaraf + api/pytensorf api/shape_utils api/misc diff --git a/docs/source/api/math.rst b/docs/source/api/math.rst index c471a7bf8..5d1194cbb 100644 --- a/docs/source/api/math.rst +++ b/docs/source/api/math.rst @@ -3,8 +3,8 @@ Math ==== This submodule contains various mathematical functions. Most of them are imported directly -from aesara.tensor (see there for more details). Doing any kind of math with PyMC random -variables, or defining custom likelihoods or priors requires you to use these Aesara +from pytensor.tensor (see there for more details). Doing any kind of math with PyMC random +variables, or defining custom likelihoods or priors requires you to use these PyTensor expressions rather than NumPy or Python code. .. currentmodule:: pymc diff --git a/docs/source/api/aesaraf.rst b/docs/source/api/pytensorf.rst similarity index 95% rename from docs/source/api/aesaraf.rst rename to docs/source/api/pytensorf.rst index 3469cec8d..4dd1dc677 100644 --- a/docs/source/api/aesaraf.rst +++ b/docs/source/api/pytensorf.rst @@ -1,4 +1,4 @@ -Aesara utils +PyTensor utils -************ +************** .. currentmodule:: pymc diff --git a/docs/source/conf.py b/docs/source/conf.py index ed5cb9d4d..456ffc40d 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -63,8 +63,8 @@ } # fmt: on numpydoc_xref_aliases = { - "TensorVariable": ":class:`~aesara.tensor.TensorVariable`", - "RandomVariable": ":class:`~aesara.tensor.random.RandomVariable`", + "TensorVariable": ":class:`~pytensor.tensor.TensorVariable`", + "RandomVariable": ":class:`~pytensor.tensor.random.RandomVariable`", "ndarray": ":class:`~numpy.ndarray`", "Covariance": ":mod:`Covariance `", "Mean": ":mod:`Mean `", @@ -74,7 +74,7 @@ "Point": ":class:`~pymc.Point`", "Model": ":class:`~pymc.Model`", "SMC_kernel": ":ref:`SMC Kernel `", - "Aesara_Op": ":class:`Aesara Op `", + "PyTensor_Op": ":class:`PyTensor Op `", "tensor_like": ":term:`tensor_like`", "numpy_Generator": ":class:`~numpy.random.Generator`", "Distribution": ":ref:`Distribution `", @@ -187,7 +187,7 @@ # intersphinx configuration to ease linking arviz docs intersphinx_mapping = { "arviz": ("https://python.arviz.org/en/latest/", None), - "aesara": ("https://aesara.readthedocs.io/en/latest/", None), + "pytensor": ("https://pytensor.readthedocs.io/en/latest/", None), "home": ("https://www.pymc.io", None), "pmx": ("https://www.pymc.io/projects/experimental/en/latest", None), "numpy": ("https://numpy.org/doc/stable/", None), diff --git a/docs/source/contributing/developer_guide.md b/docs/source/contributing/developer_guide.md index 91583dc6b..cfc25e05f 100644 --- a/docs/source/contributing/developer_guide.md +++ b/docs/source/contributing/developer_guide.md @@ -4,7 +4,7 @@ orphan: true # PyMC Developer Guide -{doc}`PyMC ` is a Python package for Bayesian statistical modeling built on top of {doc}`Aesara `.
+{doc}`PyMC ` is a Python package for Bayesian statistical modeling built on top of {doc}`PyTensor `. This document aims to explain the design and implementation of probabilistic programming in PyMC, with comparisons to other PPLs like TensorFlow Probability (TFP) and Pyro. A user-facing API introduction can be found in the {ref}`API quickstart `. A more accessible, user facing deep introduction can be found in [Peadar Coyle's probabilistic programming primer](https://github.com/springcoil/probabilisticprogrammingprimer). @@ -34,8 +34,8 @@ $$ z \sim \text{Normal}(0, 5) $$ -A call to a {class}`~pymc.Distribution` constructor as shown above returns an Aesara {class}`~aesara.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on. -Under the hood, the variables are created through the {meth}`~pymc.Distribution.dist` API, which calls the {class}`~aesara.tensor.random.basic.RandomVariable` {class}`~aesara.graph.op.Op` corresponding to the distribution. +A call to a {class}`~pymc.Distribution` constructor as shown above returns a PyTensor {class}`~pytensor.tensor.TensorVariable`, which is a symbolic representation of the model variable and the graph of inputs it depends on. +Under the hood, the variables are created through the {meth}`~pymc.Distribution.dist` API, which calls the {class}`~pytensor.tensor.random.basic.RandomVariable` {class}`~pytensor.graph.op.Op` corresponding to the distribution. At a high level of abstraction, the idea behind ``RandomVariable`` ``Op``s is to create symbolic variables (``TensorVariable``s) that can be associated with the properties of a probability distribution. For example, the ``RandomVariable`` ``Op`` which becomes part of the symbolic computation graph is associated with the random number generators or probability mass/density functions of the distribution. @@ -46,8 +46,8 @@ In the example above, where the ``TensorVariable`` ``z`` is created to be {math} with pm.Model(): z = pm.Normal("z", 0, 5) print(type(z.owner.op)) -# ==> aesara.tensor.random.basic.NormalRV -isinstance(z.owner.op, aesara.tensor.random.basic.RandomVariable) +# ==> pytensor.tensor.random.basic.NormalRV +isinstance(z.owner.op, pytensor.tensor.random.basic.RandomVariable) # ==> True ``` @@ -71,12 +71,12 @@ ln(0.070413) &= -2.6533 \end{aligned} $$ -In the probabilistic programming context, this enables PyMC and its backend libraries aeppl and Aesara to create and evaluate computation graphs to compute, for example log-prior or log-likelihood values. +In the probabilistic programming context, this enables PyMC and its backend PyTensor to create and evaluate computation graphs to compute, for example log-prior or log-likelihood values. ## PyMC in Comparison -Within the PyMC model context, random variables are essentially Aesara tensors that can be used in all kinds of operations as if they were NumPy arrays. +Within the PyMC model context, random variables are essentially PyTensor tensors that can be used in all kinds of operations as if they were NumPy arrays. This is different compared to TFP and pyro, where one needs to be more explicit about the conversion from random variables to tensors. Consider the following examples, which implement the below model. @@ -92,8 +92,8 @@ $$ ```python with pm.Model() as model: - z = pm.Normal('z', mu=0., sigma=5.) # ==> aesara.tensor.var.TensorVariable - x = pm.Normal('x', mu=z, sigma=1., observed=5.) # ==> aesara.tensor.var.TensorVariable + z = pm.Normal('z', mu=0., sigma=5.)
# ==> pytensor.tensor.var.TensorVariable + x = pm.Normal('x', mu=z, sigma=1., observed=5.) # ==> pytensor.tensor.var.TensorVariable # The log-prior of z=2.5 pm.logp(z, 2.5).eval() # ==> -2.65337645 # ??????? @@ -134,7 +134,7 @@ model_logp # ==> -6.6973152 ## Behind the scenes of the ``logp`` function -The ``logp`` function is straightforward - it is an Aesara function within each distribution. +The ``logp`` function is straightforward - it is a PyTensor function within each distribution. It has the following signature: :::{warning} @@ -145,21 +145,21 @@ The code block is outdated. def logp(self, value): # GET PARAMETERS param1, param2, ... = self.params1, self.params2, ... - # EVALUATE LOG-LIKELIHOOD FUNCTION, all inputs are (or array that could be convert to) Aesara tensor + # EVALUATE LOG-LIKELIHOOD FUNCTION, all inputs are (or array that could be convert to) PyTensor tensor total_log_prob = f(param1, param2, ..., value) return total_log_prob ``` -In the ``logp`` method, parameters and values are either Aesara tensors, or could be converted to tensors. -It is rather convenient as the evaluation of logp is represented as a tensor (``RV.logpt``), and when we linked different ``logp`` together (e.g., summing all ``RVs.logpt`` to get the model total logp) the dependence is taken care of by Aesara when the graph is built and compiled. +In the ``logp`` method, parameters and values are either PyTensor tensors, or could be converted to tensors. +It is rather convenient as the evaluation of logp is represented as a tensor (``RV.logpt``), and when we linked different ``logp`` together (e.g., summing all ``RVs.logpt`` to get the model total logp) the dependence is taken care of by PyTensor when the graph is built and compiled. Again, since the compiled function depends on the nodes that already in the graph, whenever you want to generate a new function that takes new input tensors you either need to regenerate the graph with the appropriate dependencies, or replace the node by editing the existing graph. -In PyMC we use the second approach by using ``aesara.clone_replace()`` when it is needed. +In PyMC we use the second approach by using ``pytensor.clone_replace()`` when it is needed. As explained above, distribution in a ``pm.Model()`` context automatically turn into a tensor with distribution property (PyMC random variable). To get the logp of a free\_RV is just evaluating the ``logp()`` [on itself](https://github.com/pymc-devs/pymc/blob/6d07591962a6c135640a3c31903eba66b34e71d8/pymc/model.py#L1212-L1213): ```python -# self is a aesara.tensor with a distribution attached +# self is a pytensor.tensor with a distribution attached self.logp_sum_unscaledt = distribution.logp_sum(self) self.logp_nojac_unscaledt = distribution.logp_nojac(self) ``` @@ -214,7 +214,7 @@ Thus, we have two equivalent ways of adding random variable to a model: with pm.Model() as m: x = pm.Normal('x', mu=0., sigma=1.) -print(type(x)) # ==> +print(type(x)) # ==> print(m.free_RVs) # ==> [x] print(logpt(x, 5.0)) # ==> Elemwise{switch,no_inplace}.0 print(logpt(x, 5.).eval({})) # ==> -13.418938533204672 @@ -259,7 +259,7 @@ possible (see also in Now, back to ``model.RV(...)`` - things returned from ``model.RV(...)`` -are Aesara tensor variables, and it is clear from looking at +are PyTensor tensor variables, and it is clear from looking at ``TransformedRV``: ..
code:: python @@ -277,7 +277,7 @@ as for ``FreeRV`` and ``ObservedRV``, they are ``TensorVariable``\s with ``Factor`` basically `enable and assign the logp `__ -(representated as a tensor also) property to an Aesara tensor (thus +(represented as a tensor also) property to a PyTensor tensor (thus making it a random variable). For a ``TransformedRV``, it transforms the distribution into a ``TransformedDistribution``, and then ``model.Var`` is called again to added the RV associated with the @@ -317,9 +317,9 @@ In a way, ``pm.Model`` is a tape machine that records what is being added to the * deterministics * potentials * missing\_values -The model context then computes some simple model properties, builds a bijection mapping that transforms between dictionary and numpy/Aesara ndarray, thus allowing the ``logp``/``dlogp`` functions to have two equivalent versions: +The model context then computes some simple model properties, builds a bijection mapping that transforms between dictionary and numpy/PyTensor ndarray, thus allowing the ``logp``/``dlogp`` functions to have two equivalent versions: One takes a ``dict`` as input and the other takes an ``ndarray`` as input. -More importantly, a ``pm.Model()`` contains methods to compile Aesara functions that take Random Variables (that are also initialised within the same model) as input, for example: +More importantly, a ``pm.Model()`` contains methods to compile PyTensor functions that take Random Variables (that are also initialised within the same model) as input, for example: ```python with pm.Model() as m: @@ -365,7 +365,7 @@ The model collects all the random variables (everything in ``model.free_RVs`` an ```python @property def logpt(self): - """Aesara scalar of log-probability of the model""" + """PyTensor scalar of log-probability of the model""" with self: factors = [var.logpt for var in self.basic_RVs] + self.potentials logp = at.sum([at.sum(factor) for factor in factors]) @@ -373,10 +373,10 @@ def logpt(self): return logp ``` -which returns an Aesara tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the Aesara graph). +which returns a PyTensor tensor that its value depends on the free parameters in the model (i.e., its parent nodes from the PyTensor graph). You can evaluate or compile into a python callable (that you can pass numpy as input args). -Note that the logp tensor depends on its input in the Aesara graph, thus you cannot pass new tensor to generate a logp function. -For similar reason, in PyMC we do graph copying a lot using aesara.clone_replace to replace the inputs to a tensor. +Note that the logp tensor depends on its input in the PyTensor graph, thus you cannot pass new tensor to generate a logp function. +For similar reason, in PyMC we do graph copying a lot using pytensor.clone_replace to replace the inputs to a tensor. ```python with pm.Model() as m: @@ -388,7 +388,7 @@ print(m.basic_RVs) # ==> [z, x, y] print(m.free_RVs) # ==> [z, x] type(m.logpt) -# aesara.tensor.var.TensorVariable +# pytensor.tensor.var.TensorVariable m.logpt.eval({x: np.random.randn(*x.tag.test_value.shape) for x in m.free_RVs}) # array(-51.25369126) @@ -410,8 +410,8 @@ def logp_dlogp_function(self, grad_vars=None, **kwargs): return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs) ``` -``ValueGradFunction`` is a callable class which isolates part of the Aesara graph to compile additional Aesara functions.
-PyMC relies on ``aesara.clone_replace`` to copy the ``model.logpt`` and replace its input. +``ValueGradFunction`` is a callable class which isolates part of the PyTensor graph to compile additional PyTensor functions. +PyMC relies on ``pytensor.clone_replace`` to copy the ``model.logpt`` and replace its input. It does not edit or rewrite the graph directly. The important parts of the above function is highlighted and commented. @@ -465,16 +465,16 @@ func_conditional(input_array2) So why is this necessary? One can imagine that we just compile one logp function, and do bookkeeping ourselves. -For example, we can build the logp function in Aesara directly: +For example, we can build the logp function in PyTensor directly: ```python -import aesara -func = aesara.function(m.free_RVs, m.logpt) +import pytensor +func = pytensor.function(m.free_RVs, m.logpt) func(*inputlist) # array(-51.0769075) -logpt_grad = aesara.grad(m.logpt, m.free_RVs) -func_d = aesara.function(m.free_RVs, logpt_grad) +logpt_grad = pytensor.grad(m.logpt, m.free_RVs) +func_d = pytensor.function(m.free_RVs, logpt_grad) func_d(*inputlist) # [array([ 0.74230226, 0.01658948, 1.38606194, 0.11253699, -1.07003284, # 2.64302891, 1.12497754, -0.35967542, -1.18117557, -1.11489642]), @@ -486,13 +486,13 @@ func_d(*inputlist) Similarly, build a conditional logp: ```python -shared = aesara.shared(inputlist[1]) -func2 = aesara.function([m.free_RVs[0]], m.logpt, givens=[(m.free_RVs[1], shared)]) +shared = pytensor.shared(inputlist[1]) +func2 = pytensor.function([m.free_RVs[0]], m.logpt, givens=[(m.free_RVs[1], shared)]) print(func2(inputlist[0])) # -51.07690750130328 -logpt_grad2 = aesara.grad(m.logpt, m.free_RVs[0]) -func_d2 = aesara.function([m.free_RVs[0]], logpt_grad2, givens=[(m.free_RVs[1], shared)]) +logpt_grad2 = pytensor.grad(m.logpt, m.free_RVs[0]) +func_d2 = pytensor.function([m.free_RVs[0]], logpt_grad2, givens=[(m.free_RVs[1], shared)]) print(func_d2(inputlist[0])) # [ 0.74230226 0.01658948 1.38606194 0.11253699 -1.07003284 2.64302891 # 1.12497754 -0.35967542 -1.18117557 -1.11489642] @@ -502,7 +502,7 @@ The above also gives the same logp and gradient as the output from ``model.logp_ But the difficulty is to compile everything into a single function: ```python -func_logp_and_grad = aesara.function(m.free_RVs, [m.logpt, logpt_grad]) +func_logp_and_grad = pytensor.function(m.free_RVs, [m.logpt, logpt_grad]) # ==> ERROR ``` @@ -511,15 +511,15 @@ We want to have a function that return the evaluation and its gradient re each i We can of course wrap 2 functions - one for logp one for dlogp - and output a list. But that would mean we need to call 2 functions. In addition, when we write code using python logic to do bookkeeping when we build our conditional logp. -Using ``aesara.clone_replace``, we always have the input to the Aesara function being a 1d vector (instead of a list of RV that each can have very different shape), thus it is very easy to do matrix operation like rotation etc. +Using ``pytensor.clone_replace``, we always have the input to the PyTensor function being a 1d vector (instead of a list of RV that each can have very different shape), thus it is very easy to do matrix operation like rotation etc. ### Notes -The current setup is quite powerful, as the Aesara compiled function is fairly fast to compile and to call. +The current setup is quite powerful, as the PyTensor compiled function is fairly fast to compile and to call. 
Also, when we are repeatedly calling a conditional logp function, external RV only need to reset once. -However, there are still significant overheads when we are passing values between Aesara graph and NumPy. +However, there are still significant overheads when we are passing values between PyTensor graph and NumPy. That is the reason we often see no advantage in using GPU, because the data is copying between GPU and CPU at each function call - and for a small model, the result is a slower inference under GPU than CPU. -Also, ``aesara.clone_replace`` is too convenient (PyMC internal joke is that it is like a drug - very addictive). +Also, ``pytensor.clone_replace`` is too convenient (PyMC internal joke is that it is like a drug - very addictive). If all the operation happens in the graph (including the conditioning and setting value), I see no need to isolate part of the graph (via graph copying or graph rewriting) for building model and running inference. Moreover, if we are limiting to the problem that we can solved most confidently - model with all continous unknown parameters that could be sampled with dynamic HMC, there is even less need to think about graph cloning/rewriting. @@ -559,13 +559,13 @@ Moreover, transition kernels in TFP do not flatten the tensors, see eg docstring #### Dynamic HMC We love NUTS, or to be more precise Dynamic HMC with complex stopping rules. -This part is actually all done outside of Aesara, for NUTS, it includes: +This part is actually all done outside of PyTensor, for NUTS, it includes: The leapfrog, dual averaging, tunning of mass matrix and step size, the tree building, sampler related statistics like divergence and energy checking. -We actually have an Aesara version of HMC, but it has never been used, and has been removed from the main repository. +We actually have a PyTensor version of HMC, but it has never been used, and has been removed from the main repository. It can still be found in the [git history](https://github.com/pymc-devs/pymc/pull/3734/commits/0fdae8207fd14f66635f3673ef267b2b8817aa68), though. #### Variational Inference (VI) -The design of the VI module takes a different approach than MCMC - it has a functional design, and everything is done within Aesara (i.e., Optimization and building the variational objective). +The design of the VI module takes a different approach than MCMC - it has a functional design, and everything is done within PyTensor (i.e., Optimization and building the variational objective).
The base class of variational inference is [pymc.variational.Inference](https://github.com/pymc-devs/pymc/blob/main/pymc/variational/inference.py), where it builds the objective function by calling: ```python @@ -615,28 +615,28 @@ def apply(self, f): ``` Since the logp and logq are from the approximation, let's dive in further on it (there is another abstraction here - ``Group`` - that allows you to combine approximation into new approximation, but we will skip this for now and only consider ``SingleGroupApproximation`` like ``MeanField``): -The definition of ``datalogp_norm``, ``logq_norm``, ``varlogp_norm`` are in [variational/opvi](https://github.com/pymc-devs/pymc/blob/main/pymc/variational/opvi.py), strip away the normalizing term, ``datalogp`` and ``varlogp`` are expectation of the variational free\_RVs and data logp - we clone the datalogp and varlogp from the model, replace its input with Aesara tensor that [samples from the variational posterior](https://github.com/pymc-devs/pymc/blob/6d07591962a6c135640a3c31903eba66b34e71d8/pymc/variational/opvi.py#L1098-L1111). +The definition of ``datalogp_norm``, ``logq_norm``, ``varlogp_norm`` are in [variational/opvi](https://github.com/pymc-devs/pymc/blob/main/pymc/variational/opvi.py), strip away the normalizing term, ``datalogp`` and ``varlogp`` are expectation of the variational free\_RVs and data logp - we clone the datalogp and varlogp from the model, replace its input with PyTensor tensor that [samples from the variational posterior](https://github.com/pymc-devs/pymc/blob/6d07591962a6c135640a3c31903eba66b34e71d8/pymc/variational/opvi.py#L1098-L1111). For ADVI, these samples are from [a Gaussian](https://github.com/pymc-devs/pymc/blob/6d07591962a6c135640a3c31903eba66b34e71d8/pymc/variational/approximations.py#L84-L89). Note that the samples from the posterior approximations are usually 1 dimension more, so that we can compute the expectation and get the gradient of the expectation (by computing the [expectation of the gradient!](http://blog.shakirm.com/2015/10/machine-learning-trick-of-the-day-4-reparameterisation-tricks/)). As for the [`logq`` since it is a Gaussian `it is pretty straightforward to evaluate](https://github.com/pymc-devs/pymc/blob/6d07591962a6c135640a3c31903eba66b34e71d8/pymc/variational/approximations.py#L91-L97). ##### Some challenges and insights from implementing VI. -- Graph based approach was helpful, but Aesara had no direct access to previously created nodes in the computational graph. +- Graph based approach was helpful, but PyTensor had no direct access to previously created nodes in the computational graph. You can find a lot of ``@node_property`` usages in implementation. This is done to cache nodes. TensorFlow has graph utils for that that could potentially help in doing this. On the other hand graph management in Tensorflow seemed to more tricky than expected. The high level reason is that graph is an add only container. - There were few fixed bugs not obvoius in the first place. - Aesara has a tool to manipulate the graph (``aesara.clone_replace``) and this tool requires extremely careful treatment when doing a lot of graph replacements at different level. -- We coined a term ``aesara.clone_replace`` curse. + PyTensor has a tool to manipulate the graph (``pytensor.clone_replace``) and this tool requires extremely careful treatment when doing a lot of graph replacements at different level. +- We coined a term ``pytensor.clone_replace`` curse. We got extremely dependent on this feature. 
Internal usages are uncountable: - We use this to [vectorize the model](https://github.com/pymc-devs/pymc/blob/main/pymc/model.py#L972) for both MCMC and VI to speed up computations - We use this to [create sampling graph](https://github.com/pymc-devs/pymc/blob/main/pymc/variational/opvi.py#L1483) for VI. This is the case you want posterior predictive as a part of computational graph. As this is the core of the VI process, we were trying to replicate this pattern in TF. -However, when ``aesara.clone_replace`` is called, Aesara creates a new part of the graph that can be collected by garbage collector, but TF's graph is add only. +However, when ``pytensor.clone_replace`` is called, PyTensor creates a new part of the graph that can be collected by garbage collector, but TF's graph is add only. So we should solve the problem of replacing input in a different way. ### Forward sampling @@ -669,7 +669,7 @@ There are also other error related random sample generation (e.g., [Mixture is c - [Inferencing Linear Mixed Model with EM.ipynb](https://github.com/junpenglao/Planet_Sakaar_Data_Science/blob/master/Ports/Inferencing%20Linear%20Mixed%20Model%20with%20EM.ipynb) - [Laplace approximation in pymc.ipynb](https://github.com/junpenglao/Planet_Sakaar_Data_Science/blob/master/Ports/Laplace%20approximation%20in%20pymc.ipynb) - Connecting it to other library within a model - - Using "black box" likelihood function by creating a custom Aesara Op. + - Using "black box" likelihood function by creating a custom PyTensor Op. - Using emcee - Using other library for inference - Connecting to Julia for solving ODE (with gradient for solution that can be used in NUTS) @@ -684,7 +684,7 @@ I implemented quite a lot of patches for mixture distribution, but still they ar #### Random methods in numpy There is a lot of complex logic for sampling from random variables, and because it is all in Python, we can't transform a sampling graph further. -Unfortunately, Aesara does not have code to sample from various distributions and we didn't want to write that our own. +Unfortunately, PyTensor does not have code to sample from various distributions and we didn't want to write that our own. #### Samplers are in Python -While having the samplers be written in Python allows for a lot of flexibility and intuitive for experiment (writing e.g. NUTS in Aesara is also very difficult), it comes at a performance penalty and makes sampling on the GPU very inefficient because memory needs to be copied for every logp evaluation. +While having the samplers be written in Python allows for a lot of flexibility and intuitive for experiment (writing e.g. NUTS in PyTensor is also very difficult), it comes at a performance penalty and makes sampling on the GPU very inefficient because memory needs to be copied for every logp evaluation. diff --git a/docs/source/contributing/implementing_distribution.md b/docs/source/contributing/implementing_distribution.md index 3c8554209..4d3ce8b4a 100644 --- a/docs/source/contributing/implementing_distribution.md +++ b/docs/source/contributing/implementing_distribution.md @@ -5,7 +5,7 @@ This guide provides an overview on how to implement a distribution for PyMC vers It is designed for developers who wish to add a new distribution to the library. Users will not be aware of all this complexity and should instead make use of helper methods such as `~pymc.DensityDist`. 
-PyMC {class}`~pymc.Distribution` builds on top of Aesara's {class}`~aesara.tensor.random.op.RandomVariable`, and implements `logp`, `logcdf` and `moment` methods as well as other initialization and validation helpers. +PyMC {class}`~pymc.Distribution` builds on top of PyTensor's {class}`~pytensor.tensor.random.op.RandomVariable`, and implements `logp`, `logcdf` and `moment` methods as well as other initialization and validation helpers. Most notably `shape/dims/observed` kwargs, alternative parametrizations, and default `transform`. Here is a summary check-list of the steps needed to implement a new distribution. @@ -21,11 +21,11 @@ This guide does not attempt to explain the rationale behind the `Distributions` ## 1. Creating a new `RandomVariable` `Op` -{class}`~aesara.tensor.random.op.RandomVariable` are responsible for implementing the random sampling methods, which in version 3 of PyMC used to be one of the standard `Distribution` methods, alongside `logp` and `logcdf`. +{class}`~pytensor.tensor.random.op.RandomVariable` are responsible for implementing the random sampling methods, which in version 3 of PyMC used to be one of the standard `Distribution` methods, alongside `logp` and `logcdf`. The `RandomVariable` is also responsible for parameter broadcasting and shape inference. Before creating a new `RandomVariable` make sure that it is not already offered in the {mod}`NumPy library `. -If it is, it should be added to the {doc}`Aesara library ` first and then imported into the PyMC library. +If it is, it should be added to the {doc}`PyTensor library ` first and then imported into the PyMC library. In addition, it might not always be necessary to implement a new `RandomVariable`. For example if the new `Distribution` is just a special parametrization of an existing `Distribution`. @@ -35,8 +35,8 @@ The following snippet illustrates how to create a new `RandomVariable`: ```python -from aesara.tensor.var import TensorVariable -from aesara.tensor.random.op import RandomVariable +from pytensor.tensor.var import TensorVariable +from pytensor.tensor.random.op import RandomVariable from typing import List, Tuple # Create your own `RandomVariable`... @@ -52,7 +52,7 @@ class BlahRV(RandomVariable): # one a matrix and the other a scalar, `[2, 0]`; etc.) ndims_params: List[int] = [0, 0] - # The NumPy/Aesara dtype for this RV (e.g. `"int32"`, `"int64"`). + # The NumPy/PyTensor dtype for this RV (e.g. `"int32"`, `"int64"`). # The standard in the library is `"int64"` for discrete variables # and `"floatX"` for continuous variables dtype: str = "floatX" @@ -87,10 +87,10 @@ blah = BlahRV() Some important things to keep in mind: -1. Everything inside the `rng_fn` method is pure Python code (as are the inputs) and should not make use of other `Aesara` symbolic ops. The random method should make use of the `rng` which is a NumPy {class}`~numpy.random.RandomState`, so that samples are reproducible. +1. Everything inside the `rng_fn` method is pure Python code (as are the inputs) and should not make use of other `PyTensor` symbolic ops. The random method should make use of the `rng` which is a NumPy {class}`~numpy.random.RandomState`, so that samples are reproducible. 1. Non-default `RandomVariable` dimensions will end up in the `rng_fn` via the `size` kwarg. The `rng_fn` will have to take this into consideration for correct output. `size` is the specification used by NumPy and SciPy and works like PyMC `shape` for univariate distributions, but is different for multivariate distributions. 
For multivariate distributions the __`size` excludes the `ndim_supp` support dimensions__, whereas the __`shape` of the resulting `TensorVariabe` or `ndarray` includes the support dimensions__. For more context check {ref}`The dimensionality notebook `. -1. `Aesara` tries to infer the output shape of the `RandomVariable` (given a user-specified size) by introspection of the `ndim_supp` and `ndim_params` attributes. However, the default method may not work for more complex distributions. In that case, custom `_supp_shape_from_params` (and less probably, `_infer_shape`) should also be implemented in the new `RandomVariable` class. One simple example is seen in the {class}`~pymc.DirichletMultinomialRV` where it was necessary to specify the `rep_param_idx` so that the `default_supp_shape_from_params` helper method can do its job. In more complex cases, it may not suffice to use this default helper. This could happen for instance if the argument values determined the support shape of the distribution, as happens in the `~pymc.distributions.multivarite._LKJCholeskyCovRV`. -1. It's okay to use the `rng_fn` `classmethods` of other Aesara and PyMC `RandomVariables` inside the new `rng_fn`. For example if you are implementing a negative HalfNormal `RandomVariable`, your `rng_fn` can simply return `- halfnormal.rng_fn(rng, scale, size)`. +1. `PyTensor` tries to infer the output shape of the `RandomVariable` (given a user-specified size) by introspection of the `ndim_supp` and `ndim_params` attributes. However, the default method may not work for more complex distributions. In that case, custom `_supp_shape_from_params` (and less probably, `_infer_shape`) should also be implemented in the new `RandomVariable` class. One simple example is seen in the {class}`~pymc.DirichletMultinomialRV` where it was necessary to specify the `rep_param_idx` so that the `default_supp_shape_from_params` helper method can do its job. In more complex cases, it may not suffice to use this default helper. This could happen for instance if the argument values determined the support shape of the distribution, as happens in the `~pymc.distributions.multivarite._LKJCholeskyCovRV`. +1. It's okay to use the `rng_fn` `classmethods` of other PyTensor and PyMC `RandomVariables` inside the new `rng_fn`. For example if you are implementing a negative HalfNormal `RandomVariable`, your `rng_fn` can simply return `- halfnormal.rng_fn(rng, scale, size)`. *Note: In addition to `size`, the PyMC API also provides `shape`, `dims` and `observed` as alternatives to define a distribution dimensionality, but this is taken care of by {class}`~pymc.Distribution`, and should not require any extra changes.* @@ -98,7 +98,7 @@ For a quick test that your new `RandomVariable` `Op` is working, you can call th ```python -# blah = aesara.tensor.random.uniform in this example +# blah = pytensor.tensor.random.uniform in this example blah([0, 0], [1, 2], size=(10, 2)).eval() # array([[0.83674527, 0.76593773], @@ -129,8 +129,8 @@ Here is how the example continues: ```python -import aesara.tensor as at -from pymc.aesaraf import floatX, intX +import pytensor.tensor as at +from pymc.pytensorf import floatX, intX from pymc.distributions.continuous import PositiveContinuous from pymc.distributions.dist_math import check_parameters from pymc.distributions.shape_utils import rv_size_is_none @@ -352,7 +352,7 @@ There are a couple of details worth keeping in mind: 1. 
By default, the first and last values (edges) of the `Domain` are not compared (they are used for other things). If it is important to test the edge of the `Domain`, the edge values can be repeated. This is done by the `Bool`: `Bool = Domain([0, 0, 1, 1], "int64")` 3. There are some default domains (such as `R` and `Rplus`) that you can use for testing your new distribution, but it's also perfectly fine to create your own domains inside the test function if there is a good reason for it (e.g., when the default values lead too many extreme unlikely combinations that are not very informative about the correctness of the implementation). 4. By default, a random subset of 100 `param` x `paramdomain` combinations is tested, to keep the test runtime under control. When testing your shiny new distribution, you can temporarily set `n_samples=-1` to force all combinations to be tested. This is important to avoid your `PR` leading to surprising failures in future runs whenever some bad combinations of parameters are randomly tested. -5. On GitHub some tests run twice, under the `aesara.config.floatX` flags of `"float64"` and `"float32"`. However, the reference Python functions will run in a pure "float64" environment, which means the reference and the PyMC results can diverge quite a lot (e.g., underflowing to `-np.inf` for extreme parameters). You should therefore make sure you test locally in both regimes. A quick and dirty way of doing this is to temporarily add `aesara.config.floatX = "float32"` at the very top of file, immediately after `import aesara`. Remember to set `n_samples=-1` as well to test all combinations. The test output will show what exact parameter values lead to a failure. If you are confident that your implementation is correct, you may opt to tweak the decimal precision with `select_by_precision`, or adjust the tested `Domain` values. In extreme cases, you can mark the test with a conditional `xfail` (if only one of the sub-methods is failing, they should be separated, so that the `xfail` is as narrow as possible): +5. On GitHub some tests run twice, under the `pytensor.config.floatX` flags of `"float64"` and `"float32"`. However, the reference Python functions will run in a pure "float64" environment, which means the reference and the PyMC results can diverge quite a lot (e.g., underflowing to `-np.inf` for extreme parameters). You should therefore make sure you test locally in both regimes. A quick and dirty way of doing this is to temporarily add `pytensor.config.floatX = "float32"` at the very top of file, immediately after `import pytensor`. Remember to set `n_samples=-1` as well to test all combinations. The test output will show what exact parameter values lead to a failure. If you are confident that your implementation is correct, you may opt to tweak the decimal precision with `select_by_precision`, or adjust the tested `Domain` values. 
In extreme cases, you can mark the test with a conditional `xfail` (if only one of the sub-methods is failing, they should be separated, so that the `xfail` is as narrow as possible): ```python @@ -361,7 +361,7 @@ def test_blah_logp(self): @pytest.mark.xfail( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_blah_logcdf(self): diff --git a/docs/source/contributing/jupyter_style.md b/docs/source/contributing/jupyter_style.md index a6cee2c5f..841cf3668 100644 --- a/docs/source/contributing/jupyter_style.md +++ b/docs/source/contributing/jupyter_style.md @@ -495,7 +495,7 @@ This library should be in your virtual environment if you installed our `require First, add a Markdown cell with the `## Watermark` title only so it appears in the table of contents. This is the second to last section, above the epilogue/footer. Then, add a code cell to print the versions of Python and packages used in the notebook. This is the last *code* cell in the notebook. -The `p` flag is optional (or it may need to have different libraries as input), but should be added if Aesara or xarray are not imported explicitly. This will also be checked by `pre-commit` (because we all forget to do things sometimes 😳). +The `p` flag is optional (or it may need to have different libraries as input), but should be added if PyTensor or xarray are not imported explicitly. This will also be checked by `pre-commit` (because we all forget to do things sometimes 😳). ```markdown ## Watermark @@ -503,7 +503,7 @@ The `p` flag is optional (or it may need to have different libraries as input), ```python %load_ext watermark -%watermark -n -u -v -iv -w -p aesara,aeppl,xarray +%watermark -n -u -v -iv -w -p pytensor,xarray ``` diff --git a/docs/source/contributing/private_api/tests.rst b/docs/source/contributing/private_api/tests.rst index d20a33309..e34891cdc 100644 --- a/docs/source/contributing/private_api/tests.rst +++ b/docs/source/contributing/private_api/tests.rst @@ -11,7 +11,7 @@ These fixtures are used to configure test functions. .. autosummary:: :toctree: generated/ - aesara_config + pytensor_config exception_verbosity strict_float32 seeded_test diff --git a/docs/source/contributing/review_pr_pymc_examples.md b/docs/source/contributing/review_pr_pymc_examples.md index caf1cb557..0b108ba5c 100644 --- a/docs/source/contributing/review_pr_pymc_examples.md +++ b/docs/source/contributing/review_pr_pymc_examples.md @@ -45,7 +45,7 @@ needs to be updated too. A PR that aims to update everything about a notebook could easily have 3 or more reviewers, each covering different aspects of the -example like Aesara usage, writing and explanation of the concepts used, +example like PyTensor usage, writing and explanation of the concepts used, ArviZ usage, styling with MyST+Sphinx, or structuring and scope of the notebook. Unless you plan to review everything, start your review mentioning what your @@ -99,7 +99,7 @@ and doesn't mean: For the time being (while we rerun the notebooks with v4 and update the docs to new formatting) please ensure all of the following: -* There are **NO** URLs pointing to PyMC/ArviZ/Aesara docs +* There are **NO** URLs pointing to PyMC/ArviZ/PyTensor docs * There is a post directive and MyST target at the top of the notebook. 
* The notebook is being checked by pre-commit (it should not appear in any exclude section in `.pre-commit-config.yaml`) * No watermark (this is already CI breaking but is still included here for full context) @@ -121,7 +121,7 @@ This might be moved to a comment added by a bot to every new PR, see [pymc-examp * Check code, outputs and supporting text in ReviewNB * Check styling and rendering in readthedocs preview and MyST notebook representation * Check there are: - - **No** URLs pointing to PyMC/Aesara/ArviZ docs + - **No** URLs pointing to PyMC/PyTensor/ArviZ docs - A post directive with tags and categories and MyST target at the top of the notebook - A watermark with all relevant libraries for reproducibility at the bottom of the notebook * Check CI is passing and the notebook is being checked by pre-commit diff --git a/docs/source/glossary.md b/docs/source/glossary.md index 5bfdba119..c0dd156c8 100644 --- a/docs/source/glossary.md +++ b/docs/source/glossary.md @@ -125,10 +125,10 @@ Hierarchical Ordinary Differential Equation Markov chain Monte Carlo (MCMC) methods comprise a class of algorithms for sampling from a probability distribution. By constructing a {term}`Markov Chain` that has the desired distribution as its equilibrium distribution, one can obtain a sample of the desired distribution by recording states from the chain. Various algorithms exist for constructing chains, including the Metropolis–Hastings algorithm. tensor_like - Any scalar or sequence that can be interpreted as a {class}`~aesara.tensor.TensorVariable`. In addition to TensorVariables, this includes NumPy ndarrays, scalars, lists and tuples (possibly nested). Any argument accepted by `aesara.tensor.as_tensor_variable` is tensor_like. + Any scalar or sequence that can be interpreted as a {class}`~pytensor.tensor.TensorVariable`. In addition to TensorVariables, this includes NumPy ndarrays, scalars, lists and tuples (possibly nested). Any argument accepted by `pytensor.tensor.as_tensor_variable` is tensor_like. ```{jupyter-execute} - import aesara.tensor as at + import pytensor.tensor as at at.as_tensor_variable([[1, 2.0], [0, 0]]) ``` diff --git a/docs/source/guides/Gaussian_Processes.rst b/docs/source/guides/Gaussian_Processes.rst index 442426687..532e7c6b1 100644 --- a/docs/source/guides/Gaussian_Processes.rst +++ b/docs/source/guides/Gaussian_Processes.rst @@ -113,7 +113,7 @@ which allows users to combine covariance functions into new ones, for example: After the covariance function is defined, it is now a function that is evaluated by calling :code:`cov_func(x, x)` (or :code:`mean_func(x)`). Since -PyMC is built on top of Aesara, it is relatively easy to define and experiment +PyMC is built on top of PyTensor, it is relatively easy to define and experiment with non-standard covariance and mean functons. For more information check out the tutorial on covariance functions. @@ -158,7 +158,7 @@ other type of random variable. The first argument is the name of the random variable representing the function we are placing the prior over. The second argument is the inputs to the function that the prior is over, :code:`X`. The inputs are usually known and present in the data, but they can -also be PyMC random variables. If the inputs are an Aesara tensor or a +also be PyMC random variables. If the inputs are a PyTensor tensor or a PyMC random variable, the :code:`shape` needs to be given. Usually at this point, inference is performed on the model.
The diff --git a/docs/source/guides/Probability_Distributions.rst b/docs/source/guides/Probability_Distributions.rst index 22af81667..81530746c 100644 --- a/docs/source/guides/Probability_Distributions.rst +++ b/docs/source/guides/Probability_Distributions.rst @@ -27,7 +27,7 @@ A variable requires at least a ``name`` argument, and zero or more model paramet p = pm.Beta('p', 1, 1, shape=(3, 3)) -Probability distributions are all subclasses of ``Distribution``, which in turn has two major subclasses: ``Discrete`` and ``Continuous``. In terms of data types, a ``Continuous`` random variable is given whichever floating point type is defined by ``aesara.config.floatX``, while ``Discrete`` variables are given ``int16`` types when ``aesara.config.floatX`` is ``float32``, and ``int64`` otherwise. +Probability distributions are all subclasses of ``Distribution``, which in turn has two major subclasses: ``Discrete`` and ``Continuous``. In terms of data types, a ``Continuous`` random variable is given whichever floating point type is defined by ``pytensor.config.floatX``, while ``Discrete`` variables are given ``int16`` types when ``pytensor.config.floatX`` is ``float32``, and ``int64`` otherwise. All distributions in ``pm.distributions`` will have two important methods: ``random()`` and ``logp()`` with the following signatures: diff --git a/docs/source/learn/core_notebooks/GLM_linear.ipynb b/docs/source/learn/core_notebooks/GLM_linear.ipynb index db37daea9..7ba0feeef 100644 --- a/docs/source/learn/core_notebooks/GLM_linear.ipynb +++ b/docs/source/learn/core_notebooks/GLM_linear.ipynb @@ -58,7 +58,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" ] }, { @@ -501,7 +501,7 @@ "Python version : 3.9.7\n", "IPython version : 7.29.0\n", "\n", - "aesara: 2.3.2\n", + "pytensor: 2.3.2\n", "aeppl : 0.0.18\n", "\n", "sys : 3.9.7 | packaged by conda-forge | (default, Sep 29 2021, 19:20:46) \n", @@ -522,7 +522,7 @@ "source": [ "%load_ext watermark\n", "\n", - "%watermark -n -u -v -iv -w -p aesara" + "%watermark -n -u -v -iv -w -p pytensor" ] } ], diff --git a/docs/source/learn/core_notebooks/dimensionality.ipynb b/docs/source/learn/core_notebooks/dimensionality.ipynb index 8eba7adc4..be514987f 100644 --- a/docs/source/learn/core_notebooks/dimensionality.ipynb +++ b/docs/source/learn/core_notebooks/dimensionality.ipynb @@ -39,7 +39,7 @@ "\n", "import pymc as pm\n", "import numpy as np\n", - "import aesara.tensor as at" + "import pytensor.tensor as at" ] }, { @@ -411,8 +411,8 @@ "Inputs values: [Generator(PCG64) at 0x7F7BE616D0E0, array([], dtype=int64), array(11), array([ 1, 10, 100]), array([0.1, 0.1])]\n", "Outputs clients: [['output'], ['output']]\n", "\n", - "HINT: Re-running with most Aesara optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the Aesara flag 'optimizer=fast_compile'. If that does not work, Aesara optimizations can be disabled with 'optimizer=None'.\n", - "HINT: Use the Aesara flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" + "HINT: Re-running with most PyTensor optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the PyTensor flag 'optimizer=fast_compile'. 
If that does not work, PyTensor optimizations can be disabled with 'optimizer=None'.\n", + "HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" ] } ], @@ -517,8 +517,8 @@ "Inputs values: [Generator(PCG64) at 0x7F7BE616DEE0, array([3, 4]), array(11), array([0, 1, 2]), array(1.)]\n", "Outputs clients: [['output'], ['output']]\n", "\n", - "HINT: Re-running with most Aesara optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the Aesara flag 'optimizer=fast_compile'. If that does not work, Aesara optimizations can be disabled with 'optimizer=None'.\n", - "HINT: Use the Aesara flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" + "HINT: Re-running with most PyTensor optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the PyTensor flag 'optimizer=fast_compile'. If that does not work, PyTensor optimizations can be disabled with 'optimizer=None'.\n", + "HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" ] } ], @@ -1166,8 +1166,8 @@ "Inputs values: [Generator(PCG64) at 0x7F7BDDA904A0, array([], dtype=int64), array(4), array([ 5, 10]), 'not shown']\n", "Outputs clients: [['output'], ['output']]\n", "\n", - "HINT: Re-running with most Aesara optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the Aesara flag 'optimizer=fast_compile'. If that does not work, Aesara optimizations can be disabled with 'optimizer=None'.\n", - "HINT: Use the Aesara flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" + "HINT: Re-running with most PyTensor optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the PyTensor flag 'optimizer=fast_compile'. If that does not work, PyTensor optimizations can be disabled with 'optimizer=None'.\n", + "HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" ] } ], @@ -1250,8 +1250,8 @@ "Inputs values: [Generator(PCG64) at 0x7F7BDD9D1000, array([2, 4]), array(4), array([ 5, 10]), array([0.1, 0.3, 0.6])]\n", "Outputs clients: [['output'], ['output']]\n", "\n", - "HINT: Re-running with most Aesara optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the Aesara flag 'optimizer=fast_compile'. If that does not work, Aesara optimizations can be disabled with 'optimizer=None'.\n", - "HINT: Use the Aesara flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" + "HINT: Re-running with most PyTensor optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the PyTensor flag 'optimizer=fast_compile'. 
If that does not work, PyTensor optimizations can be disabled with 'optimizer=None'.\n", + "HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.\n" ] } ], diff --git a/docs/source/learn/core_notebooks/index.md b/docs/source/learn/core_notebooks/index.md index d849b078d..33e10686e 100644 --- a/docs/source/learn/core_notebooks/index.md +++ b/docs/source/learn/core_notebooks/index.md @@ -9,5 +9,5 @@ GLM_linear model_comparison posterior_predictive dimensionality -pymc_aesara +pymc_pytensor ::: diff --git a/docs/source/learn/core_notebooks/model_comparison.ipynb b/docs/source/learn/core_notebooks/model_comparison.ipynb index 767354b86..fd1376539 100644 --- a/docs/source/learn/core_notebooks/model_comparison.ipynb +++ b/docs/source/learn/core_notebooks/model_comparison.ipynb @@ -9,7 +9,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" ] }, { @@ -522,7 +522,7 @@ "IPython version : 7.29.0\n", "\n", "xarray: 0.20.2\n", - "aesara: 2.3.2\n", + "pytensor: 2.3.2\n", "aeppl : 0.0.18\n", "\n", "matplotlib: 3.5.1\n", @@ -536,7 +536,7 @@ } ], "source": [ - "%watermark -n -u -v -iv -w -p xarray,aesara" + "%watermark -n -u -v -iv -w -p xarray,pytensor" ] } ], diff --git a/docs/source/learn/core_notebooks/posterior_predictive.ipynb b/docs/source/learn/core_notebooks/posterior_predictive.ipynb index 0f74b329d..53c2328c9 100644 --- a/docs/source/learn/core_notebooks/posterior_predictive.ipynb +++ b/docs/source/learn/core_notebooks/posterior_predictive.ipynb @@ -4632,7 +4632,7 @@ "Python version : 3.9.13\n", "IPython version : 8.4.0\n", "\n", - "aesara: 2.6.6\n", + "pytensor: 2.6.6\n", "aeppl : 0.0.31\n", "\n", "xarray : 2022.3.0\n", @@ -4649,7 +4649,7 @@ ], "source": [ "%load_ext watermark\n", - "%watermark -n -u -v -iv -w -p aesara" + "%watermark -n -u -v -iv -w -p pytensor" ] } ], diff --git a/docs/source/learn/core_notebooks/pymc_overview.ipynb b/docs/source/learn/core_notebooks/pymc_overview.ipynb index c48acf181..afec52b9c 100644 --- a/docs/source/learn/core_notebooks/pymc_overview.ipynb +++ b/docs/source/learn/core_notebooks/pymc_overview.ipynb @@ -22,7 +22,7 @@ "source": [ "## Abstract\n", "\n", - "Probabilistic Programming allows for automatic Bayesian inference on user-defined probabilistic models. Gradient-based algorithms for Markov chain Monte Carlo (MCMC) sampling, known as Hamiltonian Monte Carlo (HMC), allow inference on increasingly complex models but requires gradient information that is often not trivial to calculate. PyMC is an open source probabilistic programming framework written in Python that uses Aesara to compute gradients via automatic differentiation, as well as compiling probabilistic programs on-the-fly to one of a suite of computational backends for increased speed. PyMC allows for model specification in Python code, rather than in a domain-specific language, making it easy to learn, customize, and debug. This paper is a tutorial-style introduction to this software package for those already somewhat familiar with Bayesian statistics.\n", + "Probabilistic Programming allows for automatic Bayesian inference on user-defined probabilistic models. 
Gradient-based algorithms for Markov chain Monte Carlo (MCMC) sampling, known as Hamiltonian Monte Carlo (HMC), allow inference on increasingly complex models but require gradient information that is often not trivial to calculate. PyMC is an open source probabilistic programming framework written in Python that uses PyTensor to compute gradients via automatic differentiation, as well as compiling probabilistic programs on-the-fly to one of a suite of computational backends for increased speed. PyMC allows for model specification in Python code, rather than in a domain-specific language, making it easy to learn, customize, and debug. This paper is a tutorial-style introduction to this software package for those already somewhat familiar with Bayesian statistics.\n", "\n", "## Introduction\n", "\n", @@ -30,7 +30,7 @@ "\n", "Probabilistic programming in Python confers a number of advantages including multi-platform compatibility, an expressive yet clean and readable syntax, easy integration with other scientific libraries, and extensibility via C, C++, Fortran or Cython. These features make it relatively straightforward to write and use custom statistical distributions, samplers and transformation functions, as required by Bayesian analysis.\n", "\n", - "While most of PyMC's user-facing features are written in pure Python, it leverages Aesara (a fork of the Theano project) to transparently transcode models to C and compile them to machine code, thereby boosting performance. Aesara is a library that allows expressions to be defined using generalized vector data structures called *tensors*, which are tightly integrated with the popular NumPy {class}`~numpy.ndarray` data structure, and similarly allow for broadcasting and advanced indexing, just as NumPy arrays do. Aesara also automatically optimizes the likelihood's computational graph for speed and allows for compilation to a suite of computational backends, including Jax and Numba.\n", + "While most of PyMC's user-facing features are written in pure Python, it leverages PyTensor (a fork of the Theano project) to transparently transcode models to C and compile them to machine code, thereby boosting performance. PyTensor is a library that allows expressions to be defined using generalized vector data structures called *tensors*, which are tightly integrated with the popular NumPy {class}`~numpy.ndarray` data structure, and similarly allow for broadcasting and advanced indexing, just as NumPy arrays do. PyTensor also automatically optimizes the likelihood's computational graph for speed and allows for compilation to a suite of computational backends, including Jax and Numba.\n", "\n", "Here, we present a primer on the use of PyMC for solving general Bayesian statistical inference and prediction problems. We will first see the basics of how to use PyMC, motivated by a simple example: installation, data creation, model definition, model fitting and posterior analysis. Then we will cover two case studies and use them to show how to define and fit more sophisticated models. Finally we will discuss a couple of other useful features: custom distributions and arbitrary deterministic nodes."
] @@ -177,7 +177,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" + "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" ] }, { @@ -3343,7 +3343,7 @@ }, "outputs": [], "source": [ - "import aesara.tensor as at\n", + "import pytensor.tensor as at\n", "\n", "with pm.Model(coords={\"predictors\": X.columns.values}) as test_score_model:\n", "\n", @@ -4144,9 +4144,9 @@ "source": [ "## Arbitrary deterministics\n", "\n", - "Due to its reliance on Aesara, PyMC provides many mathematical functions and operators for transforming random variables into new random variables. However, the library of functions in Aesara is not exhaustive, therefore Aesara and PyMC provide functionality for creating arbitrary functions in pure Python, and including these functions in PyMC models. This is supported with the `as_op` function decorator.\n", + "Due to its reliance on PyTensor, PyMC provides many mathematical functions and operators for transforming random variables into new random variables. However, the library of functions in PyTensor is not exhaustive, therefore PyTensor and PyMC provide functionality for creating arbitrary functions in pure Python, and including these functions in PyMC models. This is supported with the `as_op` function decorator.\n", "\n", - "Aesara needs to know the types of the inputs and outputs of a function, which are specified for `as_op` by `itypes` for inputs and `otypes` for outputs. " + "PyTensor needs to know the types of the inputs and outputs of a function, which are specified for `as_op` by `itypes` for inputs and `otypes` for outputs. " ] }, { @@ -4155,7 +4155,7 @@ "metadata": {}, "outputs": [], "source": [ - "from aesara.compile.ops import as_op\n", + "from pytensor.compile.ops import as_op\n", "\n", "\n", "@as_op(itypes=[at.lscalar], otypes=[at.lscalar])\n", @@ -4175,7 +4175,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "An important drawback of this approach is that it is not possible for `aesara` to inspect these functions in order to compute the gradient required for the Hamiltonian-based samplers. Therefore, it is not possible to use the HMC or NUTS samplers for a model that uses such an operator. However, it is possible to add a gradient if we inherit from {class}`~aesara.Op` instead of using `as_op`. The PyMC example set includes [a more elaborate example of the usage of as_op](https://github.com/pymc-devs/pymc-examples/blob/main/examples/case_studies/disaster_model_theano_op.py)." + "An important drawback of this approach is that it is not possible for `pytensor` to inspect these functions in order to compute the gradient required for the Hamiltonian-based samplers. Therefore, it is not possible to use the HMC or NUTS samplers for a model that uses such an operator. However, it is possible to add a gradient if we inherit from {class}`~pytensor.Op` instead of using `as_op`. The PyMC example set includes [a more elaborate example of the usage of as_op](https://github.com/pymc-devs/pymc-examples/blob/main/examples/case_studies/disaster_model_theano_op.py)." ] }, { @@ -4187,7 +4187,7 @@ "Similarly, the library of statistical distributions in PyMC is not exhaustive, but PyMC allows for the creation of user-defined functions for an arbitrary probability distribution. 
For simple statistical distributions, the {class}`~pymc.DensityDist` class takes as an argument any function that calculates a log-probability $log(p(x))$. This function may employ other random variables in its calculation. Here is an example inspired by a blog post by Jake Vanderplas on which priors to use for a linear regression (Vanderplas, 2014). \n", "\n", "```python\n", - "import aesara.tensor as at\n", + "import pytensor.tensor as at\n", "\n", "with pm.Model() as model:\n", " alpha = pm.Uniform('intercept', -100, 100)\n", @@ -4207,7 +4207,7 @@ "source": [ "For more complex distributions, one can create a subclass of {class}`~pymc.Continuous` or {class}`~pymc.Discrete` and provide the custom `logp` function, as required. This is how the built-in distributions in PyMC are specified. As an example, fields like psychology and astrophysics have complex likelihood functions for particular processes that may require numerical approximation. \n", "\n", - "Implementing the `beta` variable above as a `Continuous` subclass is shown below, along with an associated {class}`~aesara.RandomVariable` object, an instance of which becomes an attribute of the distribution." + "Implementing the `beta` variable above as a `Continuous` subclass is shown below, along with an associated {class}`~pytensor.RandomVariable` object, an instance of which becomes an attribute of the distribution." ] }, { @@ -4262,7 +4262,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If your logp can not be expressed in Aesara, you can decorate the function with `as_op` as follows: `@as_op(itypes=[at.dscalar], otypes=[at.dscalar])`. Note, that this will create a blackbox Python function that will be much slower and not provide the gradients necessary for e.g. NUTS." + "If your logp can not be expressed in PyTensor, you can decorate the function with `as_op` as follows: `@as_op(itypes=[at.dscalar], otypes=[at.dscalar])`. Note, that this will create a blackbox Python function that will be much slower and not provide the gradients necessary for e.g. NUTS." ] }, { @@ -4328,7 +4328,7 @@ "\n", "numpy : 1.20.3\n", "matplotlib: 3.5.1\n", - "aesara : 2.3.2\n", + "pytensor : 2.3.2\n", "pymc : 4.0.0b2\n", "arviz : 0.11.4\n", "pandas : 1.3.4\n", diff --git a/docs/source/learn/core_notebooks/pymc_aesara.ipynb b/docs/source/learn/core_notebooks/pymc_pytensor.ipynb similarity index 94% rename from docs/source/learn/core_notebooks/pymc_aesara.ipynb rename to docs/source/learn/core_notebooks/pymc_pytensor.ipynb index 8d1d172bd..3011e2e27 100644 --- a/docs/source/learn/core_notebooks/pymc_aesara.ipynb +++ b/docs/source/learn/core_notebooks/pymc_pytensor.ipynb @@ -4,13 +4,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "(pymc_aesara)=\n", + "(pymc_pytensor)=\n", "\n", - "# PyMC and Aesara\n", + "# PyMC and PyTensor\n", "\n", "**Authors:** [Ricardo Vieira](https://github.com/ricardoV94) and [Juan Orduz](https://juanitorduz.github.io/)\n", "\n", - "In this notebook we want to give an introduction of how PyMC models translate to Aesara graphs. The purpose is not to give a detailed description of all [`aesara`](https://github.com/aesara-devs/aesara)'s capabilities but rather focus on the main concepts to understand its connection with PyMC. For a more detailed description of the project please refer to the official documentation." + "In this notebook we want to give an introduction of how PyMC models translate to PyTensor graphs. 
The purpose is not to give a detailed description of all [`pytensor`](https://github.com/pytensor-devs/pytensor)'s capabilities but rather focus on the main concepts to understand its connection with PyMC. For a more detailed description of the project please refer to the official documentation." ] }, { @@ -32,8 +32,8 @@ }, "outputs": [], "source": [ - "import aesara\n", - "import aesara.tensor as at\n", + "import pytensor\n", + "import pytensor.tensor as at\n", "import pymc as pm\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -44,18 +44,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Introduction to Aesara\n", + "## Introduction to PyTensor\n", "\n", - "We start by looking into `aesara`. According to their documentation\n", + "We start by looking into `pytensor`. According to their documentation\n", "\n", - "> Aesara is a Python library that allows one to define, optimize, and efficiently evaluate mathematical expressions involving multi-dimensional arrays." + "> PyTensor is a Python library that allows one to define, optimize, and efficiently evaluate mathematical expressions involving multi-dimensional arrays." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "![aesara logo](https://raw.githubusercontent.com/aesara-devs/aesara/main/doc/images/aesara_logo_2400.png)" + "![pytensor logo](https://raw.githubusercontent.com/pytensor-devs/pytensor/main/doc/images/pytensor_logo_2400.png)" ] }, { @@ -64,7 +64,7 @@ "source": [ "### A simple example\n", "\n", - "To begin, we define some aesara tensors and show how to perform some basic operations." + "To begin, we define some pytensor tensors and show how to perform some basic operations." ] }, { @@ -139,7 +139,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can use the {func}`~aesara.dprint` function to print the computational graph of any given tensor." + "We can use the {func}`~pytensor.dprint` function to print the computational graph of any given tensor." ] }, { @@ -170,14 +170,14 @@ } ], "source": [ - "aesara.dprint(w)" + "pytensor.dprint(w)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Note that this graph does not do any computation (yet!). It is simply defining the sequence of steps to be done. We can use {func}`~aesara.function` to define a callable object so that we can push values trough the graph." + "Note that this graph does not do any computation (yet!). It is simply defining the sequence of steps to be done. We can use {func}`~pytensor.function` to define a callable object so that we can push values through the graph." ] }, { @@ -186,7 +186,7 @@ "metadata": {}, "outputs": [], "source": [ - "f = aesara.function(inputs=[x, y], outputs=w)" + "f = pytensor.function(inputs=[x, y], outputs=w)" ] }, { @@ -221,7 +221,7 @@ "metadata": {}, "source": [ ":::{tip}\n", - "Sometimes we just want to debug, we can use {meth}`~aesara.graph.basic.Variable.eval` for that:\n", + "Sometimes we just want to debug; we can use {meth}`~pytensor.graph.basic.Variable.eval` for that:\n", ":::" ] }, @@ -276,9 +276,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Aesara is clever!\n", + "### PyTensor is clever!\n", "\n", - "One of the most important features of `aesara` is that it can automatically optimize the mathematical operations inside a graph. Let's consider a simple example:" + "One of the most important features of `pytensor` is that it can automatically optimize the mathematical operations inside a graph. Let's consider a simple example:" ] }, { @@ -313,7 +313,7 @@ "c = a / b\n", "c.name = \"a / b\"\n", "\n", - "aesara.dprint(c)" + "pytensor.dprint(c)" ] }, { @@ -354,7 +354,7 @@ "d = b * c\n", "d.name = \"b * c\"\n", "\n", - "aesara.dprint(d)" + "pytensor.dprint(d)" ] }, { @@ -389,20 +389,20 @@ } ], "source": [ - "g = aesara.function(inputs=[a, b], outputs=d)\n", + "g = pytensor.function(inputs=[a, b], outputs=d)\n", "\n", - "aesara.dprint(g)" + "pytensor.dprint(g)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### What is in an Aesara graph?\n", + "### What is in a PyTensor graph?\n", "\n", - "The following diagram shows the basic structure of an `aesara` graph.\n", + "The following diagram shows the basic structure of a `pytensor` graph.\n", "\n", - "![aesara graph](https://raw.githubusercontent.com/aesara-devs/aesara/main/doc/tutorial/apply.png)" + "![pytensor graph](https://raw.githubusercontent.com/pytensor-devs/pytensor/main/doc/tutorial/apply.png)" ] }, { @@ -506,7 +506,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that this is very similar to the output of {func}`~aesara.dprint` function introduced above." + "Note that this is very similar to the output of the {func}`~pytensor.dprint` function introduced above." ] }, { @@ -537,7 +537,7 @@ } ], "source": [ - "aesara.dprint(w)" + "pytensor.dprint(w)" ] }, { @@ -546,7 +546,7 @@ "source": [ "### Graph manipulation 101\n", "\n", - "Another interesting feature of Aesara is the ability to manipulate the computational graph, something that is not possible with TensorFlow or PyTorch. Here we continue with the example above in order to illustrate the main idea around this technique." + "Another interesting feature of PyTensor is the ability to manipulate the computational graph, something that is not possible with TensorFlow or PyTorch. Here we continue with the example above in order to illustrate the main idea around this technique." ] }, { @@ -567,14 +567,14 @@ ], "source": [ "# get input tensors\n", - "list(aesara.graph.graph_inputs(graphs=[w]))" + "list(pytensor.graph.graph_inputs(graphs=[w]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "As a simple example, let's add an {func}`~aesara.tensor.exp` before the {func}`~aesara.tensor.log` (to get the identity function)." + "As a simple example, let's add an {func}`~pytensor.tensor.exp` before the {func}`~pytensor.tensor.log` (to get the identity function)."
] }, { @@ -623,14 +623,14 @@ } ], "source": [ - "aesara.dprint(w)" + "pytensor.dprint(w)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "To modify the graph we need to use the {func}`~aesara.clone_replace` function, which *returns a copy of the initial subgraph with the corresponding substitutions.*" + "To modify the graph we need to use the {func}`~pytensor.clone_replace` function, which *returns a copy of the initial subgraph with the corresponding substitutions.*" ] }, { @@ -662,9 +662,9 @@ } ], "source": [ - "new_w = aesara.clone_replace(output=[w], replace={parent_of_w: new_parent_of_w})[0]\n", + "new_w = pytensor.clone_replace(output=[w], replace={parent_of_w: new_parent_of_w})[0]\n", "new_w.name = \"log(exp(x + y))\"\n", - "aesara.dprint(new_w)" + "pytensor.dprint(new_w)" ] }, { @@ -706,7 +706,7 @@ "metadata": {}, "source": [ ":::{note}\n", - "Again, note that `aesara` is clever enough to omit the `exp` and `log` once we compile the function.\n", + "Again, note that `pytensor` is clever enough to omit the `exp` and `log` once we compile the function.\n", ":::" ] }, @@ -737,9 +737,9 @@ } ], "source": [ - "f = aesara.function(inputs=[x, y], outputs=new_w)\n", + "f = pytensor.function(inputs=[x, y], outputs=new_w)\n", "\n", - "aesara.dprint(f)" + "pytensor.dprint(f)" ] }, { @@ -766,9 +766,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Aesara RandomVariables\n", + "### PyTensor RandomVariables\n", "\n", - "Now that we have seen aesara's basics we want to move in the direction of random variables.\n", + "Now that we have seen pytensor's basics we want to move in the direction of random variables.\n", "\n", "How do we generate random numbers in [`numpy`](https://github.com/numpy/numpy)? To illustrate it we can sample from a normal distribution:" ] @@ -803,7 +803,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now let's try to do it in Aesara." + "Now let's try to do it in PyTensor." ] }, { @@ -831,7 +831,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, we show the graph using {func}`~aesara.dprint`." + "Next, we show the graph using {func}`~pytensor.dprint`." ] }, { @@ -863,7 +863,7 @@ } ], "source": [ - "aesara.dprint(y)" + "pytensor.dprint(y)" ] }, { @@ -883,7 +883,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We *could* sample by calling {meth}`~aesara.graph.basic.Variable.eval`. on the random variable." + "We *could* sample by calling {meth}`~pytensor.graph.basic.Variable.eval`. on the random variable." ] }, { @@ -1000,7 +1000,7 @@ ], "source": [ "x = pm.Normal.dist(mu=0, sigma=1)\n", - "aesara.dprint(x)" + "pytensor.dprint(x)" ] }, { @@ -1014,7 +1014,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can try to generate samples by calling {meth}`~aesara.graph.basic.Variable.eval` as above." + "We can try to generate samples by calling {meth}`~pytensor.graph.basic.Variable.eval` as above." ] }, { @@ -1128,7 +1128,7 @@ "with pm.Model() as model:\n", " z = pm.Normal(name=\"z\", mu=np.array([0, 0]), sigma=np.array([1, 2]))\n", "\n", - "aesara.dprint(z)" + "pytensor.dprint(z)" ] }, { @@ -1187,14 +1187,14 @@ } ], "source": [ - "aesara.dprint(model.basic_RVs[0])" + "pytensor.dprint(model.basic_RVs[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can try to sample via {meth}`~aesara.graph.basic.Variable.eval` as above and it is no surprise that we are getting the same samples at each iteration." 
+ "We can try to sample via {meth}`~pytensor.graph.basic.Variable.eval` as above and it is no surprise that we are getting the same samples at each iteration." ] }, { @@ -1345,7 +1345,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`z_logp` contains the Aesara graph that represents the log-probability of the normal random variable `z`, evaluated at `z_value`." + "`z_logp` contains the PyTensor graph that represents the log-probability of the normal random variable `z`, evaluated at `z_value`." ] }, { @@ -1396,7 +1396,7 @@ } ], "source": [ - "aesara.dprint(z_logp)" + "pytensor.dprint(z_logp)" ] }, { @@ -1411,7 +1411,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Observe that, as explained at the beginning, there has been no computation yet. The actual computation is performed after compiling and passing the input. For illustration purposes alone, we will again use the handy {meth}`~aesara.graph.basic.Variable.eval` method." + "Observe that, as explained at the beginning, there has been no computation yet. The actual computation is performed after compiling and passing the input. For illustration purposes alone, we will again use the handy {meth}`~pytensor.graph.basic.Variable.eval` method." ] }, { @@ -1519,14 +1519,14 @@ } ], "source": [ - "aesara.dprint(model.logp(sum=False))" + "pytensor.dprint(model.logp(sum=False))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Because we only have one variable, this function is equivalent to what we obtained by manually calling `pm.logp` before. We can also use a helper {meth}`~pymc.Model.compile_logp` to return an already compiled Aesara function of the model logp." + "Because we only have one variable, this function is equivalent to what we obtained by manually calling `pm.logp` before. We can also use a helper {meth}`~pymc.Model.compile_logp` to return an already compiled PyTensor function of the model logp." ] }, { @@ -1757,7 +1757,7 @@ } ], "source": [ - "# extract values as aesara.tensor.var.TensorVariable\n", + "# extract values as pytensor.tensor.var.TensorVariable\n", "mu_value = model_2.rvs_to_values[mu]\n", "sigma_log_value = model_2.rvs_to_values[sigma]\n", "x_value = model_2.rvs_to_values[x]\n", @@ -1806,7 +1806,7 @@ "metadata": {}, "source": [ ":::{Note}\n", - "For `sigma_log_value` we add the $-10$ term for the `scipy` and `aesara` to match because of the jacobian.\n", + "For `sigma_log_value` we add the $-10$ term so that the `scipy` and `pytensor` results match, because of the Jacobian.\n", ":::" ] }, @@ -1814,7 +1814,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As we already saw, we can also use the method {meth}`~pymc.Model.compile_logp` to obtain a compiled aesara function of the model logp, which takes a dictionary of `{value variable name : value}` as inputs:" + "As we already saw, we can also use the method {meth}`~pymc.Model.compile_logp` to obtain a compiled pytensor function of the model logp, which takes a dictionary of `{value variable name : value}` as inputs:" ] }, { @@ -1848,7 +1848,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If you want to go deeper into the internals of `aesara` RandomVariables and `pymc` distributions please take a look into the [distribution developer guide](implementing-a-distribution)." + "If you want to go deeper into the internals of `pytensor` RandomVariables and `pymc` distributions, please take a look at the [distribution developer guide](implementing-a-distribution)."
] } ], diff --git a/pymc/__init__.py b/pymc/__init__.py index 90fc22dcd..9cbbf1601 100644 --- a/pymc/__init__.py +++ b/pymc/__init__.py @@ -26,10 +26,10 @@ def __set_compiler_flags(): - # Workarounds for Aesara compiler problems on various platforms - import aesara + # Workarounds for PyTensor compiler problems on various platforms + import pytensor - current = aesara.config.gcc__cxxflags + current = pytensor.config.gcc__cxxflags augmented = f"{current} -Wno-c++11-narrowing" # Work around compiler bug in GCC < 8.4 related to structured exception @@ -41,13 +41,12 @@ def __set_compiler_flags(): # Now disable the generation of stack unwinding tables. augmented = f"{augmented} -fno-unwind-tables -fno-asynchronous-unwind-tables" - aesara.config.gcc__cxxflags = augmented + pytensor.config.gcc__cxxflags = augmented __set_compiler_flags() from pymc import _version, gp, ode, sampling -from pymc.aesaraf import * from pymc.backends import * from pymc.blocking import * from pymc.data import * @@ -68,6 +67,7 @@ def __set_compiler_flags(): from pymc.model_graph import model_to_graphviz, model_to_networkx from pymc.plots import * from pymc.printing import * +from pymc.pytensorf import * from pymc.sampling import * from pymc.smc import * from pymc.stats import * diff --git a/pymc/backends/arviz.py b/pymc/backends/arviz.py index 93a0a2a14..71c3e4ae4 100644 --- a/pymc/backends/arviz.py +++ b/pymc/backends/arviz.py @@ -17,16 +17,16 @@ import numpy as np -from aesara.graph.basic import Constant -from aesara.tensor.sharedvar import SharedVariable -from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from arviz import InferenceData, concat, rcParams from arviz.data.base import CoordSpec, DimSpec, dict_to_dataset, requires +from pytensor.graph.basic import Constant +from pytensor.tensor.sharedvar import SharedVariable +from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 import pymc -from pymc.aesaraf import extract_obs_data from pymc.model import Model, modelcontext +from pymc.pytensorf import extract_obs_data from pymc.util import get_default_varnames if TYPE_CHECKING: diff --git a/pymc/backends/base.py b/pymc/backends/base.py index aa521a6e0..6df74ad63 100644 --- a/pymc/backends/base.py +++ b/pymc/backends/base.py @@ -23,8 +23,8 @@ from abc import ABC from typing import List, Sequence, Tuple, cast -import aesara.tensor as at import numpy as np +import pytensor.tensor as at from pymc.backends.report import SamplerReport from pymc.model import modelcontext diff --git a/pymc/data.py b/pymc/data.py index 4bdac4196..ae4f42bb3 100644 --- a/pymc/data.py +++ b/pymc/data.py @@ -22,18 +22,18 @@ from copy import copy from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, cast -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.compile.sharedvalue import SharedVariable -from aesara.graph.basic import Apply -from aesara.tensor.type import TensorType -from aesara.tensor.var import TensorConstant, TensorVariable +from pytensor.compile.sharedvalue import SharedVariable +from pytensor.graph.basic import Apply +from pytensor.tensor.type import TensorType +from pytensor.tensor.var import TensorConstant, TensorVariable import pymc as pm -from pymc.aesaraf import convert_observed_data +from pymc.pytensorf import convert_observed_data __all__ = [ "get_data", @@ -150,7 +150,7 @@ class Minibatch(TensorVariable): you can use it to change source of minibatches programmatically in_memory_size: 
``int`` or ``List[int|slice|Ellipsis]`` - data size for storing in ``aesara.shared`` + data size for storing in ``pytensor.shared`` Attributes ---------- @@ -238,7 +238,7 @@ class Minibatch(TensorVariable): To be more concrete about how we create a minibatch, here is a demo: 1. create a shared variable - >>> shared = aesara.shared(data) + >>> shared = pytensor.shared(data) 2. take a random slice of size 10: @@ -262,7 +262,7 @@ class Minibatch(TensorVariable): Then you should create a `dict` with replacements: >>> replacements = {x: testdata} - >>> rnode = aesara.clone_replace(node, replacements) + >>> rnode = pytensor.clone_replace(node, replacements) >>> assert (testdata ** 2 == rnode.eval()).all() *FIXME: In the following, what is the **reason** to replace the Minibatch variable with @@ -273,7 +273,7 @@ class Minibatch(TensorVariable): For example >>> replacements = {x.minibatch: x.shared} - >>> rnode = aesara.clone_replace(node, replacements) + >>> rnode = pytensor.clone_replace(node, replacements) For more complex slices some more code is needed that can seem not so clear @@ -303,7 +303,7 @@ class Minibatch(TensorVariable): RNG: Dict[str, List[Any]] = collections.defaultdict(list) - @aesara.config.change_flags(compute_test_value="raise") + @pytensor.config.change_flags(compute_test_value="raise") def __init__( self, data, @@ -321,7 +321,7 @@ def __init__( else: data = np.asarray(data, dtype) in_memory_slc = self.make_static_slices(in_memory_size) - self.shared = aesara.shared(data[tuple(in_memory_slc)]) + self.shared = pytensor.shared(data[tuple(in_memory_slc)]) self.update_shared_f = update_shared_f self.random_slc = self.make_random_slices(self.shared.shape, batch_size, random_seed) minibatch = self.shared[self.random_slc] @@ -336,7 +336,7 @@ def __init__( minibatch = at.specify_shape(minibatch, shape) self.minibatch = minibatch super().__init__(self.minibatch.type, None, None, name=name) - Apply(aesara.compile.view_op, inputs=[self.minibatch], outputs=[self]) + Apply(pytensor.compile.view_op, inputs=[self.minibatch], outputs=[self]) self.tag.test_value = copy(self.minibatch.tag.test_value) def rslice(self, total, size, seed): @@ -443,7 +443,7 @@ def check(t): slc = slc_begin + mid + slc_end else: raise TypeError("Unrecognized size type, %r" % batch_size) - return pm.aesaraf.ix_(*slc) + return pm.pytensorf.ix_(*slc) def update_shared(self): if self.update_shared_f is None: @@ -533,7 +533,7 @@ def ConstantData( ) -> TensorConstant: """Alias for ``pm.Data(..., mutable=False)``. - Registers the ``value`` as a :class:`~aesara.tensor.TensorConstant` with the model. + Registers the ``value`` as a :class:`~pytensor.tensor.TensorConstant` with the model. For more information, please reference :class:`pymc.Data`. """ var = Data( @@ -559,7 +559,7 @@ def MutableData( ) -> SharedVariable: """Alias for ``pm.Data(..., mutable=True)``. - Registers the ``value`` as a :class:`~aesara.compile.sharedvalue.SharedVariable` + Registers the ``value`` as a :class:`~pytensor.compile.sharedvalue.SharedVariable` with the model. For more information, please reference :class:`pymc.Data`. """ var = Data( @@ -587,7 +587,7 @@ def Data( """Data container that registers a data variable with the model. Depending on the ``mutable`` setting (default: True), the variable - is registered as a :class:`~aesara.compile.sharedvalue.SharedVariable`, + is registered as a :class:`~pytensor.compile.sharedvalue.SharedVariable`, enabling it to be altered in value and shape, but NOT in dimensionality using :func:`pymc.set_data`. 
@@ -596,7 +596,7 @@ def Data( When making predictions or doing posterior predictive sampling, the shape of the registered data variable will most likely need to be changed. If you encounter an - Aesara shape mismatch error, refer to the documentation for + PyTensor shape mismatch error, refer to the documentation for :meth:`pymc.model.set_data`. For more information, read the notebook :ref:`nb:data_container`. @@ -621,8 +621,8 @@ def Data( If True, the ``Data`` container will try to infer what the coordinates and dimension names should be if there is an index in ``value``. mutable : bool, optional - Switches between creating a :class:`~aesara.compile.sharedvalue.SharedVariable` - (``mutable=True``) vs. creating a :class:`~aesara.tensor.TensorConstant` + Switches between creating a :class:`~pytensor.compile.sharedvalue.SharedVariable` + (``mutable=True``) vs. creating a :class:`~pytensor.tensor.TensorConstant` (``mutable=False``). Consider using :class:`pymc.ConstantData` or :class:`pymc.MutableData` as less verbose alternatives to ``pm.Data(..., mutable=...)``. @@ -630,7 +630,7 @@ def Data( version of the package. Since ``v4.1.0`` the default value is ``mutable=False``, with previous versions having ``mutable=True``. **kwargs : dict, optional - Extra arguments passed to :func:`aesara.shared`. + Extra arguments passed to :func:`pytensor.shared`. Examples -------- @@ -669,7 +669,7 @@ def Data( name = model.name_for(name) # `convert_observed_data` takes care of parameter `value` and - # transforms it to something digestible for Aesara. + # transforms it to something digestible for PyTensor. arr = convert_observed_data(value) if mutable is None: @@ -682,7 +682,7 @@ def Data( ) mutable = False if mutable: - x = aesara.shared(arr, name, **kwargs) + x = pytensor.shared(arr, name, **kwargs) else: x = at.as_tensor_variable(arr, name, **kwargs) diff --git a/pymc/distributions/bound.py b/pymc/distributions/bound.py index c23d555f7..880707549 100644 --- a/pymc/distributions/bound.py +++ b/pymc/distributions/bound.py @@ -13,14 +13,13 @@ # limitations under the License. import warnings -import aesara.tensor as at import numpy as np +import pytensor.tensor as at -from aesara.tensor import as_tensor_variable -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.var import TensorVariable +from pytensor.tensor import as_tensor_variable +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.var import TensorVariable -from pymc.aesaraf import floatX, intX from pymc.distributions.continuous import BoundedContinuous, bounded_cont_transform from pymc.distributions.dist_math import check_parameters from pymc.distributions.distribution import Continuous, Discrete @@ -28,6 +27,7 @@ from pymc.distributions.shape_utils import to_tuple from pymc.distributions.transforms import _default_transform from pymc.model import modelcontext +from pymc.pytensorf import floatX, intX from pymc.util import check_dist_not_registered __all__ = ["Bound"] diff --git a/pymc/distributions/censored.py b/pymc/distributions/censored.py index 7b21f35a1..d476e3324 100644 --- a/pymc/distributions/censored.py +++ b/pymc/distributions/censored.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import aesara.tensor as at import numpy as np +import pytensor.tensor as at -from aesara.tensor import TensorVariable -from aesara.tensor.random.op import RandomVariable +from pytensor.tensor import TensorVariable +from pytensor.tensor.random.op import RandomVariable from pymc.distributions.distribution import ( Distribution, diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 8013bba05..844185847 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Contains code from Aeppl, Copyright (c) 2021-2022, Aesara Developers. +# Contains code from Aeppl, Copyright (c) 2021-2022, PyTensor Developers. # coding: utf-8 """ @@ -24,17 +24,17 @@ from typing import List, Optional, Union -import aesara -import aesara.tensor as at import numpy as np - -from aesara.graph.basic import Apply, Variable -from aesara.graph.op import Op -from aesara.raise_op import Assert -from aesara.tensor import gammaln -from aesara.tensor.extra_ops import broadcast_shape -from aesara.tensor.math import tanh -from aesara.tensor.random.basic import ( +import pytensor +import pytensor.tensor as at + +from pytensor.graph.basic import Apply, Variable +from pytensor.graph.op import Op +from pytensor.raise_op import Assert +from pytensor.tensor import gammaln +from pytensor.tensor.extra_ops import broadcast_shape +from pytensor.tensor.math import tanh +from pytensor.tensor.random.basic import ( BetaRV, cauchy, chisquare, @@ -53,8 +53,8 @@ uniform, vonmises, ) -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.var import TensorConstant +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.var import TensorConstant from pymc.logprob.abstract import _logprob, logcdf, logprob @@ -76,7 +76,6 @@ def polyagamma_cdf(*args, **kwargs): from scipy.interpolate import InterpolatedUnivariateSpline from scipy.special import expit -from pymc.aesaraf import floatX from pymc.distributions import transforms from pymc.distributions.dist_math import ( SplineWrapper, @@ -93,6 +92,7 @@ def polyagamma_cdf(*args, **kwargs): from pymc.distributions.shape_utils import rv_size_is_none from pymc.distributions.transforms import _default_transform from pymc.math import invlogit, logdiffexp, logit +from pymc.pytensorf import floatX __all__ = [ "Uniform", @@ -1089,7 +1089,7 @@ class Beta(UnitContinuous): the binomial distribution. 
""" - rv_op = aesara.tensor.random.beta + rv_op = pytensor.tensor.random.beta @classmethod def dist(cls, alpha=None, beta=None, mu=None, sigma=None, *args, **kwargs): @@ -1305,7 +1305,7 @@ class Exponential(PositiveContinuous): def dist(cls, lam, *args, **kwargs): lam = at.as_tensor_variable(floatX(lam)) - # Aesara exponential op is parametrized in terms of mu (1/lam) + # PyTensor exponential op is parametrized in terms of mu (1/lam) return super().dist([at.reciprocal(lam)], **kwargs) def moment(rv, size, mu): @@ -2098,7 +2098,7 @@ def dist(cls, alpha=None, beta=None, mu=None, sigma=None, **kwargs): alpha = at.as_tensor_variable(floatX(alpha)) beta = at.as_tensor_variable(floatX(beta)) - # The Aesara `GammaRV` `Op` will invert the `beta` parameter itself + # The PyTensor `GammaRV` `Op` will invert the `beta` parameter itself return super().dist([alpha, beta], **kwargs) @classmethod @@ -2122,7 +2122,7 @@ def get_alpha_beta(cls, alpha=None, beta=None, mu=None, sigma=None): return alpha, beta def moment(rv, size, alpha, inv_beta): - # The Aesara `GammaRV` `Op` inverts the `beta` parameter itself + # The PyTensor `GammaRV` `Op` inverts the `beta` parameter itself mean = alpha * inv_beta if not rv_size_is_none(size): mean = at.full(size, mean) @@ -3657,7 +3657,7 @@ def rng_fn(cls, rng, h, z, size=None) -> np.ndarray: # handle the kind of rng passed to the sampler bg = rng._bit_generator if isinstance(rng, np.random.RandomState) else rng return np.asarray( - random_polyagamma(h, z, size=size, random_state=bg).astype(aesara.config.floatX) + random_polyagamma(h, z, size=size, random_state=bg).astype(pytensor.config.floatX) ) @@ -3676,7 +3676,7 @@ def make_node(self, x, h, z): z = at.as_tensor_variable(floatX(z)) bshape = broadcast_shape(x, h, z) shape = [None] * len(bshape) - return Apply(self, [x, h, z], [at.TensorType(aesara.config.floatX, shape)()]) + return Apply(self, [x, h, z], [at.TensorType(pytensor.config.floatX, shape)()]) def perform(self, node, ins, outs): x, h, z = ins[0], ins[1], ins[2] @@ -3684,7 +3684,7 @@ def perform(self, node, ins, outs): polyagamma_pdf(x, h, z, return_log=True) if self.get_pdf else polyagamma_cdf(x, h, z, return_log=True) - ).astype(aesara.config.floatX) + ).astype(pytensor.config.floatX) class PolyaGamma(PositiveContinuous): diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index a220b325d..881ce2f09 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -13,11 +13,11 @@ # limitations under the License. 
import warnings -import aesara.tensor as at import numpy as np +import pytensor.tensor as at -from aesara.tensor import TensorConstant -from aesara.tensor.random.basic import ( +from pytensor.tensor import TensorConstant +from pytensor.tensor.random.basic import ( RandomVariable, ScipyRandomVariable, bernoulli, @@ -33,7 +33,6 @@ import pymc as pm -from pymc.aesaraf import floatX, intX from pymc.distributions.dist_math import ( betaln, binomln, @@ -49,6 +48,7 @@ from pymc.distributions.mixture import Mixture from pymc.distributions.shape_utils import rv_size_is_none from pymc.math import sigmoid +from pymc.pytensorf import floatX, intX from pymc.vartypes import continuous_types __all__ = [ diff --git a/pymc/distributions/dist_math.py b/pymc/distributions/dist_math.py index ad3bc8f63..a17cd588d 100644 --- a/pymc/distributions/dist_math.py +++ b/pymc/distributions/dist_math.py @@ -21,23 +21,23 @@ from typing import Iterable -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import scipy.linalg import scipy.stats -from aesara.compile.builders import OpFromGraph -from aesara.graph.basic import Apply, Variable -from aesara.graph.op import Op -from aesara.scalar import UnaryScalarOp, upgrade_to_float_no_complex -from aesara.tensor import gammaln -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.slinalg import Cholesky, SolveTriangular +from pytensor.compile.builders import OpFromGraph +from pytensor.graph.basic import Apply, Variable +from pytensor.graph.op import Op +from pytensor.scalar import UnaryScalarOp, upgrade_to_float_no_complex +from pytensor.tensor import gammaln +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.slinalg import Cholesky, SolveTriangular -from pymc.aesaraf import floatX from pymc.distributions.shape_utils import to_tuple from pymc.logprob.utils import CheckParameterValue +from pymc.pytensorf import floatX solve_lower = SolveTriangular(lower=True) solve_upper = SolveTriangular(lower=False) @@ -156,14 +156,14 @@ def log_diff_normal_cdf(mu, sigma, x, y): def sigma2rho(sigma): """ - `sigma -> rho` Aesara converter + `sigma -> rho` PyTensor converter :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" return at.log(at.exp(at.abs(sigma)) - 1.0) def rho2sigma(rho): """ - `rho -> sigma` Aesara converter + `rho -> sigma` PyTensor converter :math:`mu + sigma*e = mu + log(1+exp(rho))*e`""" return at.softplus(rho) @@ -282,7 +282,7 @@ def dlogp(inputs, gradients): class SplineWrapper(Op): """ - Creates an Aesara operation from scipy.interpolate.UnivariateSpline + Creates a PyTensor operation from scipy.interpolate.UnivariateSpline """ __props__ = ("spline",) @@ -345,7 +345,7 @@ def impl(self, x): def grad(self, inp, grads): (x,) = inp (gz,) = grads - return (gz * (i1e_scalar(x) - aesara.scalar.sgn(x) * i0e_scalar(x)),) + return (gz * (i1e_scalar(x) - pytensor.scalar.sgn(x) * i0e_scalar(x)),) i0e_scalar = I0e(upgrade_to_float_no_complex, name="i0e") @@ -476,7 +476,7 @@ def log_i0(x): def incomplete_beta(a, b, value): warnings.warn( - "incomplete_beta has been deprecated. Use aesara.tensor.betainc instead.", + "incomplete_beta has been deprecated.
Use pytensor.tensor.betainc instead.", FutureWarning, stacklevel=2, ) diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py index 8c7a84116..a60bbc0d0 100644 --- a/pymc/distributions/distribution.py +++ b/pymc/distributions/distribution.py @@ -23,19 +23,18 @@ import numpy as np -from aesara import tensor as at -from aesara.compile.builders import OpFromGraph -from aesara.graph import node_rewriter -from aesara.graph.basic import Node, clone_replace -from aesara.graph.rewriting.basic import in2out -from aesara.graph.utils import MetaType -from aesara.tensor.basic import as_tensor_variable -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.type import RandomType -from aesara.tensor.var import TensorVariable +from pytensor import tensor as at +from pytensor.compile.builders import OpFromGraph +from pytensor.graph import node_rewriter +from pytensor.graph.basic import Node, clone_replace +from pytensor.graph.rewriting.basic import in2out +from pytensor.graph.utils import MetaType +from pytensor.tensor.basic import as_tensor_variable +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.type import RandomType +from pytensor.tensor.var import TensorVariable from typing_extensions import TypeAlias -from pymc.aesaraf import convert_observed_data from pymc.distributions.shape_utils import ( Dims, Shape, @@ -54,6 +53,7 @@ ) from pymc.logprob.rewriting import logprob_rewrites_db from pymc.printing import str_for_dist +from pymc.pytensorf import convert_observed_data from pymc.util import UNSET, _add_future_warning_tag from pymc.vartypes import string_types @@ -150,7 +150,7 @@ def icdf(op, value, *dist_params, **kwargs): def moment(op, rv, rng, size, dtype, *dist_params): return class_moment(rv, size, *dist_params) - # Register the Aesara `RandomVariable` type as a subclass of this + # Register the PyTensor `RandomVariable` type as a subclass of this # `Distribution` type. new_cls.register(rv_type) @@ -203,7 +203,7 @@ def update(self, node: Node): Returns a dictionary with the symbolic expressions required for correct updating of random state input variables repeated function evaluations. This is used by - `aesaraf.compile_pymc`. + `pytensorf.compile_pymc`. """ return {} @@ -256,7 +256,7 @@ def __new__( transform : optional See ``Model.register_rv``. **kwargs - Keyword arguments that will be forwarded to ``.dist()`` or the Aesara RV Op. + Keyword arguments that will be forwarded to ``.dist()`` or the PyTensor RV Op. Most prominently: ``shape`` for ``.dist()`` or ``dtype`` for the Op. Returns @@ -340,7 +340,7 @@ def dist( shape : int, tuple, Variable, optional A tuple of sizes for each dimension of the new RV. **kwargs - Keyword arguments that will be forwarded to the Aesara RV Op. + Keyword arguments that will be forwarded to the PyTensor RV Op. Most prominently: ``size`` or ``dtype``. Returns @@ -353,7 +353,7 @@ def dist( warnings.warn( "The `.dist(testval=...)` argument is deprecated and has no effect. " "Initial values for sampling/optimization can be specified with `initval` in a modelcontext. " - "For using Aesara's test value features, you must assign the `.tag.test_value` yourself.", + "For using PyTensor's test value features, you must assign the `.tag.test_value` yourself.", FutureWarning, stacklevel=2, ) @@ -486,7 +486,7 @@ class DensityDist(Distribution): name : str dist_params : Tuple A sequence of the distribution's parameter. These will be converted into - Aesara tensors internally. These parameters could be other ``TensorVariable`` + PyTensor tensors internally. These parameters could be other ``TensorVariable`` instances created from , optionally created via ``RandomVariable`` ``Op``s. class_name : str Name for the RandomVariable class which will wrap the DensityDist methods. Useful when you are creating multiple DensityDists with @@ -501,17 +501,17 @@ A callable that calculates the log density of some given observed ``value`` conditioned on certain distribution parameter values. It must have the following signature: ``logp(value, *dist_params)``, where ``value`` is - an Aesara tensor that represents the observed value, and ``dist_params`` + a PyTensor tensor that represents the observed value, and ``dist_params`` are the tensors that hold the values of the distribution parameters. - This function must return an Aesara tensor. If ``None``, a ``NotImplemented`` + This function must return a PyTensor tensor. If ``None``, a ``NotImplemented`` error will be raised when trying to compute the distribution's logp. logcdf : Optional[Callable] A callable that calculates the log cummulative probability of some given observed ``value`` conditioned on certain distribution parameter values. It must have the following signature: ``logcdf(value, *dist_params)``, where ``value`` is - an Aesara tensor that represents the observed value, and ``dist_params`` + a PyTensor tensor that represents the observed value, and ``dist_params`` are the tensors that hold the values of the distribution parameters. - This function must return an Aesara tensor. If ``None``, a ``NotImplemented`` + This function must return a PyTensor tensor. If ``None``, a ``NotImplemented`` error will be raised when trying to compute the distribution's logcdf. random : Optional[Callable] A callable that can be used to generate random draws from the distribution. @@ -526,7 +526,7 @@ class DensityDist(Distribution): moment : Optional[Callable] A callable that can be used to compute the moments of the distribution. It must have the following signature: ``moment(rv, size, *rv_inputs)``. - The distribution's :class:`~aesara.tensor.random.op.RandomVariable` is passed + The distribution's :class:`~pytensor.tensor.random.op.RandomVariable` is passed as the first argument ``rv``. ``size`` is the random variable's size implied by the ``dims``, ``size`` and parameters supplied to the distribution.
Finally, ``rv_inputs`` is the sequence of the distribution parameters, in the same order diff --git a/pymc/distributions/logprob.py b/pymc/distributions/logprob.py index 1b431b8ee..62b21bb0e 100644 --- a/pymc/distributions/logprob.py +++ b/pymc/distributions/logprob.py @@ -15,19 +15,19 @@ from typing import Dict, List, Sequence, Union -import aesara import numpy as np +import pytensor -from aesara import tensor as at -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.var import TensorVariable +from pytensor import tensor as at +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.var import TensorVariable -from pymc.aesaraf import floatX from pymc.logprob.abstract import assign_custom_measurable_outputs from pymc.logprob.abstract import logcdf as logcdf_logprob from pymc.logprob.abstract import logprob as logp_logprob from pymc.logprob.joint_logprob import factorized_joint_logprob from pymc.logprob.transforms import RVTransform, TransformValuesRewrite +from pymc.pytensorf import floatX TOTAL_SIZE = Union[int, Sequence[int], None] @@ -97,7 +97,7 @@ def _get_scaling(total_size: TOTAL_SIZE, shape, ndim: int) -> TensorVariable: raise TypeError( "Unrecognized `total_size` type, expected int or list of ints, got %r" % total_size ) - return at.as_tensor(coef, dtype=aesara.config.floatX) + return at.as_tensor(coef, dtype=pytensor.config.floatX) def _check_no_rvs(logp_terms: Sequence[TensorVariable]): @@ -107,7 +107,7 @@ def _check_no_rvs(logp_terms: Sequence[TensorVariable]): unexpected_rv_nodes = [ node - for node in aesara.graph.ancestors(logp_terms) + for node in pytensor.graph.ancestors(logp_terms) if ( node.owner and isinstance(node.owner.op, RandomVariable) diff --git a/pymc/distributions/mixture.py b/pymc/distributions/mixture.py index f62c534b9..a61d9151b 100644 --- a/pymc/distributions/mixture.py +++ b/pymc/distributions/mixture.py @@ -13,13 +13,13 @@ # limitations under the License. 
import warnings -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.graph.basic import Node, equal_computations -from aesara.tensor import TensorVariable -from aesara.tensor.random.op import RandomVariable +from pytensor.graph.basic import Node, equal_computations +from pytensor.tensor import TensorVariable +from pytensor.tensor.random.op import RandomVariable from pymc.distributions import transforms from pymc.distributions.continuous import Normal, get_tau_sigma @@ -210,7 +210,7 @@ def dist(cls, w, comp_dists, **kwargs): @classmethod def rv_op(cls, weights, *components, size=None): # Create new rng for the mix_indexes internal RV - mix_indexes_rng = aesara.shared(np.random.default_rng()) + mix_indexes_rng = pytensor.shared(np.random.default_rng()) single_component = len(components) == 1 ndim_supp = components[0].owner.op.ndim_supp diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py index 4ffcc019a..1805e83e1 100644 --- a/pymc/distributions/multivariate.py +++ b/pymc/distributions/multivariate.py @@ -20,27 +20,26 @@ from functools import reduce from typing import Optional -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import scipy -from aesara.graph.basic import Apply, Constant, Variable -from aesara.graph.op import Op -from aesara.raise_op import Assert -from aesara.sparse.basic import sp_sum -from aesara.tensor import TensorConstant, gammaln, sigmoid -from aesara.tensor.nlinalg import det, eigh, matrix_inverse, trace -from aesara.tensor.random.basic import dirichlet, multinomial, multivariate_normal -from aesara.tensor.random.op import RandomVariable, default_supp_shape_from_params -from aesara.tensor.random.utils import broadcast_params -from aesara.tensor.slinalg import Cholesky, SolveTriangular -from aesara.tensor.type import TensorType +from pytensor.graph.basic import Apply, Constant, Variable +from pytensor.graph.op import Op +from pytensor.raise_op import Assert +from pytensor.sparse.basic import sp_sum +from pytensor.tensor import TensorConstant, gammaln, sigmoid +from pytensor.tensor.nlinalg import det, eigh, matrix_inverse, trace +from pytensor.tensor.random.basic import dirichlet, multinomial, multivariate_normal +from pytensor.tensor.random.op import RandomVariable, default_supp_shape_from_params +from pytensor.tensor.random.utils import broadcast_params +from pytensor.tensor.slinalg import Cholesky, SolveTriangular +from pytensor.tensor.type import TensorType from scipy import linalg, stats import pymc as pm -from pymc.aesaraf import floatX, intX from pymc.distributions import transforms from pymc.distributions.continuous import BoundedContinuous, ChiSquared, Normal from pymc.distributions.dist_math import ( @@ -70,6 +69,7 @@ from pymc.distributions.transforms import Interval, ZeroSumTransform, _default_transform from pymc.logprob.abstract import _logprob from pymc.math import kron_diag, kron_dot +from pymc.pytensorf import floatX, intX from pymc.util import check_dist_not_registered __all__ = [ @@ -126,7 +126,7 @@ def quaddist_matrix(cov=None, chol=None, tau=None, lower=True, *args, **kwargs): if tau.ndim != 2: raise ValueError("tau must be two dimensional.") # TODO: What's the correct order/approach (in the non-square case)? - # `aesara.tensor.nlinalg.tensorinv`? + # `pytensor.tensor.nlinalg.tensorinv`? cov = matrix_inverse(tau) else: # TODO: What's the correct order/approach (in the non-square case)? 
@@ -260,7 +260,7 @@ class MvNormal(Continuous): def dist(cls, mu, cov=None, tau=None, chol=None, lower=True, **kwargs): mu = at.as_tensor_variable(mu) cov = quaddist_matrix(cov, chol, tau, lower) - # Aesara is stricter about the shape of mu, than PyMC used to be + # PyTensor is stricter about the shape of mu, than PyMC used to be mu = at.broadcast_arrays(mu, cov[..., -1])[0] return super().dist([mu, cov], **kwargs) @@ -311,7 +311,7 @@ def make_node(self, rng, size, dtype, nu, mu, cov): def __call__(self, nu, mu=None, cov=None, size=None, **kwargs): - dtype = aesara.config.floatX if self.dtype == "floatX" else self.dtype + dtype = pytensor.config.floatX if self.dtype == "floatX" else self.dtype if mu is None: mu = np.array([0.0], dtype=dtype) @@ -400,7 +400,7 @@ def dist(cls, nu, Sigma=None, mu=None, scale=None, tau=None, chol=None, lower=Tr nu = at.as_tensor_variable(floatX(nu)) mu = at.as_tensor_variable(floatX(mu)) scale = quaddist_matrix(scale, chol, tau, lower) - # Aesara is stricter about the shape of mu, than PyMC used to be + # PyTensor is stricter about the shape of mu, than PyMC used to be mu = at.broadcast_arrays(mu, scale[..., -1])[0] return super().dist([nu, mu, scale], **kwargs) @@ -880,7 +880,7 @@ def infer_shape(self, fgraph, node, shapes): def grad(self, inp, grads): (x,) = inp - return [x.zeros_like(aesara.config.floatX)] + return [x.zeros_like(pytensor.config.floatX)] def __str__(self): return "MatrixIsPositiveDefinite" @@ -1219,7 +1219,7 @@ def rv_op(cls, n, eta, sd_dist, size=None): sd_dist = change_dist_size(sd_dist, shape[:-1]) # Create new rng for the _lkjcholeskycov internal RV - rng = aesara.shared(np.random.default_rng()) + rng = pytensor.shared(np.random.default_rng()) rng_, n_, eta_, sd_dist_ = rng.type(), n.type(), eta.type(), sd_dist.type() next_rng_, lkjcov_ = _ljk_cholesky_cov_base(n_, eta_, sd_dist_, rng=rng_).owner.outputs @@ -1605,7 +1605,7 @@ def logp(value, n, eta): TensorVariable """ - # TODO: Aesara does not have a `triu_indices`, so we can only work with constant + # TODO: PyTensor does not have a `triu_indices`, so we can only work with constant # n (or else find a different expression) if not isinstance(n, Constant): raise NotImplementedError("logp only implemented for constant `n`") @@ -2059,15 +2059,15 @@ class CARRV(RandomVariable): def make_node(self, rng, size, dtype, mu, W, alpha, tau): mu = at.as_tensor_variable(floatX(mu)) - W = aesara.sparse.as_sparse_or_tensor_variable(floatX(W)) + W = pytensor.sparse.as_sparse_or_tensor_variable(floatX(W)) if not W.ndim == 2: raise ValueError("W must be a matrix (ndim=2).") - sparse = isinstance(W, aesara.sparse.SparseVariable) + sparse = isinstance(W, pytensor.sparse.SparseVariable) msg = "W must be a symmetric adjacency matrix." if sparse: - abs_diff = aesara.sparse.basic.mul(aesara.sparse.basic.sgn(W - W.T), W - W.T) - W = Assert(msg)(W, at.isclose(aesara.sparse.basic.sp_sum(abs_diff), 0)) + abs_diff = pytensor.sparse.basic.mul(pytensor.sparse.basic.sgn(W - W.T), W - W.T) + W = Assert(msg)(W, at.isclose(pytensor.sparse.basic.sp_sum(abs_diff), 0)) else: W = Assert(msg)(W, at.allclose(W, W.T)) @@ -2151,7 +2151,7 @@ class CAR(Continuous): Symmetric adjacency matrix of 1s and 0s indicating adjacency between elements. If possible, *W* is converted to a sparse matrix, falling back to a dense variable. - :func:`~aesara.sparse.basic.as_sparse_or_tensor_variable` is + :func:`~pytensor.sparse.basic.as_sparse_or_tensor_variable` is used for this sparse or tensorvariable conversion. 
alpha : tensor_like of float Autoregression parameter taking values between -1 and 1. Values closer to 0 indicate weaker @@ -2192,12 +2192,12 @@ def logp(value, mu, W, alpha, tau): TensorVariable """ - sparse = isinstance(W, (aesara.sparse.SparseConstant, aesara.sparse.SparseVariable)) + sparse = isinstance(W, (pytensor.sparse.SparseConstant, pytensor.sparse.SparseVariable)) if sparse: D = sp_sum(W, axis=0) Dinv_sqrt = at.diag(1 / at.sqrt(D)) - DWD = at.dot(aesara.sparse.dot(Dinv_sqrt, W), Dinv_sqrt) + DWD = at.dot(pytensor.sparse.dot(Dinv_sqrt, W), Dinv_sqrt) else: D = W.sum(axis=0) Dinv_sqrt = at.diag(1 / at.sqrt(D)) @@ -2214,7 +2214,7 @@ def logp(value, mu, W, alpha, tau): delta = value - mu if sparse: - Wdelta = aesara.sparse.dot(delta, W) + Wdelta = pytensor.sparse.dot(delta, W) else: Wdelta = at.dot(delta, W) diff --git a/pymc/distributions/shape_utils.py b/pymc/distributions/shape_utils.py index 666cde5c1..c210c58f6 100644 --- a/pymc/distributions/shape_utils.py +++ b/pymc/distributions/shape_utils.py @@ -24,18 +24,18 @@ import numpy as np -from aesara import config -from aesara import tensor as at -from aesara.graph.basic import Variable -from aesara.graph.op import Op, compute_test_value -from aesara.raise_op import Assert -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.shape import SpecifyShape -from aesara.tensor.var import TensorVariable +from pytensor import config +from pytensor import tensor as at +from pytensor.graph.basic import Variable +from pytensor.graph.op import Op, compute_test_value +from pytensor.raise_op import Assert +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.shape import SpecifyShape +from pytensor.tensor.var import TensorVariable from typing_extensions import TypeAlias -from pymc.aesaraf import convert_observed_data from pymc.model import modelcontext +from pymc.pytensorf import convert_observed_data __all__ = [ "to_tuple", @@ -48,8 +48,8 @@ "change_dist_size", ] -from pymc.aesaraf import PotentialShapeType from pymc.exceptions import ShapeError +from pymc.pytensorf import PotentialShapeType from pymc.util import _add_future_warning_tag @@ -661,7 +661,7 @@ def change_specify_shape_size(op, ss, new_size, expand) -> TensorVariable: if ndim_supp > 0: new_shapes[-ndim_supp:] = shapes[-ndim_supp:] - # specify_shape has a wrong signature https://github.com/aesara-devs/aesara/issues/1164 + # specify_shape has a wrong signature https://github.com/pytensor-devs/pytensor/issues/1164 return at.specify_shape(new_var, new_shapes) # type: ignore diff --git a/pymc/distributions/simulator.py b/pymc/distributions/simulator.py index e180350d3..a15bddce0 100644 --- a/pymc/distributions/simulator.py +++ b/pymc/distributions/simulator.py @@ -14,18 +14,18 @@ import logging -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.graph.op import Apply, Op -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.var import TensorVariable +from pytensor.graph.op import Apply, Op +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.var import TensorVariable from scipy.spatial import cKDTree -from pymc.aesaraf import floatX from pymc.distributions.distribution import Distribution, _moment from pymc.logprob.abstract import _logprob +from pymc.pytensorf import floatX __all__ = ["Simulator"] @@ -95,9 +95,9 @@ class Simulator(Distribution): different methods across separate models, be sure to use distinct class_names. 
- distance : Aesara_Op, callable or str, default "gaussian" + distance : PyTensor_Op, callable or str, default "gaussian" Distance function. Available options are ``"gaussian"``, ``"laplace"``, - ``"kullback_leibler"`` or a user defined function (or Aesara_Op) that takes + ``"kullback_leibler"`` or a user defined function (or PyTensor_Op) that takes ``epsilon``, the summary statistics of observed_data and the summary statistics of simulated_data as input. @@ -110,10 +110,10 @@ class Simulator(Distribution): ``distance="gaussian"`` + ``sum_stat="sort"`` is equivalent to the 1D 2-wasserstein distance ``distance="laplace"`` + ``sum_stat="sort"`` is equivalent to the the 1D 1-wasserstein distance - sum_stat : Aesara_Op, callable or str, default "identity" + sum_stat : PyTensor_Op, callable or str, default "identity" Summary statistic function. Available options are ``"identity"``, - ``"sort"``, ``"mean"``, ``"median"``. If a callable (or Aesara_Op) is defined, - it should return a 1d numpy array (or Aesara vector). + ``"sort"``, ``"mean"``, ``"median"``. If a callable (or PyTensor_Op) is defined, + it should return a 1d numpy array (or PyTensor vector). epsilon : tensor_like of float, default 1.0 Scaling parameter for the distance functions. It should be a float or an array of the same size of the output of ``sum_stat``. @@ -175,7 +175,7 @@ def dist( # type: ignore distance = laplace elif distance == "kullback_leibler": raise NotImplementedError("KL not refactored yet") - # TODO: Wrap KL in aesara OP + # TODO: Wrap KL in pytensor OP # distance = KullbackLeibler(observed) # if sum_stat != "identity": # _log.info(f"Automatically setting sum_stat to identity as expected by {distance}") @@ -193,7 +193,7 @@ def dist( # type: ignore elif sum_stat == "mean": sum_stat = at.mean elif sum_stat == "median": - # Missing in Aesara, see aesara/issues/525 + # Missing in PyTensor, see pytensor/issues/525 sum_stat = create_sum_stat_op_from_fn(np.median) elif callable(sum_stat): sum_stat = create_sum_stat_op_from_fn(sum_stat) @@ -273,7 +273,7 @@ def simulator_logp(op, values, *inputs, **kwargs): # TODO: Model rngs should be updated prior to multiprocessing split, # in which case this would not be needed. 
However, that would have to be # done for every sampler that may accomodate Simulators - rng = aesara.shared(np.random.default_rng(), name="simulator_rng") + rng = pytensor.shared(np.random.default_rng(), name="simulator_rng") # Create a new simulatorRV with identical inputs as the original one sim_value = op.make_node(rng, *inputs[1:]).default_output() sim_value.name = "simulator_value" @@ -321,7 +321,7 @@ def __call__(self, epsilon, obs_data, sim_data): def create_sum_stat_op_from_fn(fn): - vectorX = at.dvector if aesara.config.floatX == "float64" else at.fvector + vectorX = at.dvector if pytensor.config.floatX == "float64" else at.fvector # Check if callable returns TensorVariable with dummy inputs try: @@ -331,7 +331,7 @@ def create_sum_stat_op_from_fn(fn): except Exception: pass - # Otherwise, automatically wrap in Aesara Op + # Otherwise, automatically wrap in PyTensor Op class SumStat(Op): def make_node(self, x): x = at.as_tensor_variable(x) @@ -339,14 +339,14 @@ def make_node(self, x): def perform(self, node, inputs, outputs): (x,) = inputs - outputs[0][0] = np.atleast_1d(fn(x)).astype(aesara.config.floatX) + outputs[0][0] = np.atleast_1d(fn(x)).astype(pytensor.config.floatX) return SumStat() def create_distance_op_from_fn(fn): - scalarX = at.dscalar if aesara.config.floatX == "float64" else at.fscalar - vectorX = at.dvector if aesara.config.floatX == "float64" else at.fvector + scalarX = at.dscalar if pytensor.config.floatX == "float64" else at.fscalar + vectorX = at.dvector if pytensor.config.floatX == "float64" else at.fvector # Check if callable returns TensorVariable with dummy inputs try: @@ -356,7 +356,7 @@ def create_distance_op_from_fn(fn): except Exception: pass - # Otherwise, automatically wrap in Aesara Op + # Otherwise, automatically wrap in PyTensor Op class Distance(Op): def make_node(self, epsilon, obs_data, sim_data): epsilon = at.as_tensor_variable(epsilon) @@ -366,6 +366,8 @@ def make_node(self, epsilon, obs_data, sim_data): def perform(self, node, inputs, outputs): eps, obs_data, sim_data = inputs - outputs[0][0] = np.atleast_1d(fn(eps, obs_data, sim_data)).astype(aesara.config.floatX) + outputs[0][0] = np.atleast_1d(fn(eps, obs_data, sim_data)).astype( + pytensor.config.floatX + ) return Distance() diff --git a/pymc/distributions/timeseries.py b/pymc/distributions/timeseries.py index 3aed94a4d..1af675126 100644 --- a/pymc/distributions/timeseries.py +++ b/pymc/distributions/timeseries.py @@ -17,15 +17,14 @@ from abc import ABCMeta from typing import Callable, Optional -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.graph.basic import Node, clone_replace -from aesara.tensor import TensorVariable -from aesara.tensor.random.op import RandomVariable +from pytensor.graph.basic import Node, clone_replace +from pytensor.tensor import TensorVariable +from pytensor.tensor.random.op import RandomVariable -from pymc.aesaraf import constant_fold, floatX, intX from pymc.distributions.continuous import Normal, get_tau_sigma from pymc.distributions.distribution import ( Distribution, @@ -43,6 +42,7 @@ ) from pymc.exceptions import NotConstantValueError from pymc.logprob.abstract import _logprob +from pymc.pytensorf import constant_fold, floatX, intX from pymc.util import check_dist_not_registered __all__ = [ @@ -572,7 +572,7 @@ def _get_ar_order(cls, rhos: TensorVariable, ar_order: Optional[int], constant: If ar_order is not specified we do constant folding on the shape of rhos to retrieve it. 
For example, this will detect that - Normal(size=(5, 3)).shape[-1] == 3, which is not known by Aesara before. + Normal(size=(5, 3)).shape[-1] == 3, which is not known by PyTensor before. Raises ------ @@ -629,7 +629,7 @@ def rv_op(cls, rhos, sigma, init_dist, steps, ar_order, constant_term, size=None rhos_bcast_shape_ = (*rhos_bcast_shape_[:-1], rhos_bcast_shape_[-1] + 1) rhos_bcast_ = at.broadcast_to(rhos_, rhos_bcast_shape_) - noise_rng = aesara.shared(np.random.default_rng()) + noise_rng = pytensor.shared(np.random.default_rng()) def step(*args): *prev_xs, reversed_rhos, sigma, rng = args @@ -641,7 +641,7 @@ def step(*args): return new_x, {rng: next_rng} # We transpose inputs as scan iterates over first dimension - innov_, innov_updates_ = aesara.scan( + innov_, innov_updates_ = pytensor.scan( fn=step, outputs_info=[{"initial": init_.T, "taps": range(-ar_order, 0)}], non_sequences=[rhos_bcast_.T[::-1], sigma_.T, noise_rng], @@ -804,7 +804,7 @@ def rv_op(cls, omega, alpha_1, beta_1, initial_vol, init_dist, steps, size=None) beta_1_ = beta_1.type() steps_ = steps.type() - noise_rng = aesara.shared(np.random.default_rng()) + noise_rng = pytensor.shared(np.random.default_rng()) def step(prev_y, prev_sigma, omega, alpha_1, beta_1, rng): new_sigma = at.sqrt( @@ -813,7 +813,7 @@ def step(prev_y, prev_sigma, omega, alpha_1, beta_1, rng): next_rng, new_y = Normal.dist(mu=0, sigma=new_sigma, rng=rng).owner.outputs return (new_y, new_sigma), {rng: next_rng} - (y_t, _), innov_updates_ = aesara.scan( + (y_t, _), innov_updates_ = pytensor.scan( fn=step, outputs_info=[init_, initial_vol_ * at.ones(batch_size)], non_sequences=[omega_, alpha_1_, beta_1_, noise_rng], @@ -861,7 +861,7 @@ def garch11_logp( def volatility_update(x, vol, w, a, b): return at.sqrt(w + a * at.square(x) + b * at.square(vol)) - vol, _ = aesara.scan( + vol, _ = pytensor.scan( fn=volatility_update, sequences=[value_dimswapped[:-1]], outputs_info=[initial_vol], @@ -986,7 +986,7 @@ def rv_op(cls, init_dist, steps, sde_pars, dt, sde_fn, size=None): sde_pars_ = [x.type() for x in sde_pars] steps_ = steps.type() - noise_rng = aesara.shared(np.random.default_rng()) + noise_rng = pytensor.shared(np.random.default_rng()) def step(*prev_args): prev_y, *prev_sde_pars, rng = prev_args @@ -996,7 +996,7 @@ def step(*prev_args): next_rng, next_y = Normal.dist(mu=mu, sigma=sigma, rng=rng).owner.outputs return next_y, {rng: next_rng} - y_t, innov_updates_ = aesara.scan( + y_t, innov_updates_ = pytensor.scan( fn=step, outputs_info=[init_], non_sequences=sde_pars_ + [noise_rng], diff --git a/pymc/distributions/transforms.py b/pymc/distributions/transforms.py index a693441e6..3a2fde123 100644 --- a/pymc/distributions/transforms.py +++ b/pymc/distributions/transforms.py @@ -13,15 +13,14 @@ # limitations under the License. 
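The timeseries hunks above share one pattern: a Generator-backed shared variable is passed to the scan step and threaded forward through the step's update dictionary. A standalone sketch of that pattern, with illustrative variable names that are not taken from the diff:

```python
import numpy as np
import pytensor
import pytensor.tensor as at

rho = at.scalar("rho")
sigma = at.scalar("sigma")
x0 = at.scalar("x0")
n_steps = at.iscalar("n_steps")

noise_rng = pytensor.shared(np.random.default_rng(42))


def step(prev_x, rho, sigma, rng):
    # Each step returns the new state plus an update for the shared RNG
    next_rng, innovation = at.random.normal(0.0, sigma, rng=rng).owner.outputs
    return rho * prev_x + innovation, {rng: next_rng}


xs, updates = pytensor.scan(
    fn=step,
    outputs_info=[x0],
    non_sequences=[rho, sigma, noise_rng],
    n_steps=n_steps,
    strict=True,
)

ar1_fn = pytensor.function([x0, rho, sigma, n_steps], xs, updates=updates)
print(ar1_fn(0.0, 0.9, 1.0, 5))
```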
from functools import singledispatch -import aesara.tensor as at import numpy as np - -from aesara.graph import Op -from aesara.tensor import TensorVariable +import pytensor.tensor as at # ignore mypy error because it somehow considers that # "numpy.core.numeric has no attribute normalize_axis_tuple" from numpy.core.numeric import normalize_axis_tuple # type: ignore +from pytensor.graph import Op +from pytensor.tensor import TensorVariable from pymc.logprob.transforms import ( CircularTransform, diff --git a/pymc/distributions/truncated.py b/pymc/distributions/truncated.py index 288da18c8..3e5eec406 100644 --- a/pymc/distributions/truncated.py +++ b/pymc/distributions/truncated.py @@ -1,17 +1,17 @@ from functools import singledispatch -import aesara -import aesara.tensor as at import numpy as np - -from aesara import scan -from aesara.graph import Op -from aesara.graph.basic import Node -from aesara.raise_op import CheckAndRaise -from aesara.scan import until -from aesara.tensor import TensorConstant, TensorVariable -from aesara.tensor.random.basic import NormalRV -from aesara.tensor.random.op import RandomVariable +import pytensor +import pytensor.tensor as at + +from pytensor import scan +from pytensor.graph import Op +from pytensor.graph.basic import Node +from pytensor.raise_op import CheckAndRaise +from pytensor.scan import until +from pytensor.tensor import TensorConstant, TensorVariable +from pytensor.tensor.random.basic import NormalRV +from pytensor.tensor.random.op import RandomVariable from pymc.distributions.continuous import TruncatedNormal, bounded_cont_transform from pymc.distributions.dist_math import check_parameters @@ -30,7 +30,7 @@ class TruncatedRV(SymbolicRandomVariable): - """An `Op` constructed from an Aesara graph that represents a truncated univariate + """An `Op` constructed from a PyTensor graph that represents a truncated univariate random variable.""" default_output = 1 @@ -167,7 +167,7 @@ def rv_op(cls, dist, lower, upper, max_n_steps, size=None): # We will use a Shared RNG variable because Scan demands it, even though it # would not be necessary for the OpFromGraph inverse cdf. - rng = aesara.shared(np.random.default_rng()) + rng = pytensor.shared(np.random.default_rng()) rv_ = dist.owner.op.make_node(rng, *rv_inputs_).default_output() # Try to use inverted cdf sampling diff --git a/pymc/func_utils.py b/pymc/func_utils.py index 1aa6334ee..9d46b5729 100644 --- a/pymc/func_utils.py +++ b/pymc/func_utils.py @@ -13,10 +13,10 @@ # limitations under the License.
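The `TruncatedRV` changes above back the user-facing `Truncated` distribution; a small usage sketch, assuming a current PyMC build (the Gamma base and the bounds are invented):

```python
import pymc as pm

# The base distribution must be an unregistered .dist() variable
base = pm.Gamma.dist(alpha=2.0, beta=1.0)
x = pm.Truncated.dist(base, lower=0.5, upper=3.0)
print(pm.draw(x, draws=5))
```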
from typing import Callable, Dict, Optional, Union -import aesara.tensor as aet import numpy as np +import pytensor.tensor as aet -from aesara.gradient import NullTypeGradError +from pytensor.gradient import NullTypeGradError from scipy import optimize import pymc as pm @@ -160,19 +160,19 @@ def find_constrained_prior( ) target = (aet.exp(logcdf_lower) - mass_below_lower) ** 2 - target_fn = pm.aesaraf.compile_pymc([dist_params], target, allow_input_downcast=True) + target_fn = pm.pytensorf.compile_pymc([dist_params], target, allow_input_downcast=True) constraint = aet.exp(logcdf_upper) - aet.exp(logcdf_lower) - constraint_fn = pm.aesaraf.compile_pymc([dist_params], constraint, allow_input_downcast=True) + constraint_fn = pm.pytensorf.compile_pymc([dist_params], constraint, allow_input_downcast=True) jac: Union[str, Callable] constraint_jac: Union[str, Callable] try: - aesara_jac = pm.gradient(target, [dist_params]) - jac = pm.aesaraf.compile_pymc([dist_params], aesara_jac, allow_input_downcast=True) - aesara_constraint_jac = pm.gradient(constraint, [dist_params]) - constraint_jac = pm.aesaraf.compile_pymc( - [dist_params], aesara_constraint_jac, allow_input_downcast=True + pytensor_jac = pm.gradient(target, [dist_params]) + jac = pm.pytensorf.compile_pymc([dist_params], pytensor_jac, allow_input_downcast=True) + pytensor_constraint_jac = pm.gradient(constraint, [dist_params]) + constraint_jac = pm.pytensorf.compile_pymc( + [dist_params], pytensor_constraint_jac, allow_input_downcast=True ) # when PyMC cannot compute the gradient except (NotImplementedError, NullTypeGradError): diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index 2894e7cd9..edb7f93be 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -18,13 +18,13 @@ from numbers import Number from operator import add, mul -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.graph.basic import Variable -from aesara.tensor.sharedvar import TensorSharedVariable -from aesara.tensor.var import TensorConstant, TensorVariable +from pytensor.graph.basic import Variable +from pytensor.tensor.sharedvar import TensorSharedVariable +from pytensor.tensor.var import TensorConstant, TensorVariable __all__ = [ "Constant", @@ -122,7 +122,7 @@ def __rmul__(self, other): def __pow__(self, other): if ( - isinstance(other, aesara.compile.SharedVariable) + isinstance(other, pytensor.compile.SharedVariable) and other.get_value().squeeze().shape == () ): other = at.squeeze(other) @@ -606,7 +606,7 @@ def diag(self, X): class WarpedInput(Covariance): r""" Warp the inputs of any kernel using an arbitrary function - defined using Aesara. + defined using PyTensor. .. math:: k(x, x') = k(w(x), w(x')) @@ -615,7 +615,7 @@ class WarpedInput(Covariance): ---------- cov_func: Covariance warp_func: callable - Aesara function of X and additional optional arguments. + PyTensor function of X and additional optional arguments. args: optional, tuple or list of scalars or PyMC variables Additional inputs (besides X or Xs) to warp_func. """ @@ -645,7 +645,7 @@ def diag(self, X): class Gibbs(Covariance): r""" The Gibbs kernel. Use an arbitrary lengthscale function defined - using Aesara. Only tested in one dimension. + using PyTensor. Only tested in one dimension. .. math:: k(x, x') = \sqrt{\frac{2\ell(x)\ell(x')}{\ell^2(x) + \ell^2(x')}} @@ -655,7 +655,7 @@ class Gibbs(Covariance): Parameters ---------- lengthscale_func: callable - Aesara function of X and additional optional arguments. 
+ PyTensor function of X and additional optional arguments. args: optional, tuple or list of scalars or PyMC variables Additional inputs (besides X or Xs) to lengthscale_func. """ @@ -706,7 +706,7 @@ def diag(self, X): class ScaledCov(Covariance): r""" Construct a kernel by multiplying a base kernel with a scaling - function defined using Aesara. The scaling function is + function defined using PyTensor. The scaling function is non-negative, and can be parameterized. .. math:: @@ -717,7 +717,7 @@ class ScaledCov(Covariance): cov_func: Covariance Base kernel or covariance function scaling_func: callable - Aesara function of X and additional optional arguments. + PyTensor function of X and additional optional arguments. args: optional, tuple or list of scalars or PyMC variables Additional inputs (besides X or Xs) to lengthscale_func. """ diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 61008381d..261c21bc8 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -14,10 +14,10 @@ import warnings -import aesara.tensor as at import numpy as np +import pytensor.tensor as at -from aesara.tensor.nlinalg import eigh +from pytensor.tensor.nlinalg import eigh import pymc as pm diff --git a/pymc/gp/mean.py b/pymc/gp/mean.py index 37e12dcd2..0370b1293 100644 --- a/pymc/gp/mean.py +++ b/pymc/gp/mean.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aesara.tensor as at +import pytensor.tensor as at __all__ = ["Zero", "Constant", "Linear"] diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 8e109a7f6..9f13df836 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,22 +14,21 @@ import warnings -import aesara.tensor as at import numpy as np +import pytensor.tensor as at -from aesara.compile import SharedVariable -from aesara.tensor.slinalg import ( # noqa: W0611; pylint: disable=unused-import +from pytensor.compile import SharedVariable +from pytensor.tensor.slinalg import ( # noqa: W0611; pylint: disable=unused-import SolveTriangular, cholesky, solve, ) -from aesara.tensor.var import TensorConstant +from pytensor.tensor.var import TensorConstant from scipy.cluster.vq import kmeans -from pymc.aesaraf import compile_pymc, walk_model - # Avoid circular dependency when importing modelcontext from pymc.distributions.distribution import Distribution +from pymc.pytensorf import compile_pymc, walk_model _ = Distribution # keep both pylint and black happy from pymc.model import modelcontext diff --git a/pymc/initial_point.py b/pymc/initial_point.py index a0b2514dd..df2895e1b 100644 --- a/pymc/initial_point.py +++ b/pymc/initial_point.py @@ -16,16 +16,16 @@ from typing import Callable, Dict, List, Optional, Sequence, Set, Union -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.graph.basic import Variable -from aesara.graph.fg import FunctionGraph -from aesara.tensor.var import TensorVariable +from pytensor.graph.basic import Variable +from pytensor.graph.fg import FunctionGraph +from pytensor.tensor.var import TensorVariable -from pymc.aesaraf import compile_pymc, find_rng_nodes, replace_rng_nodes, reseed_rngs from pymc.logprob.transforms import RVTransform +from pymc.pytensorf import compile_pymc, find_rng_nodes, replace_rng_nodes, reseed_rngs from pymc.util import get_transformed_name, get_untransformed_name, is_transformed_name StartDict = Dict[Union[Variable, str], Union[np.ndarray, Variable, str]] @@ -149,7 +149,7 @@ def make_initial_point_fn( # Replace original 
rng shared variables so that we don't mess with them # when calling the final seeded function initial_values = replace_rng_nodes(initial_values) - func = compile_pymc(inputs=[], outputs=initial_values, mode=aesara.compile.mode.FAST_COMPILE) + func = compile_pymc(inputs=[], outputs=initial_values, mode=pytensor.compile.mode.FAST_COMPILE) varnames = [] for var in model.free_RVs: @@ -206,7 +206,7 @@ def make_initial_point_expression( Returns ------- initial_points : list of TensorVariable - Aesara expressions for initial values of the free random variables. + PyTensor expressions for initial values of the free random variables. """ from pymc.distributions.distribution import moment diff --git a/pymc/logprob/abstract.py b/pymc/logprob/abstract.py index e5818bd46..070f28a95 100644 --- a/pymc/logprob/abstract.py +++ b/pymc/logprob/abstract.py @@ -40,12 +40,12 @@ from functools import singledispatch from typing import Callable, List, Sequence, Tuple -from aesara.graph.basic import Apply, Variable -from aesara.graph.op import Op -from aesara.graph.utils import MetaType -from aesara.tensor import TensorVariable -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.random.op import RandomVariable +from pytensor.graph.basic import Apply, Variable +from pytensor.graph.op import Op +from pytensor.graph.utils import MetaType +from pytensor.tensor import TensorVariable +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.random.op import RandomVariable def logprob(rv_var, *rv_values, **kwargs): diff --git a/pymc/logprob/censoring.py b/pymc/logprob/censoring.py index 8beeb9345..f03d052b5 100644 --- a/pymc/logprob/censoring.py +++ b/pymc/logprob/censoring.py @@ -36,16 +36,16 @@ from typing import List, Optional -import aesara.tensor as at import numpy as np - -from aesara.graph.basic import Node -from aesara.graph.fg import FunctionGraph -from aesara.graph.rewriting.basic import node_rewriter -from aesara.scalar.basic import Ceil, Clip, Floor, RoundHalfToEven -from aesara.scalar.basic import clip as scalar_clip -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.var import TensorConstant +import pytensor.tensor as at + +from pytensor.graph.basic import Node +from pytensor.graph.fg import FunctionGraph +from pytensor.graph.rewriting.basic import node_rewriter +from pytensor.scalar.basic import Ceil, Clip, Floor, RoundHalfToEven +from pytensor.scalar.basic import clip as scalar_clip +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.var import TensorConstant from pymc.logprob.abstract import ( MeasurableElemwise, diff --git a/pymc/logprob/cumsum.py b/pymc/logprob/cumsum.py index 5c070b8b0..5eacefdd2 100644 --- a/pymc/logprob/cumsum.py +++ b/pymc/logprob/cumsum.py @@ -36,10 +36,10 @@ from typing import List, Optional -import aesara.tensor as at +import pytensor.tensor as at -from aesara.graph.rewriting.basic import node_rewriter -from aesara.tensor.extra_ops import CumOp +from pytensor.graph.rewriting.basic import node_rewriter +from pytensor.tensor.extra_ops import CumOp from pymc.logprob.abstract import ( MeasurableVariable, diff --git a/pymc/logprob/joint_logprob.py b/pymc/logprob/joint_logprob.py index aa23a0268..fbf9d6fc5 100644 --- a/pymc/logprob/joint_logprob.py +++ b/pymc/logprob/joint_logprob.py @@ -39,13 +39,13 @@ from collections import deque from typing import Dict, Optional, Union -import aesara.tensor as at +import pytensor.tensor as at -from aesara import config -from aesara.graph.basic import graph_inputs, io_toposort -from 
aesara.graph.op import compute_test_value -from aesara.graph.rewriting.basic import GraphRewriter, NodeRewriter -from aesara.tensor.var import TensorVariable +from pytensor import config +from pytensor.graph.basic import graph_inputs, io_toposort +from pytensor.graph.op import compute_test_value +from pytensor.graph.rewriting.basic import GraphRewriter, NodeRewriter +from pytensor.tensor.var import TensorVariable from pymc.logprob.abstract import _logprob, get_measurable_outputs from pymc.logprob.rewriting import construct_ir_fgraph @@ -69,7 +69,7 @@ def factorized_joint_logprob( .. code-block:: python - import aesara.tensor as at + import pytensor.tensor as at sigma2_rv = at.random.invgamma(0.5, 0.5) Y_rv = at.random.normal(0, at.sqrt(sigma2_rv)) diff --git a/pymc/logprob/mixture.py b/pymc/logprob/mixture.py index 973cc46b1..275b55e93 100644 --- a/pymc/logprob/mixture.py +++ b/pymc/logprob/mixture.py @@ -36,35 +36,35 @@ from typing import List, Optional, Tuple, Union, cast -import aesara -import aesara.tensor as at +import pytensor +import pytensor.tensor as at -from aesara.graph.basic import Apply, Variable -from aesara.graph.fg import FunctionGraph -from aesara.graph.op import Op, compute_test_value -from aesara.graph.rewriting.basic import ( +from pytensor.graph.basic import Apply, Variable +from pytensor.graph.fg import FunctionGraph +from pytensor.graph.op import Op, compute_test_value +from pytensor.graph.rewriting.basic import ( EquilibriumGraphRewriter, node_rewriter, pre_greedy_node_rewriter, ) -from aesara.ifelse import ifelse -from aesara.scalar.basic import Switch -from aesara.tensor.basic import Join, MakeVector -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.random.rewriting import ( +from pytensor.ifelse import ifelse +from pytensor.scalar.basic import Switch +from pytensor.tensor.basic import Join, MakeVector +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.random.rewriting import ( local_dimshuffle_rv_lift, local_subtensor_rv_lift, ) -from aesara.tensor.shape import shape_tuple -from aesara.tensor.subtensor import ( +from pytensor.tensor.shape import shape_tuple +from pytensor.tensor.subtensor import ( as_index_literal, as_nontensor_scalar, get_canonical_form_slice, is_basic_idx, ) -from aesara.tensor.type import TensorType -from aesara.tensor.type_other import NoneConst, NoneTypeT, SliceType -from aesara.tensor.var import TensorVariable +from pytensor.tensor.type import TensorType +from pytensor.tensor.type_other import NoneConst, NoneTypeT, SliceType +from pytensor.tensor.var import TensorVariable from pymc.logprob.abstract import ( MeasurableVariable, @@ -263,7 +263,7 @@ def get_stack_mixture_vars( join_axis = joined_rvs.owner.inputs[0] try: # TODO: Find better solution to avoid this circular dependency - from pymc.aesaraf import constant_fold + from pymc.pytensorf import constant_fold join_axis = int(constant_fold((join_axis,))[0]) except ValueError: @@ -330,7 +330,7 @@ def mixture_replace(fgraph, node): new_mixture_rv = new_node.default_output() - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": # We can't use `MixtureRV` to compute a test value; instead, we'll use # the original node's test value. 
if not hasattr(old_mixture_rv.tag, "test_value"): @@ -380,7 +380,7 @@ def switch_mixture_replace(fgraph, node): new_mixture_rv = new_node.default_output() - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": if not hasattr(old_mixture_rv.tag, "test_value"): compute_test_value(node) @@ -417,7 +417,7 @@ def logprob_MixtureRV( original_shape = (len(comp_rvs),) else: # TODO: Find better solution to avoid this circular dependency - from pymc.aesaraf import constant_fold + from pymc.pytensorf import constant_fold join_axis_val = constant_fold((join_axis,))[0].item() original_shape = shape_tuple(comp_rvs[0]) @@ -459,7 +459,7 @@ def logprob_MixtureRV( "mixture_replace", EquilibriumGraphRewriter( [mixture_replace, switch_mixture_replace], - max_use_ratio=aesara.config.optdb__max_use_ratio, + max_use_ratio=pytensor.config.optdb__max_use_ratio, ), 0, "basic", diff --git a/pymc/logprob/rewriting.py b/pymc/logprob/rewriting.py index f4712bfc5..7af01e976 100644 --- a/pymc/logprob/rewriting.py +++ b/pymc/logprob/rewriting.py @@ -36,20 +36,20 @@ from typing import Dict, Optional, Tuple -import aesara.tensor as at - -from aesara.compile.mode import optdb -from aesara.graph.basic import Variable -from aesara.graph.features import Feature -from aesara.graph.fg import FunctionGraph -from aesara.graph.rewriting.basic import GraphRewriter, node_rewriter -from aesara.graph.rewriting.db import EquilibriumDB, RewriteDatabaseQuery, SequenceDB -from aesara.tensor.elemwise import DimShuffle, Elemwise -from aesara.tensor.extra_ops import BroadcastTo -from aesara.tensor.random.rewriting import local_subtensor_rv_lift -from aesara.tensor.rewriting.basic import register_canonicalize, register_useless -from aesara.tensor.rewriting.shape import ShapeFeature -from aesara.tensor.subtensor import ( +import pytensor.tensor as at + +from pytensor.compile.mode import optdb +from pytensor.graph.basic import Variable +from pytensor.graph.features import Feature +from pytensor.graph.fg import FunctionGraph +from pytensor.graph.rewriting.basic import GraphRewriter, node_rewriter +from pytensor.graph.rewriting.db import EquilibriumDB, RewriteDatabaseQuery, SequenceDB +from pytensor.tensor.elemwise import DimShuffle, Elemwise +from pytensor.tensor.extra_ops import BroadcastTo +from pytensor.tensor.random.rewriting import local_subtensor_rv_lift +from pytensor.tensor.rewriting.basic import register_canonicalize, register_useless +from pytensor.tensor.rewriting.shape import ShapeFeature +from pytensor.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedIncSubtensor1, AdvancedSubtensor, @@ -57,7 +57,7 @@ IncSubtensor, Subtensor, ) -from aesara.tensor.var import TensorVariable +from pytensor.tensor.var import TensorVariable from pymc.logprob.abstract import MeasurableVariable from pymc.logprob.utils import DiracDelta, indices_from_subtensor @@ -280,8 +280,8 @@ def construct_ir_fgraph( A custom IR rewriter can be specified. By default, `logprob_rewrites_db.query(RewriteDatabaseQuery(include=["basic"]))` is used. - Our measurable IR takes the form of an Aesara graph that is more-or-less - equivalent to a given Aesara graph (i.e. the keys of `rv_values`) but + Our measurable IR takes the form of a PyTensor graph that is more-or-less + equivalent to a given PyTensor graph (i.e. the keys of `rv_values`) but contains `Op`s that are subclasses of the `MeasurableVariable` type in place of ones that do not inherit from `MeasurableVariable` in the original graph but are nevertheless measurable.
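`factorized_joint_logprob`, used throughout these hunks, maps random variables to value variables and returns one logp term per value variable. A sketch continuing the invgamma/normal example from the docstring above; the value-variable handling shown here is assumed from the vendored aeppl-style API, not quoted from the diff:

```python
import pytensor.tensor as at

from pymc.logprob.joint_logprob import factorized_joint_logprob

sigma2_rv = at.random.invgamma(0.5, 0.5, name="sigma2")
Y_rv = at.random.normal(0, at.sqrt(sigma2_rv), name="Y")

# Value variables stand in for the random variables in the measurable IR
sigma2_vv = sigma2_rv.clone()
Y_vv = Y_rv.clone()

logp_terms = factorized_joint_logprob({sigma2_rv: sigma2_vv, Y_rv: Y_vv})
total_logp = at.add(*logp_terms.values())
print(total_logp.eval({sigma2_vv: 1.0, Y_vv: 0.5}))
```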
@@ -302,7 +302,7 @@ def construct_ir_fgraph( For instance, some `Op`s will be lifted through `MeasurableVariable`\s in this IR, and the resulting graphs will not be computationally sound, because they wouldn't produce independent samples when the original graph - would. See https://github.com/aesara-devs/aeppl/pull/78. + would. See https://github.com/pytensor-devs/aeppl/pull/78. Returns ------- diff --git a/pymc/logprob/scan.py b/pymc/logprob/scan.py index ec9b05cf6..41f9bbd7c 100644 --- a/pymc/logprob/scan.py +++ b/pymc/logprob/scan.py @@ -37,22 +37,22 @@ from copy import copy from typing import Callable, Dict, Iterable, List, Tuple, cast -import aesara -import aesara.tensor as at import numpy as np - -from aesara.graph.basic import Variable -from aesara.graph.fg import FunctionGraph -from aesara.graph.op import compute_test_value -from aesara.graph.rewriting.basic import node_rewriter -from aesara.graph.rewriting.db import RewriteDatabaseQuery -from aesara.scan.op import Scan -from aesara.scan.rewriting import scan_eqopt1, scan_eqopt2 -from aesara.scan.utils import ScanArgs -from aesara.tensor.random.type import RandomType -from aesara.tensor.subtensor import Subtensor, indices_from_subtensor -from aesara.tensor.var import TensorVariable -from aesara.updates import OrderedUpdates +import pytensor +import pytensor.tensor as at + +from pytensor.graph.basic import Variable +from pytensor.graph.fg import FunctionGraph +from pytensor.graph.op import compute_test_value +from pytensor.graph.rewriting.basic import node_rewriter +from pytensor.graph.rewriting.db import RewriteDatabaseQuery +from pytensor.scan.op import Scan +from pytensor.scan.rewriting import scan_eqopt1, scan_eqopt2 +from pytensor.scan.utils import ScanArgs +from pytensor.tensor.random.type import RandomType +from pytensor.tensor.subtensor import Subtensor, indices_from_subtensor +from pytensor.tensor.var import TensorVariable +from pytensor.updates import OrderedUpdates from pymc.logprob.abstract import MeasurableVariable, _get_measurable_outputs, _logprob from pymc.logprob.joint_logprob import factorized_joint_logprob @@ -210,7 +210,7 @@ def remove(x, i): slice_seqs = zip(-np.asarray(taps), [n if n < 0 else None for n in reversed(taps)]) - # XXX: If the caller passes the variables output by `aesara.scan`, it's + # XXX: If the caller passes the variables output by `pytensor.scan`, it's # likely that this will fail, because those variables can sometimes be # slices of the actual outer-inputs (e.g. `out[1:]` instead of `out` # when `taps=[-1]`). 
@@ -355,7 +355,7 @@ def find_measurable_scans(fgraph, node): # Find the un-output `MeasurableVariable`s created in the inner-graph clients: Dict[Variable, List[Variable]] = {} - local_fgraph_topo = aesara.graph.basic.io_toposort( + local_fgraph_topo = pytensor.graph.basic.io_toposort( curr_scanargs.inner_inputs, [o for o in curr_scanargs.inner_outputs if not isinstance(o.type, RandomType)], clients=clients, @@ -414,7 +414,7 @@ def find_measurable_scans(fgraph, node): return None # We need this for the `clone` in the loop that follows - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": compute_test_value(node) # We're going to replace the user's random variable/value variable mappings @@ -452,14 +452,14 @@ def find_measurable_scans(fgraph, node): alt_type = var_info.name[(var_info.name.index("_", 6) + 1) :] outer_input_var = getattr(curr_scanargs, f"outer_in_{alt_type}")[var_info.index] - # These outer-inputs are using by `aesara.scan.utils.expand_empty`, and + # These outer-inputs are using by `pytensor.scan.utils.expand_empty`, and # are expected to consist of only a single `set_subtensor` call. # That's why we can simply replace the first argument of the node. assert isinstance(outer_input_var.owner.op, inc_subtensor_ops) # We're going to set those values on our `new_val_var` so that it can # serve as a complete replacement for the old input `outer_input_var`. - # from aesara.graph import clone_replace + # from pytensor.graph import clone_replace # new_val_var = outer_input_var.owner.clone_with_new_inputs( [new_val_var] + outer_input_var.owner.inputs[1:] diff --git a/pymc/logprob/tensor.py b/pymc/logprob/tensor.py index f6efe0fba..628825060 100644 --- a/pymc/logprob/tensor.py +++ b/pymc/logprob/tensor.py @@ -36,16 +36,19 @@ from typing import List, Optional, Union -import aesara - -from aesara import tensor as at -from aesara.graph.op import compute_test_value -from aesara.graph.rewriting.basic import node_rewriter -from aesara.tensor.basic import Join, MakeVector -from aesara.tensor.elemwise import DimShuffle -from aesara.tensor.extra_ops import BroadcastTo -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.rewriting import local_dimshuffle_rv_lift, local_rv_size_lift +import pytensor + +from pytensor import tensor as at +from pytensor.graph.op import compute_test_value +from pytensor.graph.rewriting.basic import node_rewriter +from pytensor.tensor.basic import Join, MakeVector +from pytensor.tensor.elemwise import DimShuffle +from pytensor.tensor.extra_ops import BroadcastTo +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.rewriting import ( + local_dimshuffle_rv_lift, + local_rv_size_lift, +) from pymc.logprob.abstract import ( MeasurableVariable, @@ -112,7 +115,7 @@ def naive_bcast_rv_lift(fgraph, node): ] bcasted_node = lifted_node.op.make_node(rng, size, dtype, *new_dist_params) - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": compute_test_value(bcasted_node) return [bcasted_node.outputs[1]] @@ -148,7 +151,7 @@ def logprob_join(op, values, axis, *base_vars, **kwargs): base_var_shapes = [base_var.shape[axis] for base_var in base_vars] # TODO: Find better way to avoid circular dependency - from pymc.aesaraf import constant_fold + from pymc.pytensorf import constant_fold # We don't need the graph to be constant, just to have RandomVariables removed base_var_shapes = constant_fold(base_var_shapes, raise_not_constant=False) diff 
--git a/pymc/logprob/transforms.py b/pymc/logprob/transforms.py index fb1dc19e8..0587ccf90 100644 --- a/pymc/logprob/transforms.py +++ b/pymc/logprob/transforms.py @@ -40,22 +40,22 @@ from functools import partial, singledispatch from typing import Callable, Dict, List, Optional, Tuple, Union -import aesara.tensor as at - -from aesara.gradient import DisconnectedType, jacobian -from aesara.graph.basic import Apply, Node, Variable -from aesara.graph.features import AlreadyThere, Feature -from aesara.graph.fg import FunctionGraph -from aesara.graph.op import Op -from aesara.graph.rewriting.basic import GraphRewriter, in2out, node_rewriter -from aesara.scalar import Add, Exp, Log, Mul -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.rewriting.basic import ( +import pytensor.tensor as at + +from pytensor.gradient import DisconnectedType, jacobian +from pytensor.graph.basic import Apply, Node, Variable +from pytensor.graph.features import AlreadyThere, Feature +from pytensor.graph.fg import FunctionGraph +from pytensor.graph.op import Op +from pytensor.graph.rewriting.basic import GraphRewriter, in2out, node_rewriter +from pytensor.scalar import Add, Exp, Log, Mul +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.rewriting.basic import ( register_specialize, register_stabilize, register_useless, ) -from aesara.tensor.var import TensorVariable +from pytensor.tensor.var import TensorVariable from pymc.logprob.abstract import ( MeasurableElemwise, diff --git a/pymc/logprob/utils.py b/pymc/logprob/utils.py index 4f7e0058c..2887a0318 100644 --- a/pymc/logprob/utils.py +++ b/pymc/logprob/utils.py @@ -40,13 +40,13 @@ import numpy as np -from aesara import tensor as at -from aesara.graph import Apply, Op -from aesara.graph.basic import Constant, clone_get_equiv, graph_inputs, walk -from aesara.graph.fg import FunctionGraph -from aesara.link.c.type import CType -from aesara.raise_op import CheckAndRaise -from aesara.tensor.var import TensorVariable +from pytensor import tensor as at +from pytensor.graph import Apply, Op +from pytensor.graph.basic import Constant, clone_get_equiv, graph_inputs, walk +from pytensor.graph.fg import FunctionGraph +from pytensor.link.c.type import CType +from pytensor.raise_op import CheckAndRaise +from pytensor.tensor.var import TensorVariable from pymc.logprob.abstract import MeasurableVariable, _logprob diff --git a/pymc/math.py b/pymc/math.py index c4e022df5..384e38557 100644 --- a/pymc/math.py +++ b/pymc/math.py @@ -17,19 +17,19 @@ from functools import partial, reduce -import aesara -import aesara.sparse -import aesara.tensor as at -import aesara.tensor.slinalg # pylint: disable=unused-import import numpy as np +import pytensor +import pytensor.sparse +import pytensor.tensor as at +import pytensor.tensor.slinalg # pylint: disable=unused-import import scipy as sp import scipy.sparse # pylint: disable=unused-import -from aesara.graph.basic import Apply -from aesara.graph.op import Op +from pytensor.graph.basic import Apply +from pytensor.graph.op import Op # pylint: disable=unused-import -from aesara.tensor import ( +from pytensor.tensor import ( abs, and_, ceil, @@ -77,15 +77,14 @@ ) try: - from aesara.tensor.basic import extract_diag + from pytensor.tensor.basic import extract_diag except ImportError: - from aesara.tensor.nlinalg import extract_diag + from pytensor.tensor.nlinalg import extract_diag - -from aesara.tensor.nlinalg import det, matrix_dot, matrix_inverse, trace +from pytensor.tensor.nlinalg import det, matrix_dot, 
matrix_inverse, trace from scipy.linalg import block_diag as scipy_block_diag -from pymc.aesaraf import floatX, ix_, largest_common_dtype +from pymc.pytensorf import floatX, ix_, largest_common_dtype # pylint: enable=unused-import @@ -251,7 +250,7 @@ def kron_diag(*diags): def tround(*args, **kwargs): """ - Temporary function to silence round warning in Aesara. Please remove + Temporary function to silence round warning in PyTensor. Please remove when the warning disappears. """ kwargs["mode"] = "half_to_even" @@ -280,7 +279,7 @@ def invlogit(x, eps=None): def softmax(x, axis=None): - # Ignore vector case UserWarning issued by Aesara. This can be removed once Aesara + # Ignore vector case UserWarning issued by PyTensor. This can be removed once PyTensor # drops that warning with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) @@ -288,7 +287,7 @@ def softmax(x, axis=None): def log_softmax(x, axis=None): - # Ignore vector case UserWarning issued by Aesara. This can be removed once Aesara + # Ignore vector case UserWarning issued by PyTensor. This can be removed once PyTensor # drops that warning with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) @@ -372,8 +371,8 @@ class LogDet(Op): """ def make_node(self, x): - x = aesara.tensor.as_tensor_variable(x) - o = aesara.tensor.scalar(dtype=x.dtype) + x = pytensor.tensor.as_tensor_variable(x) + o = pytensor.tensor.scalar(dtype=x.dtype) return Apply(self, [x], [o]) def perform(self, node, inputs, outputs, params=None): @@ -423,7 +422,7 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): ---------- n: int The number of rows of the triangular matrix. - packed: aesara.vector + packed: pytensor.vector The matrix in packed format. lower: bool, default=True If true, assume that the matrix is lower triangular. 
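A quick check of the packing convention documented above for `expand_packed_triangular` (the numbers are invented):

```python
import numpy as np
import pytensor
import pytensor.tensor as at

from pymc.math import expand_packed_triangular

# Row-wise packed lower triangle of a 3x3 matrix
packed = at.as_tensor_variable(
    np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], dtype=pytensor.config.floatX)
)
L = expand_packed_triangular(3, packed, lower=True)
print(L.eval())
# [[1. 0. 0.]
#  [2. 3. 0.]
#  [4. 5. 6.]]
```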
@@ -442,11 +441,11 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False): diag_idxs = np.arange(2, n + 2)[::-1].cumsum() - n - 1 return packed[diag_idxs] elif lower: - out = at.zeros((n, n), dtype=aesara.config.floatX) + out = at.zeros((n, n), dtype=pytensor.config.floatX) idxs = np.tril_indices(n) return at.set_subtensor(out[idxs], packed) elif not lower: - out = at.zeros((n, n), dtype=aesara.config.floatX) + out = at.zeros((n, n), dtype=pytensor.config.floatX) idxs = np.triu_indices(n) return at.set_subtensor(out[idxs], packed) @@ -516,9 +515,9 @@ def make_node(self, *matrices): if any(mat.type.ndim != 2 for mat in matrices): raise TypeError("all data arguments must be matrices") if self.sparse: - out_type = aesara.sparse.matrix(self.format, dtype=largest_common_dtype(matrices)) + out_type = pytensor.sparse.matrix(self.format, dtype=largest_common_dtype(matrices)) else: - out_type = aesara.tensor.matrix(dtype=largest_common_dtype(matrices)) + out_type = pytensor.tensor.matrix(dtype=largest_common_dtype(matrices)) return Apply(self, matrices, [out_type]) def perform(self, node, inputs, output_storage, params=None): diff --git a/pymc/model.py b/pymc/model.py index 5c60dc5d0..8c9d85af2 100644 --- a/pymc/model.py +++ b/pymc/model.py @@ -33,23 +33,29 @@ cast, ) -import aesara -import aesara.sparse as sparse -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.sparse as sparse +import pytensor.tensor as at import scipy.sparse as sps -from aesara.compile.sharedvalue import SharedVariable -from aesara.graph.basic import Constant, Variable, graph_inputs -from aesara.graph.fg import FunctionGraph -from aesara.scalar import Cast -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.rewriting import local_subtensor_rv_lift -from aesara.tensor.sharedvar import ScalarSharedVariable -from aesara.tensor.var import TensorConstant, TensorVariable - -from pymc.aesaraf import ( +from pytensor.compile.sharedvalue import SharedVariable +from pytensor.graph.basic import Constant, Variable, graph_inputs +from pytensor.graph.fg import FunctionGraph +from pytensor.scalar import Cast +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.rewriting import local_subtensor_rv_lift +from pytensor.tensor.sharedvar import ScalarSharedVariable +from pytensor.tensor.var import TensorConstant, TensorVariable + +from pymc.blocking import DictToArrayBijection, RaveledVars +from pymc.data import GenTensorVariable, Minibatch +from pymc.distributions.logprob import _joint_logp +from pymc.distributions.transforms import _default_transform +from pymc.exceptions import ImputationWarning, SamplingError, ShapeError, ShapeWarning +from pymc.initial_point import make_initial_point_fn +from pymc.pytensorf import ( PointFunc, SeedSequenceSeed, compile_pymc, @@ -59,12 +65,6 @@ inputvars, replace_rvs_by_values, ) -from pymc.blocking import DictToArrayBijection, RaveledVars -from pymc.data import GenTensorVariable, Minibatch -from pymc.distributions.logprob import _joint_logp -from pymc.distributions.transforms import _default_transform -from pymc.exceptions import ImputationWarning, SamplingError, ShapeError, ShapeWarning -from pymc.initial_point import make_initial_point_fn from pymc.util import ( UNSET, WithMemoization, @@ -151,16 +151,16 @@ def __new__(cls, name, bases, dct, **kwargs): # pylint: disable=unused-argument def __enter__(self): 
self.__class__.context_class.get_contexts().append(self) - # self._aesara_config is set in Model.__new__ + # self._pytensor_config is set in Model.__new__ self._config_context = None - if hasattr(self, "_aesara_config"): - self._config_context = aesara.config.change_flags(**self._aesara_config) + if hasattr(self, "_pytensor_config"): + self._config_context = pytensor.config.change_flags(**self._pytensor_config) self._config_context.__enter__() return self def __exit__(self, typ, value, traceback): # pylint: disable=unused-argument self.__class__.context_class.get_contexts().pop() - # self._aesara_config is set in Model.__new__ + # self._pytensor_config is set in Model.__new__ if self._config_context: self._config_context.__exit__(typ, value, traceback) @@ -273,20 +273,20 @@ def modelcontext(model: Optional["Model"]) -> "Model": class ValueGradFunction: - """Create an Aesara function that computes a value and its gradient. + """Create a PyTensor function that computes a value and its gradient. Parameters ---------- - costs: list of Aesara variables - We compute the weighted sum of the specified Aesara values, and the gradient + costs: list of PyTensor variables + We compute the weighted sum of the specified PyTensor values, and the gradient of that sum. The weights can be specified with `ValueGradFunction.set_weights`. - grad_vars: list of named Aesara variables or None + grad_vars: list of named PyTensor variables or None The arguments with respect to which the gradient is computed. - extra_vars_and_values: dict of Aesara variables and their initial values + extra_vars_and_values: dict of PyTensor variables and their initial values Other arguments of the function that are assumed constant and their values. They are stored in shared variables and can be set using `set_extra_values`. - dtype: str, default=aesara.config.floatX + dtype: str, default=pytensor.config.floatX The dtype of the arrays. casting: {'no', 'equiv', 'save', 'same_kind', 'unsafe'}, default='no' Casting rule for casting `grad_args` to the array dtype. @@ -296,12 +296,12 @@ class ValueGradFunction: compute_grads: bool, default=True If False, return only the logp, not the gradient. kwargs - Extra arguments are passed on to `aesara.function`. + Extra arguments are passed on to `pytensor.function`. Attributes ---------- - profile: Aesara profiling object or None - The profiling object of the Aesara function that computes value and + profile: PyTensor profiling object or None + The profiling object of the PyTensor function that computes value and gradient. This is None unless `profile=True` was set in the kwargs.
""" @@ -331,14 +331,14 @@ def __init__( self._extra_var_names = {var.name for var in extra_vars_and_values.keys()} if dtype is None: - dtype = aesara.config.floatX + dtype = pytensor.config.floatX self.dtype = dtype self._n_costs = len(costs) if self._n_costs == 0: raise ValueError("At least one cost is required.") weights = np.ones(self._n_costs - 1, dtype=self.dtype) - self._weights = aesara.shared(weights, "__weights") + self._weights = pytensor.shared(weights, "__weights") cost = costs[0] for i, val in enumerate(costs[1:]): @@ -362,14 +362,14 @@ def __init__( givens = [] self._extra_vars_shared = {} for var, value in extra_vars_and_values.items(): - shared = aesara.shared( + shared = pytensor.shared( value, var.name + "_shared__", shape=[1 if s == 1 else None for s in value.shape] ) self._extra_vars_shared[var.name] = shared givens.append((var, shared)) if compute_grads: - grads = aesara.grad(cost, grad_vars, disconnected_inputs="ignore") + grads = pytensor.grad(cost, grad_vars, disconnected_inputs="ignore") for grad_wrt, var in zip(grads, grad_vars): grad_wrt.name = f"{var.name}_grad" outputs = [cost] + grads @@ -378,7 +378,7 @@ def __init__( inputs = grad_vars - self._aesara_function = compile_pymc(inputs, outputs, givens=givens, **kwargs) + self._pytensor_function = compile_pymc(inputs, outputs, givens=givens, **kwargs) def set_weights(self, values): if values.shape != (self._n_costs - 1,): @@ -406,7 +406,7 @@ def __call__(self, grad_vars, grad_out=None, extra_vars=None): if isinstance(grad_vars, RaveledVars): grad_vars = list(DictToArrayBijection.rmap(grad_vars).values()) - cost, *grads = self._aesara_function(*grad_vars) + cost, *grads = self._pytensor_function(*grad_vars) if grads: grads_raveled = DictToArrayBijection.map( @@ -423,8 +423,8 @@ def __call__(self, grad_vars, grad_out=None, extra_vars=None): @property def profile(self): - """Profiling information of the underlying Aesara function.""" - return self._aesara_function.profile + """Profiling information of the underlying PyTensor function.""" + return self._pytensor_function.profile class Model(WithMemoization, metaclass=ContextMeta): @@ -528,7 +528,7 @@ def __new__(cls, *args, **kwargs): instance._parent = kwargs.get("model") else: instance._parent = cls.get_context(error_if_none=False) - instance._aesara_config = kwargs.get("aesara_config", {}) + instance._pytensor_config = kwargs.get("pytensor_config", {}) return instance @staticmethod @@ -543,10 +543,10 @@ def __init__( coords=None, check_bounds=True, *, - aesara_config=None, + pytensor_config=None, model=None, ): - del aesara_config, model # used in __new__ + del pytensor_config, model # used in __new__ self.name = self._validate_name(name) self.check_bounds = check_bounds @@ -607,7 +607,7 @@ def isroot(self): return self.parent is None def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): - """Compile an Aesara function that computes logp and gradient. + """Compile an PyTensor function that computes logp and gradient. 
Parameters ---------- @@ -853,30 +853,30 @@ def d2logp( @property def datalogp(self) -> Variable: - """Aesara scalar of log-probability of the observed variables and + """PyTensor scalar of log-probability of the observed variables and potential terms""" return self.observedlogp + self.potentiallogp @property def varlogp(self) -> Variable: - """Aesara scalar of log-probability of the unobserved random variables + """PyTensor scalar of log-probability of the unobserved random variables (excluding deterministic).""" return self.logp(vars=self.free_RVs) @property def varlogp_nojac(self) -> Variable: - """Aesara scalar of log-probability of the unobserved random variables + """PyTensor scalar of log-probability of the unobserved random variables (excluding deterministic) without jacobian term.""" return self.logp(vars=self.free_RVs, jacobian=False) @property def observedlogp(self) -> Variable: - """Aesara scalar of log-probability of the observed variables""" + """PyTensor scalar of log-probability of the observed variables""" return self.logp(vars=self.observed_RVs) @property def potentiallogp(self) -> Variable: - """Aesara scalar of log-probability of the Potential terms""" + """PyTensor scalar of log-probability of the Potential terms""" # Convert random variables in Potential expression into their log-likelihood # inputs and apply their transforms, if any potentials = self.replace_rvs_by_values(self.potentials) @@ -948,7 +948,7 @@ def basic_RVs(self): These are the actual random variable terms that make up the "sample-space" graph (i.e. you can sample these graphs by compiling them - with `aesara.function`). If you want the corresponding log-likelihood terms, + with `pytensor.function`). If you want the corresponding log-likelihood terms, use `model.value_vars` instead. """ return self.free_RVs + self.observed_RVs @@ -959,7 +959,7 @@ def unobserved_RVs(self): These are the actual random variable terms that make up the "sample-space" graph (i.e. you can sample these graphs by compiling them - with `aesara.function`). If you want the corresponding log-likelihood terms, + with `pytensor.function`). If you want the corresponding log-likelihood terms, use `var.unobserved_value_vars` instead. """ return self.free_RVs + self.deterministics @@ -1027,7 +1027,7 @@ def add_coord( Default is False. length : optional, scalar A scalar of the dimensions length. - Defaults to ``aesara.tensor.constant(len(values))``. + Defaults to ``pytensor.tensor.constant(len(values))``. """ if name in {"draw", "chain", "__sample__"}: raise ValueError( @@ -1047,15 +1047,15 @@ def add_coord( raise ValueError(f"Duplicate and incompatible coordinate: {name}.") if length is not None and not isinstance(length, (int, Variable)): raise ValueError( - f"The `length` passed for the '{name}' coord must be an int, Aesara Variable or None." + f"The `length` passed for the '{name}' coord must be an int, PyTensor Variable or None." ) if length is None: length = len(values) if not isinstance(length, Variable): if mutable: - length = aesara.shared(length, name=name) + length = pytensor.shared(length, name=name) else: - length = aesara.tensor.constant(length) + length = pytensor.tensor.constant(length) self._dim_lengths[name] = length self._coords[name] = values @@ -1109,7 +1109,7 @@ def initial_point(self, random_seed: SeedSequenceSeed = None) -> Dict[str, np.nd Parameters ---------- random_seed : SeedSequenceSeed, default None - Seed(s) for generating initial point from the model. 
Passed into :func:`pymc.aesaraf.reseed_rngs` + Seed(s) for generating initial point from the model. Passed into :func:`pymc.pytensorf.reseed_rngs` Returns ------- @@ -1323,7 +1323,7 @@ def register_rv( isinstance(observed, Variable) and not isinstance(observed, (GenTensorVariable, Minibatch)) and observed.owner is not None - # The only Aesara operation we allow on observed data is type casting + # The only PyTensor operation we allow on observed data is type casting # Although we could allow for any graph that does not depend on other RVs and not ( isinstance(observed.owner.op, Elemwise) @@ -1367,7 +1367,7 @@ def make_obs_var( "Dimensionality of data and RV don't match.", actual=data.ndim, expected=rv_var.ndim ) - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": test_value = getattr(rv_var.tag, "test_value", None) if test_value is not None: @@ -1436,7 +1436,7 @@ def make_obs_var( ) (observed_rv_var,) = local_subtensor_rv_lift.transform(fgraph, fgraph.outputs[0].owner) # Make a clone of the RV, but let it create a new rng so that observed and - # missing are not treated as equivalent nodes by aesara. This would happen + # missing are not treated as equivalent nodes by pytensor. This would happen # if the size of the masked and unmasked array happened to coincide _, size, _, *inps = observed_rv_var.owner.inputs observed_rv_var = observed_rv_var.owner.op(*inps, size=size, name=f"{name}_observed") @@ -1494,14 +1494,14 @@ def create_value_var( # Create value variable with the same type as the RV value_var = rv_var.type() value_var.name = rv_var.name - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": value_var.tag.test_value = rv_var.tag.test_value else: # Create value variable with the same type as the transformed RV value_var = transform.forward(rv_var, *rv_var.owner.inputs).type() value_var.name = f"{rv_var.name}_{transform.name}__" value_var.tag.transform = transform - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": value_var.tag.test_value = transform.forward( rv_var, *rv_var.owner.inputs ).tag.test_value @@ -1610,23 +1610,23 @@ def compile_fn( point_fn: bool = True, **kwargs, ) -> Union[PointFunc, Callable[[Sequence[np.ndarray]], Sequence[np.ndarray]]]: - """Compiles an Aesara function + """Compiles a PyTensor function Parameters ---------- outs - Aesara variable or iterable of Aesara variables. + PyTensor variable or iterable of PyTensor variables. inputs - Aesara input variables, defaults to aesaraf.inputvars(outs). + PyTensor input variables, defaults to pytensorf.inputvars(outs). mode - Aesara compilation mode, default=None. + PyTensor compilation mode, default=None. point_fn : bool Whether to wrap the compiled function in a PointFunc, which takes a Point dictionary with model variable names and values as input. Returns ------- - Compiled Aesara function + Compiled PyTensor function """ if inputs is None: inputs = inputvars(outs) @@ -1646,12 +1646,12 @@ def compile_fn( return fn def profile(self, outs, *, n=1000, point=None, profile=True, **kwargs): - """Compiles and profiles an Aesara function which returns ``outs`` and + """Compiles and profiles a PyTensor function which returns ``outs`` and takes values of model vars as a dict as an argument.
Parameters ---------- - outs: Aesara variable or iterable of Aesara variables + outs: PyTensor variable or iterable of PyTensor variables n: int, default 1000 Number of iterations to run point: point @@ -1704,11 +1704,11 @@ def eval_rv_shapes(self) -> Dict[str, Tuple[int, ...]]: outputs.append(transform.forward(rv, *rv.owner.inputs).shape) names.append(rv.name) outputs.append(rv.shape) - f = aesara.function( + f = pytensor.function( inputs=[], outputs=outputs, givens=[(obs, self.rvs_to_values[obs]) for obs in self.observed_RVs], - mode=aesara.compile.mode.FAST_COMPILE, + mode=pytensor.compile.mode.FAST_COMPILE, on_unused_input="ignore", ) return {name: tuple(shape) for name, shape in zip(names, f())} @@ -1880,16 +1880,16 @@ def compile_fn( model: Optional[Model] = None, **kwargs, ) -> Union[PointFunc, Callable[[Sequence[np.ndarray]], Sequence[np.ndarray]]]: - """Compiles an Aesara function + """Compiles a PyTensor function Parameters ---------- outs - Aesara variable or iterable of Aesara variables. + PyTensor variable or iterable of PyTensor variables. inputs - Aesara input variables, defaults to aesaraf.inputvars(outs). + PyTensor input variables, defaults to pytensorf.inputvars(outs). mode - Aesara compilation mode, default=None. + PyTensor compilation mode, default=None. point_fn : bool Whether to wrap the compiled function in a PointFunc, which takes a Point dictionary with model variable names and values as input. @@ -1898,7 +1898,7 @@ def compile_fn( Returns ------- - Compiled Aesara function + Compiled PyTensor function """ model = modelcontext(model) @@ -1986,7 +1986,7 @@ def Deterministic(name, var, model=None, dims=None): Parameters ---------- name: str - var: Aesara variables + var: PyTensor variables auto: bool Add automatically created deterministics (e.g., when imputing missing values) to a separate model.auto_deterministics list for filtering during sampling.
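As a usage illustration of the renamed user-facing API (a sketch added for orientation, not part of the patch; it assumes PyMC with the PyTensor backend installed), `Deterministic` and `Potential` accept arbitrary PyTensor expressions, and the resulting log-probability is compiled through `compile_pymc`, i.e. `pytensor.function`:

import pytensor.tensor as at

import pymc as pm

with pm.Model() as model:
    x = pm.Normal("x", 0.0, 1.0)
    # Deterministic and Potential both take PyTensor expressions built from model variables
    y = pm.Deterministic("y", at.exp(x))
    pm.Potential("soft_constraint", -(x**2))

# compile_logp goes through compile_pymc / pytensor.function under the hood
logp_fn = model.compile_logp()
print(logp_fn(model.initial_point()))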
@@ -2022,7 +2022,7 @@ def Potential(name, var, model=None): Parameters ---------- name: str - var: Aesara variables + var: PyTensor variables Returns ------- diff --git a/pymc/model_graph.py b/pymc/model_graph.py index cb4f8038f..48c0318ac 100644 --- a/pymc/model_graph.py +++ b/pymc/model_graph.py @@ -16,14 +16,14 @@ from collections import defaultdict from typing import Dict, Iterable, List, NewType, Optional, Sequence, Set -from aesara import function -from aesara.compile.sharedvalue import SharedVariable -from aesara.graph import Apply -from aesara.graph.basic import ancestors, walk -from aesara.scalar.basic import Cast -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.var import TensorConstant, TensorVariable +from pytensor import function +from pytensor.compile.sharedvalue import SharedVariable +from pytensor.graph import Apply +from pytensor.graph.basic import ancestors, walk +from pytensor.scalar.basic import Cast +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.var import TensorConstant, TensorVariable import pymc as pm diff --git a/pymc/ode/ode.py b/pymc/ode/ode.py index f400cb7c0..4d67942b0 100644 --- a/pymc/ode/ode.py +++ b/pymc/ode/ode.py @@ -14,20 +14,20 @@ import logging -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import scipy -from aesara.graph.basic import Apply -from aesara.graph.op import Op, get_test_value -from aesara.tensor.type import TensorType +from pytensor.graph.basic import Apply +from pytensor.graph.op import Op, get_test_value +from pytensor.tensor.type import TensorType from pymc.exceptions import DtypeError, ShapeError from pymc.ode import utils _log = logging.getLogger("pymc") -floatX = aesara.config.floatX +floatX = pytensor.config.floatX class DifferentialEquation(Op): @@ -170,7 +170,7 @@ def __call__(self, y0, theta, return_sens=False, **kwargs): # use default implementation to prepare symbolic outputs (via make_node) states, sens = super().__call__(y0, theta, **kwargs) - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": # compute test values from input test values test_states, test_sens = self._simulate( y0=get_test_value(y0), theta=get_test_value(theta) diff --git a/pymc/ode/utils.py b/pymc/ode/utils.py index b8b510210..f79190674 100644 --- a/pymc/ode/utils.py +++ b/pymc/ode/utils.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at def make_sens_ic(n_states, n_theta, floatX): @@ -127,7 +127,7 @@ def augment_system(ode_func, n_states, n_theta): # This is the time derivative of dydp ddt_dydp = (Jdfdy + grad_f).flatten() - system = aesara.function( + system = pytensor.function( inputs=[t_y, t_t, t_p, dydp_vec], outputs=[t_yhat, ddt_dydp], on_unused_input="ignore" ) diff --git a/pymc/printing.py b/pymc/printing.py index 36b8f17ee..c37f0fbbe 100644 --- a/pymc/printing.py +++ b/pymc/printing.py @@ -16,12 +16,12 @@ from typing import Union -from aesara.compile import SharedVariable -from aesara.graph.basic import Constant, walk -from aesara.tensor.basic import TensorVariable, Variable -from aesara.tensor.elemwise import DimShuffle -from aesara.tensor.random.basic import RandomVariable -from aesara.tensor.random.var import ( +from pytensor.compile import SharedVariable +from pytensor.graph.basic import Constant, walk +from pytensor.tensor.basic import TensorVariable, Variable +from pytensor.tensor.elemwise import DimShuffle +from pytensor.tensor.random.basic import RandomVariable +from pytensor.tensor.random.var import ( RandomGeneratorSharedVariable, RandomStateSharedVariable, ) diff --git a/pymc/aesaraf.py b/pymc/pytensorf.py similarity index 91% rename from pymc/aesaraf.py rename to pymc/pytensorf.py index d3bae850e..7cf6e1946 100644 --- a/pymc/aesaraf.py +++ b/pymc/pytensorf.py @@ -26,17 +26,17 @@ Union, ) -import aesara -import aesara.tensor as at import numpy as np import pandas as pd +import pytensor +import pytensor.tensor as at import scipy.sparse as sps -from aesara import scalar -from aesara.compile import Function, Mode, get_mode -from aesara.gradient import grad -from aesara.graph import node_rewriter, rewrite_graph -from aesara.graph.basic import ( +from pytensor import scalar +from pytensor.compile import Function, Mode, get_mode +from pytensor.gradient import grad +from pytensor.graph import node_rewriter, rewrite_graph +from pytensor.graph.basic import ( Apply, Constant, Variable, @@ -45,22 +45,22 @@ vars_between, walk, ) -from aesara.graph.fg import FunctionGraph -from aesara.graph.op import Op -from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream -from aesara.scalar.basic import Cast -from aesara.tensor.basic import _as_tensor_variable -from aesara.tensor.elemwise import Elemwise -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.var import ( +from pytensor.graph.fg import FunctionGraph +from pytensor.graph.op import Op +from pytensor.sandbox.rng_mrg import MRG_RandomStream as RandomStream +from pytensor.scalar.basic import Cast +from pytensor.tensor.basic import _as_tensor_variable +from pytensor.tensor.elemwise import Elemwise +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.var import ( RandomGeneratorSharedVariable, RandomStateSharedVariable, ) -from aesara.tensor.rewriting.basic import topo_constant_folding -from aesara.tensor.rewriting.shape import ShapeFeature -from aesara.tensor.sharedvar import SharedVariable, TensorSharedVariable -from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 -from aesara.tensor.var import TensorConstant, TensorVariable +from pytensor.tensor.rewriting.basic import topo_constant_folding +from pytensor.tensor.rewriting.shape import ShapeFeature +from pytensor.tensor.sharedvar import SharedVariable, TensorSharedVariable +from pytensor.tensor.subtensor import 
AdvancedIncSubtensor, AdvancedIncSubtensor1 +from pytensor.tensor.var import TensorConstant, TensorVariable from pymc.exceptions import NotConstantValueError from pymc.logprob.transforms import RVTransform @@ -397,11 +397,11 @@ def poulate_replacements(rv, replacements): def inputvars(a): """ - Get the inputs into Aesara variables + Get the inputs into PyTensor variables Parameters ---------- - a: Aesara variable + a: PyTensor variable Returns ------- @@ -416,11 +416,11 @@ def cont_inputs(a): """ - Get the continuous inputs into Aesara variables + Get the continuous inputs into PyTensor variables Parameters ---------- - a: Aesara variable + a: PyTensor variable Returns ------- @@ -431,13 +431,13 @@ def floatX(X): """ - Convert an Aesara tensor or numpy array to aesara.config.floatX type. + Convert a PyTensor tensor or numpy array to pytensor.config.floatX type. """ try: - return X.astype(aesara.config.floatX) + return X.astype(pytensor.config.floatX) except AttributeError: # Scalar passed - return np.asarray(X, dtype=aesara.config.floatX) + return np.asarray(X, dtype=pytensor.config.floatX) _conversion_map = {"float64": "int32", "float32": "int16", "float16": "int8", "float8": "int8"} @@ -445,9 +445,9 @@ def floatX(X): def intX(X): """ - Convert a aesara tensor or numpy array to aesara.tensor.int32 type. + Convert a pytensor tensor or numpy array to pytensor.tensor.int32 type. """ - intX = _conversion_map[aesara.config.floatX] + intX = _conversion_map[pytensor.config.floatX] try: return X.astype(intX) except AttributeError: @@ -465,7 +465,7 @@ def smartfloatX(x): """ -Aesara derivative functions +PyTensor derivative functions """ @@ -495,7 +495,7 @@ def jacobian1(f, v): def grad_i(i): return gradient1(f[i], v) - return aesara.map(grad_i, idx)[0] + return pytensor.map(grad_i, idx)[0] def jacobian(f, vars=None): @@ -514,17 +514,17 @@ def jacobian_diag(f, x): def grad_ii(i, f, x): return grad(f[i], x)[i] - return aesara.scan( + return pytensor.scan( grad_ii, sequences=[idx], n_steps=f.shape[0], non_sequences=[f, x], name="jacobian_diag" )[0] -@aesara.config.change_flags(compute_test_value="ignore") +@pytensor.config.change_flags(compute_test_value="ignore") def hessian(f, vars=None): return -jacobian(gradient(f, vars), vars) -@aesara.config.change_flags(compute_test_value="ignore") +@pytensor.config.change_flags(compute_test_value="ignore") def hessian_diag1(f, v): g = gradient1(f, v) idx = at.arange(g.shape[0], dtype="int32") @@ -532,10 +532,10 @@ def hessian_diag1(f, v): def hess_ii(i): return gradient1(g[i], v)[i] - return aesara.map(hess_ii, idx)[0] + return pytensor.map(hess_ii, idx)[0] -@aesara.config.change_flags(compute_test_value="ignore") +@pytensor.config.change_flags(compute_test_value="ignore") def hessian_diag(f, vars=None): if vars is None: vars = cont_inputs(f) @@ -597,7 +597,7 @@ def make_shared_replacements(point, vars, model): """ othervars = set(model.value_vars) - set(vars) return { - var: aesara.shared(point[var.name], var.name + "_shared", shape=var.type.shape) + var: pytensor.shared(point[var.name], var.name + "_shared", shape=var.type.shape) for var in othervars } @@ -640,14 +640,14 @@ def join_nonshared_inputs( Examples -------- - Join the inputs of a simple Aesara graph. + Join the inputs of a simple PyTensor graph. ..
code-block:: python - import aesara.tensor as at + import pytensor.tensor as at import numpy as np - from pymc.aesaraf import join_nonshared_inputs + from pymc.pytensorf import join_nonshared_inputs # Original non-shared inputs x = at.scalar("x") @@ -705,7 +705,7 @@ def join_nonshared_inputs( .. code-block:: python - from aesara import shared + from pytensor import shared mu_pop_input, *other_inputs = inputs shared_mu_pop_input = shared(0.0) @@ -733,9 +733,9 @@ def join_nonshared_inputs( joined_inputs = tensor_type("joined_inputs") else: joined_values = np.concatenate([point[var.name].ravel() for var in inputs]) - joined_inputs = aesara.shared(joined_values, "joined_inputs") + joined_inputs = pytensor.shared(joined_values, "joined_inputs") - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": joined_inputs.tag.test_value = raveled_inputs.tag.test_value replace: Dict[TensorVariable, TensorVariable] = {} @@ -750,7 +750,7 @@ def join_nonshared_inputs( replace.update(shared_inputs) new_outputs = [ - aesara.clone_replace(output, replace, rebuild_strict=False) for output in outputs + pytensor.clone_replace(output, replace, rebuild_strict=False) for output in outputs ] return new_outputs, joined_inputs @@ -781,12 +781,12 @@ def __call__(self, input): input: TensorVariable """ (oldinput,) = inputvars(self.tensor) - return aesara.clone_replace(self.tensor, {oldinput: input}, rebuild_strict=False) + return pytensor.clone_replace(self.tensor, {oldinput: input}, rebuild_strict=False) class GeneratorOp(Op): """ - Generator Op is designed for storing python generators inside aesara graph. + Generator Op is designed for storing python generators inside pytensor graph. __call__ creates TensorVariable It has 2 new methods @@ -827,7 +827,7 @@ def perform(self, node, inputs, output_storage, params=None): def do_constant_folding(self, fgraph, node): return False - __call__ = aesara.config.change_flags(compute_test_value="off")(Op.__call__) + __call__ = pytensor.config.change_flags(compute_test_value="off")(Op.__call__) def set_gen(self, gen): from pymc.data import GeneratorAdapter @@ -883,12 +883,12 @@ def at_rng(random_seed=None): ---------- random_seed: int If not None - returns *new* aesara random generator without replacing package global one + returns *new* pytensor random generator without replacing package global one Returns ------- - `aesara.tensor.random.utils.RandomStream` instance - `aesara.tensor.random.utils.RandomStream` + `pytensor.tensor.random.utils.RandomStream` instance + `pytensor.tensor.random.utils.RandomStream` instance passed to the most recent call of `set_at_rng` """ if random_seed is None: @@ -904,7 +904,7 @@ def set_at_rng(new_rng): Parameters ---------- - new_rng: `aesara.tensor.random.utils.RandomStream` instance + new_rng: `pytensor.tensor.random.utils.RandomStream` instance The random number generator to use. 
""" # pylint: disable=global-statement @@ -921,7 +921,7 @@ def floatX_array(x): def ix_(*args): """ - Aesara np.ix_ analog + PyTensor np.ix_ analog See numpy.lib.index_tricks.ix_ for reference """ @@ -972,13 +972,13 @@ def local_check_parameter_to_ninf_switch(fgraph, node): return [out] -aesara.compile.optdb["canonicalize"].register( +pytensor.compile.optdb["canonicalize"].register( "local_remove_check_parameter", local_remove_check_parameter, use_db_name_as_tag=False, ) -aesara.compile.optdb["canonicalize"].register( +pytensor.compile.optdb["canonicalize"].register( "local_check_parameter_to_ninf_switch", local_check_parameter_to_ninf_switch, use_db_name_as_tag=False, @@ -1016,7 +1016,7 @@ def replace_rng_nodes(outputs: Sequence[TensorVariable]) -> Sequence[TensorVaria rng_cls = np.random.RandomState else: rng_cls = np.random.Generator - new_rng_nodes.append(aesara.shared(rng_cls(np.random.PCG64()))) + new_rng_nodes.append(pytensor.shared(rng_cls(np.random.PCG64()))) graph.replace_all(zip(rng_nodes, new_rng_nodes), import_missing=True) return graph.outputs @@ -1048,7 +1048,7 @@ def compile_pymc( mode=None, **kwargs, ) -> Function: - """Use ``aesara.function`` with specialized pymc rewrites always enabled. + """Use ``pytensor.function`` with specialized pymc rewrites always enabled. This function also ensures shared RandomState/Generator used by RandomVariables in the graph are updated across calls, to ensure independent draws. @@ -1056,14 +1056,14 @@ def compile_pymc( Parameters ---------- inputs: list of TensorVariables, optional - Inputs of the compiled Aesara function + Inputs of the compiled PyTensor function outputs: list of TensorVariables, optional - Outputs of the compiled Aesara function + Outputs of the compiled PyTensor function random_seed: int, array-like of int or SeedSequence, optional Seed used to override any RandomState/Generator shared variables in the graph. If not specified, the value of original shared variables will still be overwritten. 
mode: optional - Aesara mode used to compile the function + PyTensor mode used to compile the function Included rewrites ----------------- @@ -1136,14 +1136,14 @@ def compile_pymc( mode = get_mode(mode) opt_qry = mode.provided_optimizer.including("random_make_inplace", check_parameter_opt) mode = Mode(linker=mode.linker, optimizer=opt_qry) - aesara_function = aesara.function( + pytensor_function = pytensor.function( inputs, outputs, updates={**rng_updates, **kwargs.pop("updates", {})}, mode=mode, **kwargs, ) - return aesara_function + return pytensor_function def constant_fold( diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py index 53847b6ca..e1863c7f9 100644 --- a/pymc/sampling/forward.py +++ b/pymc/sampling/forward.py @@ -34,8 +34,10 @@ import numpy as np import xarray -from aesara import tensor as at -from aesara.graph.basic import ( +from arviz import InferenceData +from fastprogress.fastprogress import progress_bar +from pytensor import tensor as at +from pytensor.graph.basic import ( Apply, Constant, Variable, @@ -43,23 +45,21 @@ general_toposort, walk, ) -from aesara.graph.fg import FunctionGraph -from aesara.tensor.random.var import ( +from pytensor.graph.fg import FunctionGraph +from pytensor.tensor.random.var import ( RandomGeneratorSharedVariable, RandomStateSharedVariable, ) -from aesara.tensor.sharedvar import SharedVariable -from arviz import InferenceData -from fastprogress.fastprogress import progress_bar +from pytensor.tensor.sharedvar import SharedVariable from typing_extensions import TypeAlias import pymc as pm -from pymc.aesaraf import compile_pymc from pymc.backends.arviz import _DefaultTrace from pymc.backends.base import MultiTrace from pymc.blocking import PointType from pymc.model import Model, modelcontext +from pymc.pytensorf import compile_pymc from pymc.util import ( RandomState, _get_seeds_per_chain, @@ -104,7 +104,7 @@ def compile_forward_sampling_function( ) -> Tuple[Callable[..., Union[np.ndarray, List[np.ndarray]]], Set[Variable]]: """Compile a function to draw samples, conditioned on the values of some variables. - The goal of this function is to walk the aesara computational graph from the list + The goal of this function is to walk the pytensor computational graph from the list of output nodes down to the root nodes, and then compile a function that will produce values for these output nodes. The compiled function will take as inputs the subset of variables in the ``vars_in_trace`` that are deemed to not be **volatile**. @@ -128,7 +128,7 @@ def compile_forward_sampling_function( This function also enables a way to impute values for any variable in the computational graph that produces the desired outputs: the ``givens_dict``. This dictionary can be used - to set the ``givens`` argument of the aesara function compilation. This will essentially + to set the ``givens`` argument of the pytensor function compilation. This will essentially replace a node in the computational graph with any other expression that has the same type as the desired node. 
Passing variables in the givens_dict is considered an intervention that might lead to different variable values from those that could have been seen during @@ -137,17 +137,17 @@ def compile_forward_sampling_function( Parameters ---------- - outputs : List[aesara.graph.basic.Variable] + outputs : List[pytensor.graph.basic.Variable] The list of variables that will be returned by the compiled function - vars_in_trace : List[aesara.graph.basic.Variable] + vars_in_trace : List[pytensor.graph.basic.Variable] The list of variables that are assumed to have values stored in the trace - basic_rvs : Optional[List[aesara.graph.basic.Variable]] + basic_rvs : Optional[List[pytensor.graph.basic.Variable]] A list of random variables that are defined in the model. This list (which could be the output of ``model.basic_RVs``) should have a reference to the variables that should be considered as random variable instances. This includes variables that have a ``RandomVariable`` owner op, but also unpure random variables like Mixtures, or Censored distributions. - givens_dict : Optional[Dict[aesara.graph.basic.Variable, Any]] + givens_dict : Optional[Dict[pytensor.graph.basic.Variable, Any]] A dictionary that maps tensor variables to the values that should be used to replace them in the compiled function. The types of the key and value should match or an error will be raised during compilation. @@ -174,7 +174,7 @@ def compile_forward_sampling_function( Returns ------- function: Callable - Compiled forward sampling Aesara function + Compiled forward sampling PyTensor function volatile_basic_rvs: Set of Variable Set of all basic_rvs that were considered volatile and will be resampled when the function is evaluated @@ -281,7 +281,7 @@ def draw( random_seed : int, RandomState or numpy_Generator, optional Seed for the random number generator. **kwargs : dict, optional - Keyword arguments for :func:`pymc.aesaraf.compile_pymc`. + Keyword arguments for :func:`pymc.pytensorf.compile_pymc`. Returns ------- @@ -371,7 +371,7 @@ def sample_prior_predictive( idata_kwargs : dict, optional Keyword arguments for :func:`pymc.to_inference_data` compile_kwargs: dict, optional - Keyword arguments for :func:`pymc.aesaraf.compile_pymc`. + Keyword arguments for :func:`pymc.pytensorf.compile_pymc`. Returns ------- @@ -487,7 +487,7 @@ def sample_posterior_predictive( Keyword arguments for :func:`pymc.to_inference_data` if ``predictions=False`` or to :func:`pymc.predictions_to_inference_data` otherwise. compile_kwargs: dict, optional - Keyword arguments for :func:`pymc.aesaraf.compile_pymc`. + Keyword arguments for :func:`pymc.pytensorf.compile_pymc`. 
Returns ------- diff --git a/pymc/sampling/jax.py b/pymc/sampling/jax.py index 9a8b437b5..a50943df8 100644 --- a/pymc/sampling/jax.py +++ b/pymc/sampling/jax.py @@ -15,19 +15,19 @@ from datetime import datetime -import aesara.tensor as at import arviz as az import jax import numpy as np +import pytensor.tensor as at -from aesara.compile import SharedVariable, Supervisor, mode -from aesara.graph.basic import clone_replace, graph_inputs -from aesara.graph.fg import FunctionGraph -from aesara.link.jax.dispatch import jax_funcify -from aesara.raise_op import Assert -from aesara.tensor import TensorVariable -from aesara.tensor.shape import SpecifyShape from arviz.data.base import make_attrs +from pytensor.compile import SharedVariable, Supervisor, mode +from pytensor.graph.basic import clone_replace, graph_inputs +from pytensor.graph.fg import FunctionGraph +from pytensor.link.jax.dispatch import jax_funcify +from pytensor.raise_op import Assert +from pytensor.tensor import TensorVariable +from pytensor.tensor.shape import SpecifyShape from pymc import Model, modelcontext from pymc.backends.arviz import find_constants, find_observations @@ -86,7 +86,7 @@ def get_jaxified_graph( inputs: Optional[List[TensorVariable]] = None, outputs: Optional[List[TensorVariable]] = None, ) -> List[TensorVariable]: - """Compile an Aesara graph into an optimized JAX function""" + """Compile a PyTensor graph into an optimized JAX function""" graph = _replace_shared_variables(outputs) @@ -94,8 +94,8 @@ def get_jaxified_graph( # We need to add a Supervisor to the fgraph to be able to run the # JAX sequential optimizer without warnings. We made sure there # are no mutable input variables, so we only need to check for - # "destroyers". This should be automatically handled by Aesara - # once https://github.com/aesara-devs/aesara/issues/637 is fixed. + # "destroyers". This should be automatically handled by PyTensor + # once https://github.com/pytensor-devs/pytensor/issues/637 is fixed.
fgraph.attach_feature( Supervisor( input diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py index efa5d8c48..8fb41c6de 100644 --- a/pymc/sampling/mcmc.py +++ b/pymc/sampling/mcmc.py @@ -24,8 +24,8 @@ from copy import copy from typing import Iterator, List, Optional, Sequence, Tuple, Union -import aesara.gradient as tg import numpy as np +import pytensor.gradient as tg from arviz import InferenceData from fastprogress.fastprogress import progress_bar diff --git a/pymc/sampling/parallel.py b/pymc/sampling/parallel.py index 8a528c072..483922da7 100644 --- a/pymc/sampling/parallel.py +++ b/pymc/sampling/parallel.py @@ -28,7 +28,7 @@ from fastprogress.fastprogress import progress_bar -from pymc import aesaraf +from pymc import pytensorf from pymc.blocking import DictToArrayBijection from pymc.exceptions import SamplingError from pymc.util import RandomSeed @@ -155,7 +155,7 @@ def _recv_msg(self): def _start_loop(self): np.random.seed(self._seed) - aesaraf.set_at_rng(self._at_seed) + pytensorf.set_at_rng(self._at_seed) draw = 0 tuning = True diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index a19507cac..9b7a02e11 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -18,23 +18,23 @@ from abc import ABC from typing import Dict, cast -import aesara.tensor as at import numpy as np +import pytensor.tensor as at -from aesara.graph.basic import clone_replace +from pytensor.graph.basic import clone_replace from scipy.special import logsumexp from scipy.stats import multivariate_normal -from pymc.aesaraf import ( +from pymc.backends.ndarray import NDArray +from pymc.blocking import DictToArrayBijection +from pymc.initial_point import make_initial_point_expression +from pymc.model import Point, modelcontext +from pymc.pytensorf import ( compile_pymc, floatX, join_nonshared_inputs, make_shared_replacements, ) -from pymc.backends.ndarray import NDArray -from pymc.blocking import DictToArrayBijection -from pymc.initial_point import make_initial_point_expression -from pymc.model import Point, modelcontext from pymc.sampling.forward import draw from pymc.step_methods.metropolis import MultivariateNormalProposal from pymc.vartypes import discrete_types @@ -595,7 +595,7 @@ def systematic_resampling(weights, rng): def _logp_forw(point, out_vars, in_vars, shared): - """Compile Aesara function of the model and the input and output variables. + """Compile PyTensor function of the model and the input and output variables. Parameters ---------- diff --git a/pymc/step_methods/arraystep.py b/pymc/step_methods/arraystep.py index d4055dd43..c46324d31 100644 --- a/pymc/step_methods/arraystep.py +++ b/pymc/step_methods/arraystep.py @@ -18,8 +18,8 @@ import numpy as np -from aesara.graph.basic import Variable from numpy.random import uniform +from pytensor.graph.basic import Variable from pymc.blocking import DictToArrayBijection, PointType, RaveledVars, StatsType from pymc.model import modelcontext @@ -132,7 +132,7 @@ class ArrayStep(BlockedStep): ---------- vars: list List of value variables for sampler. 
- fs: list of logp Aesara functions + fs: list of logp PyTensor functions allvars: Boolean (default False) blocked: Boolean (default True) """ @@ -181,7 +181,7 @@ def __init__(self, vars, shared, blocked=True): Parameters ---------- vars: list of sampling value variables - shared: dict of Aesara variable -> shared variable + shared: dict of PyTensor variable -> shared variable blocked: Boolean (default True) """ self.vars = vars @@ -223,7 +223,7 @@ def __init__(self, vars, shared, blocked=True): Parameters ---------- vars: list of sampling value variables - shared: dict of Aesara variable -> shared variable + shared: dict of PyTensor variable -> shared variable blocked: Boolean (default True) """ self.population = None @@ -255,12 +255,12 @@ def link_population(self, population, chain_index): class GradientSharedStep(ArrayStepShared): def __init__( - self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **aesara_kwargs + self, vars, model=None, blocked=True, dtype=None, logp_dlogp_func=None, **pytensor_kwargs ): model = modelcontext(model) if logp_dlogp_func is None: - func = model.logp_dlogp_function(vars, dtype=dtype, **aesara_kwargs) + func = model.logp_dlogp_function(vars, dtype=dtype, **pytensor_kwargs) else: func = logp_dlogp_func diff --git a/pymc/step_methods/hmc/base_hmc.py b/pymc/step_methods/hmc/base_hmc.py index 844435d6d..9038ff614 100644 --- a/pymc/step_methods/hmc/base_hmc.py +++ b/pymc/step_methods/hmc/base_hmc.py @@ -22,10 +22,10 @@ import numpy as np -from pymc.aesaraf import floatX from pymc.blocking import DictToArrayBijection, RaveledVars, StatsType from pymc.exceptions import SamplingError from pymc.model import Point, modelcontext +from pymc.pytensorf import floatX from pymc.stats.convergence import SamplerWarning, WarningType from pymc.step_methods import step_sizes from pymc.step_methods.arraystep import GradientSharedStep @@ -75,14 +75,14 @@ def __init__( t0=10, adapt_step_size=True, step_rand=None, - **aesara_kwargs, + **pytensor_kwargs, ): """Set up Hamiltonian samplers with common structures. Parameters ---------- vars: list, default=None - List of Aesara variables. If None, all continuous RVs from the + List of PyTensor variables. If None, all continuous RVs from the model are included. scaling: array_like, ndim={1,2} Scaling for momentum distribution. 1d arrays interpreted matrix @@ -98,7 +98,7 @@ def __init__( potential: Potential, optional An object that represents the Hamiltonian with methods `velocity`, `energy`, and `random` methods. 
- **aesara_kwargs: passed to Aesara functions + **pytensor_kwargs: passed to PyTensor functions """ self._model = modelcontext(model) @@ -106,7 +106,7 @@ def __init__( vars = self._model.continuous_value_vars else: vars = get_value_vars_from_user_vars(vars, self._model) - super().__init__(vars, blocked=blocked, model=self._model, dtype=dtype, **aesara_kwargs) + super().__init__(vars, blocked=blocked, model=self._model, dtype=dtype, **pytensor_kwargs) self.adapt_step_size = adapt_step_size self.Emax = Emax diff --git a/pymc/step_methods/hmc/nuts.py b/pymc/step_methods/hmc/nuts.py index a692a19f3..993c3f422 100644 --- a/pymc/step_methods/hmc/nuts.py +++ b/pymc/step_methods/hmc/nuts.py @@ -18,8 +18,8 @@ import numpy as np -from pymc.aesaraf import floatX from pymc.math import logbern +from pymc.pytensorf import floatX from pymc.stats.convergence import SamplerWarning from pymc.step_methods.arraystep import Competence from pymc.step_methods.hmc import integration diff --git a/pymc/step_methods/hmc/quadpotential.py b/pymc/step_methods/hmc/quadpotential.py index f79f0e430..9787a9d23 100644 --- a/pymc/step_methods/hmc/quadpotential.py +++ b/pymc/step_methods/hmc/quadpotential.py @@ -18,14 +18,14 @@ from typing import overload -import aesara import numpy as np +import pytensor import scipy.linalg from numpy.random import normal from scipy.sparse import issparse -from pymc.aesaraf import floatX +from pymc.pytensorf import floatX __all__ = [ "quad_potential", @@ -215,7 +215,7 @@ def __init__( raise ValueError(f"Wrong shape for initial_mean: expected {n} got {len(initial_mean)}") if dtype is None: - dtype = aesara.config.floatX + dtype = pytensor.config.floatX if initial_diag is None: initial_diag = np.ones(n, dtype=dtype) @@ -240,7 +240,7 @@ def __init__( def reset(self): self._var = np.array(self._initial_diag, dtype=self.dtype, copy=True) - self._var_aesara = aesara.shared(self._var) + self._var_pytensor = pytensor.shared(self._var) self._stds = np.sqrt(self._initial_diag) self._inv_stds = floatX(1.0) / self._stds self._foreground_var = _WeightedVariance( @@ -274,7 +274,7 @@ def _update_from_weightvar(self, weightvar): self._var = np.clip(self._var, 1e-12, 1e12) np.sqrt(self._var, out=self._stds) np.divide(1, self._stds, out=self._inv_stds) - self._var_aesara.set_value(self._var) + self._var_pytensor.set_value(self._var) def update(self, sample, grad, tune): """Inform the potential about a new sample during tuning.""" @@ -499,7 +499,7 @@ def __init__(self, v, dtype=None): Diagonal of covariance matrix for the potential vector """ if dtype is None: - dtype = aesara.config.floatX + dtype = pytensor.config.floatX self.dtype = dtype v = v.astype(self.dtype) s = v**0.5 @@ -543,7 +543,7 @@ def __init__(self, A, dtype=None): Inverse of covariance matrix for the potential vector """ if dtype is None: - dtype = aesara.config.floatX + dtype = pytensor.config.floatX self.dtype = dtype self.L = floatX(scipy.linalg.cholesky(A, lower=True)) @@ -583,7 +583,7 @@ def __init__(self, cov, dtype=None): scaling matrix for the potential vector """ if dtype is None: - dtype = aesara.config.floatX + dtype = pytensor.config.floatX self.dtype = dtype self._cov = np.array(cov, dtype=self.dtype, copy=True) self._chol = scipy.linalg.cholesky(self._cov, lower=True) @@ -638,7 +638,7 @@ def __init__( raise ValueError(f"Wrong shape for initial_mean: expected {n} got {len(initial_mean)}") if dtype is None: - dtype = aesara.config.floatX + dtype = pytensor.config.floatX if initial_cov is None: initial_cov = np.eye(n, dtype=dtype) 
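The quadpotential hunks above repeatedly substitute `pytensor.config.floatX` for the default dtype and `pytensor.shared` for the adapted variance; a minimal stand-alone sketch of that pattern (illustrative only, not taken from the patch, assuming a PyTensor installation):

import numpy as np

import pytensor

# Default dtype follows the global floatX flag, as in the potentials above
dtype = pytensor.config.floatX
var = np.ones(3, dtype=dtype)

# Keep a shared copy that compiled functions read; update it in place during tuning
var_shared = pytensor.shared(var)
var_shared.set_value(np.clip(var * 2.0, 1e-12, 1e12).astype(dtype))
print(var_shared.get_value(), var_shared.type.dtype)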
@@ -771,7 +771,7 @@ def current_mean(self): if chol_available: __all__ += ["QuadPotentialSparse"] - import aesara.sparse + import pytensor.sparse class QuadPotentialSparse(QuadPotential): def __init__(self, A): @@ -789,8 +789,8 @@ def __init__(self, A): def velocity(self, x): """Compute the current velocity at a position in parameter space.""" - A = aesara.sparse.as_sparse(self.A) - return aesara.sparse.dot(A, x) + A = pytensor.sparse.as_sparse(self.A) + return pytensor.sparse.dot(A, x) def random(self): """Draw random value from QuadPotential.""" diff --git a/pymc/step_methods/metropolis.py b/pymc/step_methods/metropolis.py index c1f0dafe6..839ba417d 100644 --- a/pymc/step_methods/metropolis.py +++ b/pymc/step_methods/metropolis.py @@ -13,26 +13,26 @@ # limitations under the License. from typing import Callable, Dict, List, Optional, Tuple -import aesara import numpy as np import numpy.random as nr +import pytensor import scipy.linalg import scipy.special -from aesara import tensor as at -from aesara.graph.fg import MissingInputError -from aesara.tensor.random.basic import BernoulliRV, CategoricalRV +from pytensor import tensor as at +from pytensor.graph.fg import MissingInputError +from pytensor.tensor.random.basic import BernoulliRV, CategoricalRV import pymc as pm -from pymc.aesaraf import ( +from pymc.blocking import DictToArrayBijection, RaveledVars +from pymc.pytensorf import ( CallableTensor, compile_pymc, floatX, join_nonshared_inputs, replace_rng_nodes, ) -from pymc.blocking import DictToArrayBijection, RaveledVars from pymc.step_methods.arraystep import ( ArrayStep, ArrayStepShared, @@ -158,7 +158,7 @@ def __init__( model: PyMC Model Optional model for sampling step. Defaults to None (taken from context). mode: string or `Mode` instance. - compilation mode passed to Aesara functions + compilation mode passed to PyTensor functions """ model = pm.modelcontext(model) @@ -717,7 +717,7 @@ class DEMetropolis(PopulationArrayStepShared): model: PyMC Model Optional model for sampling step. Defaults to None (taken from context). mode: string or `Mode` instance. - compilation mode passed to Aesara functions + compilation mode passed to PyTensor functions References ---------- @@ -865,7 +865,7 @@ class DEMetropolisZ(ArrayStepShared): model: PyMC Model Optional model for sampling step. Defaults to None (taken from context). mode: string or `Mode` instance. 
- compilation mode passed to Aesara functions + compilation mode passed to PyTensor functions References ---------- @@ -1037,7 +1037,7 @@ def delta_logp( logp: at.TensorVariable, vars: List[at.TensorVariable], shared: Dict[at.TensorVariable, at.sharedvar.TensorSharedVariable], -) -> aesara.compile.Function: +) -> pytensor.compile.Function: [logp0], inarray0 = join_nonshared_inputs( point=point, outputs=[logp], inputs=vars, shared_inputs=shared ) diff --git a/pymc/tests/backends/fixtures.py b/pymc/tests/backends/fixtures.py index a1ac8ce89..1941c344c 100644 --- a/pymc/tests/backends/fixtures.py +++ b/pymc/tests/backends/fixtures.py @@ -16,9 +16,9 @@ import os import shutil -import aesara import numpy as np import numpy.testing as npt +import pytensor import pytest from pymc.backends import base @@ -506,7 +506,7 @@ def test_chain_length(self): assert self.mtrace0.nchains == self.mtrace1.nchains assert len(self.mtrace0) == len(self.mtrace1) - @pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.xfail(condition=(pytensor.config.floatX == "float32"), reason="Fails on float32") def test_dtype(self): for varname in self.test_point.keys(): assert ( diff --git a/pymc/tests/backends/test_arviz.py b/pymc/tests/backends/test_arviz.py index ec87728ac..4e9e6d55c 100644 --- a/pymc/tests/backends/test_arviz.py +++ b/pymc/tests/backends/test_arviz.py @@ -3,14 +3,14 @@ from typing import Dict, Tuple -import aesara.tensor as at import numpy as np +import pytensor.tensor as at import pytest -from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from arviz import InferenceData from arviz.tests.helpers import check_multiple_attrs from numpy import ma +from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 import pymc as pm diff --git a/pymc/tests/conftest.py b/pymc/tests/conftest.py index 99c39b308..6bc987af2 100644 --- a/pymc/tests/conftest.py +++ b/pymc/tests/conftest.py @@ -12,31 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import aesara import numpy as np +import pytensor import pytest import pymc as pm @pytest.fixture(scope="function", autouse=True) -def aesara_config(): - config = aesara.config.change_flags(on_opt_error="raise") +def pytensor_config(): + config = pytensor.config.change_flags(on_opt_error="raise") with config: yield @pytest.fixture(scope="function", autouse=True) def exception_verbosity(): - config = aesara.config.change_flags(exception_verbosity="high") + config = pytensor.config.change_flags(exception_verbosity="high") with config: yield @pytest.fixture(scope="function", autouse=False) def strict_float32(): - if aesara.config.floatX == "float32": - config = aesara.config.change_flags(warn_float64="raise") + if pytensor.config.floatX == "float32": + config = pytensor.config.change_flags(warn_float64="raise") with config: yield else: diff --git a/pymc/tests/distributions/test_bound.py b/pymc/tests/distributions/test_bound.py index 905aaa683..118358749 100644 --- a/pymc/tests/distributions/test_bound.py +++ b/pymc/tests/distributions/test_bound.py @@ -18,7 +18,7 @@ import pytest import scipy.stats as st -from aesara.tensor.random.op import RandomVariable +from pytensor.tensor.random.op import RandomVariable import pymc as pm diff --git a/pymc/tests/distributions/test_continuous.py b/pymc/tests/distributions/test_continuous.py index 7cd9e7219..b69aea9df 100644 --- a/pymc/tests/distributions/test_continuous.py +++ b/pymc/tests/distributions/test_continuous.py @@ -14,23 +14,23 @@ import functools as ft -import aesara -import aesara.tensor as at import numpy as np import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import scipy.special as sp import scipy.stats as st -from aesara.compile.mode import Mode +from pytensor.compile.mode import Mode import pymc as pm -from pymc.aesaraf import floatX from pymc.distributions import logcdf, logp from pymc.distributions.continuous import Normal, get_tau_sigma, interpolated from pymc.distributions.dist_math import clipped_beta_rvs from pymc.logprob.utils import ParameterValueError +from pymc.pytensorf import floatX from pymc.tests.distributions.util import ( BaseTestDistributionRandom, Circ, @@ -49,7 +49,7 @@ seeded_scipy_distribution_builder, ) from pymc.tests.helpers import select_by_precision -from pymc.tests.logprob.utils import create_aesara_params, scipy_logprob_tester +from pymc.tests.logprob.utils import create_pytensor_params, scipy_logprob_tester try: from polyagamma import polyagamma_cdf, polyagamma_pdf, random_polyagamma @@ -185,7 +185,7 @@ def test_uniform(self): ) # Custom logp / logcdf check for invalid parameters invalid_dist = pm.Uniform.dist(lower=1, upper=0) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): with pytest.raises(ParameterValueError): logp(invalid_dist, np.array(0.5)).eval() with pytest.raises(ParameterValueError): @@ -209,19 +209,19 @@ def test_triangular(self): # Custom logp/logcdf check for values outside of domain valid_dist = pm.Triangular.dist(lower=0, upper=1, c=0.9, size=2) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): assert np.all(logp(valid_dist, np.array([-1, 2])).eval() == -np.inf) assert np.all(logcdf(valid_dist, np.array([-1, 2])).eval() == [-np.inf, 0]) # Custom logcdf check for invalid parameters. 
# Invalid logp checks for triangular are being done in aeppl invalid_dist = pm.Triangular.dist(lower=1, upper=0, c=0.1) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): with pytest.raises(ParameterValueError): logcdf(invalid_dist, 2).eval() invalid_dist = pm.Triangular.dist(lower=0, upper=1, c=2.0) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): with pytest.raises(ParameterValueError): logcdf(invalid_dist, 2).eval() @@ -303,7 +303,7 @@ def test_chisquared_logp(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_chisquared_logcdf(self): @@ -375,7 +375,7 @@ def test_beta_logp(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_beta_logcdf(self): @@ -487,7 +487,7 @@ def test_studentt_logp(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_studentt_logcdf(self): @@ -551,7 +551,7 @@ def test_fun(value, mu, sigma): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_gamma_logcdf(self): @@ -571,7 +571,7 @@ def test_inverse_gamma_logp(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_inverse_gamma_logcdf(self): @@ -583,7 +583,7 @@ def test_inverse_gamma_logcdf(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to scaling issues", ) def test_inverse_gamma_alt_params(self): @@ -614,7 +614,7 @@ def test_pareto(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_weibull_logp(self): @@ -626,7 +626,7 @@ def test_weibull_logp(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to inf issues", ) def test_weibull_logcdf(self): @@ -691,7 +691,7 @@ def test_ex_gaussian_cdf_outside_edges(self): skip_paramdomain_inside_edge_test=True, # Valid values are tested above ) - @pytest.mark.skipif(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") + @pytest.mark.skipif(condition=(pytensor.config.floatX == "float32"), reason="Fails on float32") def test_vonmises(self): check_logp( pm.VonMises, @@ -742,7 +742,7 @@ def test_logitnormal(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Some combinations underflow to -inf in float32 in pymc version", ) def test_rice(self): @@ -752,7 +752,7 @@ def test_rice(self): {"b": Rplus, "sigma": Rplusbig}, lambda value, b, sigma: st.rice.logpdf(value, b=b, loc=0, scale=sigma), ) - if aesara.config.floatX == "float32": + if pytensor.config.floatX == "float32": raise Exception("Flaky test: It passed this time, but XPASS is not 
allowed.") def test_rice_nu(self): @@ -774,7 +774,7 @@ def test_moyal_logp(self): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="PyMC underflows earlier than scipy on float32", ) def test_moyal_logcdf(self): @@ -784,7 +784,7 @@ def test_moyal_logcdf(self): {"mu": R, "sigma": Rplusbig}, lambda value, mu, sigma: floatX(st.moyal.logcdf(value, mu, sigma)), ) - if aesara.config.floatX == "float32": + if pytensor.config.floatX == "float32": raise Exception("Flaky test: It passed this time, but XPASS is not allowed.") def test_interpolated(self): @@ -2246,7 +2246,7 @@ def dist(cls, **kwargs): pymc_random( TestedInterpolated, {}, - extra_args={"rng": aesara.shared(rng)}, + extra_args={"rng": pytensor.shared(rng)}, ref_rand=ref_rand, ) @@ -2262,7 +2262,7 @@ class TestICDF: ) def test_normal_icdf(self, dist_params, obs, size): - dist_params_at, obs_at, size_at = create_aesara_params(dist_params, obs, size) + dist_params_at, obs_at, size_at = create_pytensor_params(dist_params, obs, size) dist_params = dict(zip(dist_params_at, dist_params)) x = Normal.dist(*dist_params_at, size=size_at) diff --git a/pymc/tests/distributions/test_discrete.py b/pymc/tests/distributions/test_discrete.py index 30cac3cbb..134076a47 100644 --- a/pymc/tests/distributions/test_discrete.py +++ b/pymc/tests/distributions/test_discrete.py @@ -16,22 +16,22 @@ import sys import warnings -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy.special as sp import scipy.stats as st -from aesara.compile.mode import Mode -from aesara.tensor import TensorVariable +from pytensor.compile.mode import Mode +from pytensor.tensor import TensorVariable import pymc as pm -from pymc.aesaraf import floatX from pymc.distributions import logcdf, logp from pymc.distributions.discrete import Geometric, _OrderedLogistic, _OrderedProbit from pymc.logprob.utils import ParameterValueError +from pymc.pytensorf import floatX from pymc.tests.distributions.util import ( BaseTestDistributionRandom, Bool, @@ -56,7 +56,7 @@ seeded_numpy_distribution_builder, seeded_scipy_distribution_builder, ) -from pymc.tests.logprob.utils import create_aesara_params, scipy_logprob_tester +from pymc.tests.logprob.utils import create_pytensor_params, scipy_logprob_tester from pymc.vartypes import discrete_types @@ -118,7 +118,7 @@ def test_discrete_unif(self): ) # Custom logp / logcdf check for invalid parameters invalid_dist = pm.DiscreteUniform.dist(lower=1, upper=0) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): with pytest.raises(ParameterValueError): logp(invalid_dist, 0.5).eval() with pytest.raises(ParameterValueError): @@ -180,7 +180,7 @@ def modified_scipy_hypergeom_logcdf(value, N, k, n): ) @pytest.mark.xfail( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="SciPy log CDF stopped working after un-pinning NumPy version.", ) def test_negative_binomial(self): @@ -479,14 +479,14 @@ def test_categorical(self, n): lambda value, p: categorical_logpdf(value, p), ) - @aesara.config.change_flags(compute_test_value="raise") + @pytensor.config.change_flags(compute_test_value="raise") def test_categorical_bounds(self): with pm.Model(): x = pm.Categorical("x", p=np.array([0.2, 0.3, 0.5])) assert np.isinf(logp(x, -1).eval()) assert np.isinf(logp(x, 3).eval()) - 
@aesara.config.change_flags(compute_test_value="raise") + @pytensor.config.change_flags(compute_test_value="raise") @pytest.mark.parametrize( "p", [ @@ -849,7 +849,7 @@ def test_categorical_moment(self, p, size, expected): np.arange(1, 5), np.arange(2, 6), None, - np.array([0, 1, 1, 2] if aesara.config.floatX == "float64" else [0, 0, 1, 1]), + np.array([0, 1, 1, 2] if pytensor.config.floatX == "float64" else [0, 0, 1, 1]), ), ( np.linspace(0.2, 0.6, 3), @@ -1074,10 +1074,10 @@ def diracdelta_rng_fn(self, size, c): @pytest.mark.parametrize("floatX", ["float32", "float64"]) @pytest.mark.xfail( - sys.platform == "win32", reason="https://github.com/aesara-devs/aesara/issues/871" + sys.platform == "win32", reason="https://github.com/pytensor-devs/pytensor/issues/871" ) def test_dtype(self, floatX): - with aesara.config.change_flags(floatX=floatX): + with pytensor.config.change_flags(floatX=floatX): assert pm.DiracDelta.dist(2**4).dtype == "int8" assert pm.DiracDelta.dist(2**16).dtype == "int32" assert pm.DiracDelta.dist(2**32).dtype == "int64" @@ -1165,7 +1165,7 @@ class TestICDF: ) def test_geometric_icdf(self, dist_params, obs, size): - dist_params_at, obs_at, size_at = create_aesara_params(dist_params, obs, size) + dist_params_at, obs_at, size_at = create_pytensor_params(dist_params, obs, size) dist_params = dict(zip(dist_params_at, dist_params)) x = Geometric.dist(*dist_params_at, size=size_at) diff --git a/pymc/tests/distributions/test_dist_math.py b/pymc/tests/distributions/test_dist_math.py index cecfa0785..84caf133c 100644 --- a/pymc/tests/distributions/test_dist_math.py +++ b/pymc/tests/distributions/test_dist_math.py @@ -11,20 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import aesara -import aesara.tensor as at import numpy as np import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import scipy.special -from aesara import config, function -from aesara.tensor.random.basic import multinomial +from pytensor import config, function +from pytensor.tensor.random.basic import multinomial from scipy import interpolate, stats import pymc as pm -from pymc.aesaraf import floatX from pymc.distributions import Discrete from pymc.distributions.dist_math import ( MvNormalLogp, @@ -37,6 +36,7 @@ multigammaln, ) from pymc.logprob.utils import ParameterValueError +from pymc.pytensorf import floatX from pymc.tests.checks import close_to from pymc.tests.helpers import verify_grad @@ -138,11 +138,11 @@ def test_logp(self): expect = stats.multivariate_normal(mean=np.zeros(2), cov=cov_val) expect = expect.logpdf(delta_val).sum() logp = MvNormalLogp()(cov, delta) - logp_f = aesara.function([cov, delta], logp) + logp_f = pytensor.function([cov, delta], logp) logp = logp_f(cov_val, delta_val) npt.assert_allclose(logp, expect) - @aesara.config.change_flags(compute_test_value="ignore") + @pytensor.config.change_flags(compute_test_value="ignore") def test_grad(self): np.random.seed(42) @@ -164,7 +164,7 @@ def func(chol_vec, delta): delta_val = floatX(np.random.randn(5, 2)) verify_grad(func, [chol_vec_val, delta_val]) - @aesara.config.change_flags(compute_test_value="ignore") + @pytensor.config.change_flags(compute_test_value="ignore") def test_hessian(self): chol_vec = at.vector("chol_vec") chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3])) @@ -184,14 +184,14 @@ def test_hessian(self): class TestSplineWrapper: - @aesara.config.change_flags(compute_test_value="ignore") + @pytensor.config.change_flags(compute_test_value="ignore") def test_grad(self): x = np.linspace(0, 1, 100) y = x * x spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1)) verify_grad(spline, [0.5]) - @aesara.config.change_flags(compute_test_value="ignore") + @pytensor.config.change_flags(compute_test_value="ignore") def test_hessian(self): x = np.linspace(0, 1, 100) y = x * x @@ -203,7 +203,7 @@ def test_hessian(self): class TestI0e: - @aesara.config.change_flags(compute_test_value="ignore") + @pytensor.config.change_flags(compute_test_value="ignore") def test_grad(self): verify_grad(i0e, [0.5]) verify_grad(i0e, [-2.0]) diff --git a/pymc/tests/distributions/test_distribution.py b/pymc/tests/distributions/test_distribution.py index 80b91cb36..75d560e87 100644 --- a/pymc/tests/distributions/test_distribution.py +++ b/pymc/tests/distributions/test_distribution.py @@ -13,15 +13,15 @@ # limitations under the License. 
import warnings -import aesara -import aesara.tensor as at import numpy as np import numpy.random as npr import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import scipy.stats as st -from aesara.tensor import TensorVariable +from pytensor.tensor import TensorVariable import pymc as pm @@ -237,10 +237,10 @@ def logp(value, mu): mu = pm.Normal("mu", size=supp_shape) a = pm.DensityDist("a", mu, logp=logp, ndims_params=[1], ndim_supp=1, size=size) - mu_test_value = npr.normal(loc=0, scale=1, size=supp_shape).astype(aesara.config.floatX) + mu_test_value = npr.normal(loc=0, scale=1, size=supp_shape).astype(pytensor.config.floatX) a_test_value = npr.normal( loc=mu_test_value, scale=1, size=to_tuple(size) + (supp_shape,) - ).astype(aesara.config.floatX) + ).astype(pytensor.config.floatX) log_densityf = model.compile_logp(vars=[a], sum=False) assert log_densityf({"a": a_test_value, "mu": mu_test_value})[0].shape == to_tuple(size) @@ -265,7 +265,7 @@ def test_density_dist_custom_moment_univariate(self, size): def density_moment(rv, size, mu): return (at.ones(size) * mu).astype(rv.dtype) - mu_val = np.array(np.random.normal(loc=2, scale=1)).astype(aesara.config.floatX) + mu_val = np.array(np.random.normal(loc=2, scale=1)).astype(pytensor.config.floatX) with pm.Model(): mu = pm.Normal("mu") a = pm.DensityDist("a", mu, moment=density_moment, size=size) @@ -278,7 +278,7 @@ def test_density_dist_custom_moment_multivariate(self, size): def density_moment(rv, size, mu): return (at.ones(size)[..., None] * mu).astype(rv.dtype) - mu_val = np.random.normal(loc=2, scale=1, size=5).astype(aesara.config.floatX) + mu_val = np.random.normal(loc=2, scale=1, size=5).astype(pytensor.config.floatX) with pm.Model(): mu = pm.Normal("mu", size=5) a = pm.DensityDist( @@ -307,7 +307,7 @@ def _random(mu, rng=None, size=None): else: random = None - mu_val = np.random.normal(loc=2, scale=1, size=5).astype(aesara.config.floatX) + mu_val = np.random.normal(loc=2, scale=1, size=5).astype(pytensor.config.floatX) with pm.Model(): mu = pm.Normal("mu", size=5) a = pm.DensityDist("a", mu, random=random, ndims_params=[1], ndim_supp=1, size=size) diff --git a/pymc/tests/distributions/test_logprob.py b/pymc/tests/distributions/test_logprob.py index 560055b04..b775107b5 100644 --- a/pymc/tests/distributions/test_logprob.py +++ b/pymc/tests/distributions/test_logprob.py @@ -13,15 +13,15 @@ # limitations under the License. 
import warnings -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy.stats.distributions as sp -from aesara.graph.basic import ancestors -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.subtensor import ( +from pytensor.graph.basic import ancestors +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedIncSubtensor1, AdvancedSubtensor, @@ -33,7 +33,6 @@ import pymc as pm from pymc import DensityDist -from pymc.aesaraf import floatX, walk_model from pymc.distributions.continuous import ( HalfFlat, LogNormal, @@ -51,6 +50,7 @@ ) from pymc.logprob.abstract import get_measurable_outputs from pymc.model import Model, Potential +from pymc.pytensorf import floatX, walk_model from pymc.tests.helpers import assert_no_rvs, select_by_precision @@ -150,9 +150,9 @@ def test_joint_logp_incsubtensor(indices, size): data = mu[indices] sigma = 0.001 rng = np.random.RandomState(232) - a_val = rng.normal(mu, sigma, size=size).astype(aesara.config.floatX) + a_val = rng.normal(mu, sigma, size=size).astype(pytensor.config.floatX) - rng = aesara.shared(rng, borrow=False) + rng = pytensor.shared(rng, borrow=False) a = Normal.dist(mu, sigma, size=size, rng=rng) a_value_var = a.type() a.name = "a" @@ -191,7 +191,7 @@ def test_joint_logp_subtensor(): mu_base = floatX(np.power(10, np.arange(np.prod(size)))).reshape(size) mu = np.stack([mu_base, -mu_base]) sigma = 0.001 - rng = aesara.shared(np.random.RandomState(232), borrow=True) + rng = pytensor.shared(np.random.RandomState(232), borrow=True) A_rv = Normal.dist(mu, sigma, rng=rng) A_rv.name = "A" @@ -219,7 +219,7 @@ def test_joint_logp_subtensor(): ) A_idx_logp = at.add(*A_idx_logps) - logp_vals_fn = aesara.function([A_idx_value_var, I_value_var], A_idx_logp) + logp_vals_fn = pytensor.function([A_idx_value_var, I_value_var], A_idx_logp) # The compiled graph should not contain any `RandomVariables` assert_no_rvs(logp_vals_fn.maker.fgraph.outputs[0]) @@ -299,10 +299,10 @@ def test_model_unchanged_logprob_access(): a = Normal("a") c = Uniform("c", lower=a - 1, upper=1) - original_inputs = set(aesara.graph.graph_inputs([c])) + original_inputs = set(pytensor.graph.graph_inputs([c])) # Extract model.logp model.logp() - new_inputs = set(aesara.graph.graph_inputs([c])) + new_inputs = set(pytensor.graph.graph_inputs([c])) assert original_inputs == new_inputs @@ -403,5 +403,5 @@ def test_logprob_join_constant_shapes(): # This is what Aeppl does not do! 
assert_no_rvs(xy_logp) - f = aesara.function([xy_vv], xy_logp) + f = pytensor.function([xy_vv], xy_logp) np.testing.assert_array_equal(f(np.zeros(8)), sp.norm.logpdf(np.zeros(8))) diff --git a/pymc/tests/distributions/test_mixture.py b/pymc/tests/distributions/test_mixture.py index 50d66ef7c..8a1326e6f 100644 --- a/pymc/tests/distributions/test_mixture.py +++ b/pymc/tests/distributions/test_mixture.py @@ -16,18 +16,17 @@ from contextlib import ExitStack as does_not_raise -import aesara import numpy as np +import pytensor import pytest import scipy.stats as st -from aesara import tensor as at -from aesara.tensor import TensorVariable -from aesara.tensor.random.op import RandomVariable from numpy.testing import assert_allclose +from pytensor import tensor as at +from pytensor.tensor import TensorVariable +from pytensor.tensor.random.op import RandomVariable from scipy.special import logsumexp -from pymc.aesaraf import floatX from pymc.distributions import ( Categorical, Dirichlet, @@ -55,6 +54,7 @@ from pymc.logprob.transforms import IntervalTransform, LogTransform, SimplexTransform from pymc.math import expand_packed_triangular from pymc.model import Model +from pymc.pytensorf import floatX from pymc.sampling.forward import ( draw, sample_posterior_predictive, @@ -607,7 +607,7 @@ def test_list_mvnormals_predictive_sampling_shape(self): assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2) def test_nested_mixture(self): - if aesara.config.floatX == "float32": + if pytensor.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 @@ -638,7 +638,7 @@ def test_nested_mixture(self): test_point = model.initial_point() def mixmixlogp(value, point): - floatX = aesara.config.floatX + floatX = pytensor.config.floatX priorlogp = ( st.dirichlet.logpdf( x=point["g_w"], @@ -698,7 +698,7 @@ def test_iterable_single_component_warning(self): @pytest.mark.parametrize("floatX", ["float32", "float64"]) def test_mixture_dtype(self, floatX): - with aesara.config.change_flags(floatX=floatX): + with pytensor.config.change_flags(floatX=floatX): mix_dtype = Mixture.dist( w=[0.5, 0.5], comp_dists=[ @@ -966,7 +966,7 @@ def loose_logp(model, vars): on_unused_input="ignore", ) - if aesara.config.floatX == "float32": + if pytensor.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 @@ -1026,7 +1026,7 @@ def test_with_multinomial(self, batch_shape): assert prior["mixture"].shape == (self.n_samples, *batch_shape, 3) assert draw(mixture, draws=self.size).shape == (self.size, *batch_shape, 3) - if aesara.config.floatX == "float32": + if pytensor.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 @@ -1058,7 +1058,7 @@ def test_with_mvnormal(self): assert prior["mixture"].shape == (self.n_samples, 3) assert draw(mixture, draws=self.size).shape == (self.size, 3) - if aesara.config.floatX == "float32": + if pytensor.config.floatX == "float32": rtol = 1e-4 else: rtol = 1e-7 diff --git a/pymc/tests/distributions/test_multivariate.py b/pymc/tests/distributions/test_multivariate.py index 5ab9dc6b3..f5b3fc04e 100644 --- a/pymc/tests/distributions/test_multivariate.py +++ b/pymc/tests/distributions/test_multivariate.py @@ -16,21 +16,20 @@ import re import warnings -import aesara -import aesara.tensor as at import numpy as np import numpy.random as npr import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import scipy.special as sp import scipy.stats as st -from aesara.tensor import TensorVariable -from aesara.tensor.random.utils import broadcast_params +from pytensor.tensor 
import TensorVariable +from pytensor.tensor.random.utils import broadcast_params import pymc as pm -from pymc.aesaraf import compile_pymc, floatX, intX from pymc.distributions import logp from pymc.distributions.multivariate import ( _LKJCholeskyCov, @@ -41,6 +40,7 @@ from pymc.distributions.shape_utils import change_dist_size, to_tuple from pymc.logprob.utils import ParameterValueError from pymc.math import kronecker +from pymc.pytensorf import compile_pymc, floatX, intX from pymc.sampling.forward import draw from pymc.tests.distributions.util import ( BaseTestDistributionRandom, @@ -292,7 +292,7 @@ def test_mvnormal(self, n): ) @pytest.mark.skipif( - condition=(aesara.config.floatX == "float32"), + condition=(pytensor.config.floatX == "float32"), reason="Fails on float32 due to inf issues", ) def test_mvnormal_indef(self): @@ -303,19 +303,19 @@ def test_mvnormal_indef(self): x = at.vector("x") x.tag.test_value = np.zeros(2) mvn_logp = logp(pm.MvNormal.dist(mu=mu, cov=cov), x) - f_logp = aesara.function([cov, x], mvn_logp) + f_logp = pytensor.function([cov, x], mvn_logp) with pytest.raises(ParameterValueError): f_logp(cov_val, np.ones(2)) dlogp = at.grad(mvn_logp, cov) - f_dlogp = aesara.function([cov, x], dlogp) + f_dlogp = pytensor.function([cov, x], dlogp) assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2)))) mvn_logp = logp(pm.MvNormal.dist(mu=mu, tau=cov), x) - f_logp = aesara.function([cov, x], mvn_logp) + f_logp = pytensor.function([cov, x], mvn_logp) with pytest.raises(ParameterValueError): f_logp(cov_val, np.ones(2)) dlogp = at.grad(mvn_logp, cov) - f_dlogp = aesara.function([cov, x], dlogp) + f_dlogp = pytensor.function([cov, x], dlogp) assert not np.all(np.isfinite(f_dlogp(cov_val, np.ones(2)))) def test_mvnormal_init_fail(self): @@ -775,9 +775,9 @@ def test_car_logp(self, sparse, size): cov = np.linalg.inv(prec) scipy_logp = st.multivariate_normal.logpdf(xs, mu, cov) - W = aesara.tensor.as_tensor_variable(W) + W = pytensor.tensor.as_tensor_variable(W) if sparse: - W = aesara.sparse.csr_from_dense(W) + W = pytensor.sparse.csr_from_dense(W) car_dist = pm.CAR.dist(mu, W, alpha, tau, size=size) car_logp = logp(car_dist, xs).eval() @@ -788,7 +788,7 @@ def test_car_logp(self, sparse, size): # Check to make sure all the delta values are identical. 
tol = 1e-08 - if aesara.config.floatX == "float32": + if pytensor.config.floatX == "float32": tol = 1e-5 assert np.allclose(delta_logp - delta_logp[0], 0.0, atol=tol) @@ -812,9 +812,9 @@ def test_car_matrix_check(sparse): W = np.array( [[0.0, 1.0, 2.0, 0.0], [1.0, 0.0, 0.0, 1.0], [1.0, 0.0, 0.0, 1.0], [0.0, 1.0, 1.0, 0.0]] ) - W = aesara.tensor.as_tensor_variable(W) + W = pytensor.tensor.as_tensor_variable(W) if sparse: - W = aesara.sparse.csr_from_dense(W) + W = pytensor.sparse.csr_from_dense(W) car_dist = pm.CAR.dist(mu, W, alpha, tau) with pytest.raises(AssertionError, match="W must be a symmetric adjacency matrix"): @@ -823,7 +823,7 @@ def test_car_matrix_check(sparse): # W.ndim != 2 if not sparse: W = np.array([0.0, 1.0, 2.0, 0.0]) - W = aesara.tensor.as_tensor_variable(W) + W = pytensor.tensor.as_tensor_variable(W) with pytest.raises(ValueError, match="W must be a matrix"): car_dist = pm.CAR.dist(mu, W, alpha, tau) @@ -1740,7 +1740,7 @@ class TestDirichletMultinomial(BaseTestDistributionRandom): ] def check_random_draws(self): - default_rng = aesara.shared(np.random.default_rng(1234)) + default_rng = pytensor.shared(np.random.default_rng(1234)) draws = pm.DirichletMultinomial.dist( n=np.array([5, 100]), a=np.array([[0.001, 0.001, 0.001, 1000], [1000, 1000, 0.001, 0.001]]), @@ -1787,7 +1787,7 @@ class TestStickBreakingWeights(BaseTestDistributionRandom): ] def check_basic_properties(self): - default_rng = aesara.shared(np.random.default_rng(1234)) + default_rng = pytensor.shared(np.random.default_rng(1234)) draws = pm.StickBreakingWeights.dist( alpha=3.5, K=19, @@ -1917,7 +1917,7 @@ def check_errors(self): colcov=np.eye(3), ) with pytest.raises(ValueError): - logp(matrixnormal, aesara.tensor.ones((3, 3, 3))) + logp(matrixnormal, pytensor.tensor.ones((3, 3, 3))) def check_random_variable_prior(self): """ @@ -2082,9 +2082,9 @@ def test_car_rng_fn(sparse): D = W.sum(axis=0) prec = tau * (np.diag(D) - alpha * W) cov = np.linalg.inv(prec) - W = aesara.tensor.as_tensor_variable(W) + W = pytensor.tensor.as_tensor_variable(W) if sparse: - W = aesara.sparse.csr_from_dense(W) + W = pytensor.sparse.csr_from_dense(W) with pm.Model(): car = pm.CAR("car", mu, W, alpha, tau, size=size) @@ -2119,5 +2119,5 @@ def test_posdef_symmetric(matrix, result): Is this correct? 
""" - data = np.array(matrix, dtype=aesara.config.floatX) + data = np.array(matrix, dtype=pytensor.config.floatX) assert posdef(data) == result diff --git a/pymc/tests/distributions/test_shape_utils.py b/pymc/tests/distributions/test_shape_utils.py index b47322783..b335fefbd 100644 --- a/pymc/tests/distributions/test_shape_utils.py +++ b/pymc/tests/distributions/test_shape_utils.py @@ -14,16 +14,16 @@ import re import warnings -import aesara import numpy as np +import pytensor import pytest -from aesara import tensor as at -from aesara.compile.mode import Mode -from aesara.graph import Constant, ancestors -from aesara.tensor import TensorVariable -from aesara.tensor.random import normal -from aesara.tensor.shape import SpecifyShape +from pytensor import tensor as at +from pytensor.compile.mode import Mode +from pytensor.graph import Constant, ancestors +from pytensor.tensor import TensorVariable +from pytensor.tensor.random import normal +from pytensor.tensor.shape import SpecifyShape import pymc as pm @@ -251,7 +251,7 @@ def test_param_and_batch_shape_combos( assert len(batch_dims) == len(batch_shape) with pm.Model(coords=coords) as pmodel: - mu = aesara.shared(np.random.normal(size=param_shape)) + mu = pytensor.shared(np.random.normal(size=param_shape)) with warnings.catch_warnings(): warnings.simplefilter("error") @@ -348,14 +348,15 @@ def test_size32_doesnt_break_broadcasting(self): assert rv.broadcastable == (True, False) def test_observed_with_column_vector(self): - """This test is related to https://github.com/pymc-devs/aesara/issues/390 which breaks + """This test is related to https://github.com/pymc-devs/pytensor/issues/390 which breaks broadcastability of column-vector RVs. This unexpected change in type can lead to incompatibilities during graph rewriting for model.logp evaluation. 
""" with pm.Model() as model: # The `observed` is a broadcastable column vector obs = [ - at.as_tensor_variable(np.ones((3, 1), dtype=aesara.config.floatX)) for _ in range(4) + at.as_tensor_variable(np.ones((3, 1), dtype=pytensor.config.floatX)) + for _ in range(4) ] assert all(obs_.broadcastable == (False, True) for obs_ in obs) @@ -373,7 +374,7 @@ def test_observed_with_column_vector(self): assert model.compile_logp()({}) def test_dist_api_works(self): - mu = aesara.shared(np.array([1, 2, 3])) + mu = pytensor.shared(np.array([1, 2, 3])) with pytest.raises(NotImplementedError, match="API is not supported"): pm.Normal.dist(mu=mu, dims=("town",)) assert pm.Normal.dist(mu=mu, shape=(3,)).eval().shape == (3,) @@ -442,7 +443,7 @@ def test_size_from_dims_rng_update(self): with pm.Model(coords=dict(x_dim=range(2))): x = pm.Normal("x", dims=("x_dim",)) - fn = pm.aesaraf.compile_pymc([], x) + fn = pm.pytensorf.compile_pymc([], x) # Check that both function outputs (rng and draws) come from the same Apply node assert fn.maker.fgraph.outputs[0].owner is fn.maker.fgraph.outputs[1].owner @@ -457,7 +458,7 @@ def test_size_from_observed_rng_update(self): with pm.Model(): x = pm.Normal("x", observed=[0, 1]) - fn = pm.aesaraf.compile_pymc([], x) + fn = pm.pytensorf.compile_pymc([], x) # Check that both function outputs (rng and draws) come from the same Apply node assert fn.maker.fgraph.outputs[0].owner is fn.maker.fgraph.outputs[1].owner @@ -494,7 +495,7 @@ def test_rv_size_is_none(): def test_change_rv_size(): loc = at.as_tensor_variable([1, 2]) - rng = aesara.shared(np.random.default_rng()) + rng = pytensor.shared(np.random.default_rng()) rv = normal(loc=loc, rng=rng) assert rv.ndim == 1 assert tuple(rv.shape.eval()) == (2,) @@ -544,7 +545,7 @@ def test_change_rv_size(): def test_change_rv_size_default_update(): - rng = aesara.shared(np.random.default_rng(0)) + rng = pytensor.shared(np.random.default_rng(0)) x = normal(rng=rng) # Test that "traditional" default_update is translated to the new rng @@ -555,7 +556,7 @@ def test_change_rv_size_default_update(): assert new_rng.default_update is new_x.owner.outputs[0] # Test that "non-traditional" default_update raises UserWarning - next_rng = aesara.shared(np.random.default_rng(1)) + next_rng = pytensor.shared(np.random.default_rng(1)) rng.default_update = next_rng with pytest.warns(UserWarning, match="could not be replicated in resized variable"): new_x = change_dist_size(x, new_size=(2,)) @@ -571,7 +572,7 @@ def test_change_rv_size_default_update(): def test_change_specify_shape_size_univariate(): - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): s1, s2 = at.iscalars("s1", "s2") x = at.random.normal(size=(s1, s2)) x = at.specify_shape(x, (5, 3)) @@ -591,7 +592,7 @@ def test_change_specify_shape_size_univariate(): def test_change_specify_shape_size_multivariate(): - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): batch, supp = at.iscalars("batch", "supp") x = at.random.multivariate_normal(at.zeros(supp), at.eye(supp), size=(batch,)) x = at.specify_shape(x, (5, 3)) @@ -666,7 +667,7 @@ def test_get_support_shape_1d( assert inferred_support_shape.eval() == expected_support_shape else: # check that inferred steps is still correct by ignoring the assert - f = aesara.function( + f = pytensor.function( [], inferred_support_shape, mode=Mode().including("local_remove_all_assert") ) assert f() == expected_support_shape @@ -742,7 +743,7 @@ def 
test_get_support_shape( assert (inferred_support_shape.eval() == expected_support_shape).all() else: # check that inferred support shape is still correct by ignoring the assert - f = aesara.function( + f = pytensor.function( [], inferred_support_shape, mode=Mode().including("local_remove_all_assert") ) assert (f() == expected_support_shape).all() diff --git a/pymc/tests/distributions/test_simulator.py b/pymc/tests/distributions/test_simulator.py index 9576f9afc..e5ffb9328 100644 --- a/pymc/tests/distributions/test_simulator.py +++ b/pymc/tests/distributions/test_simulator.py @@ -13,24 +13,24 @@ # limitations under the License. import warnings -import aesara import numpy as np +import pytensor import pytest import scipy.stats as st -from aesara.graph import ancestors -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.var import ( +from pytensor.graph import ancestors +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.var import ( RandomGeneratorSharedVariable, RandomStateSharedVariable, ) -from aesara.tensor.sort import SortOp +from pytensor.tensor.sort import SortOp import pymc as pm from pymc import floatX -from pymc.aesaraf import compile_pymc from pymc.initial_point import make_initial_point_fn +from pymc.pytensorf import compile_pymc from pymc.smc.kernels import IMH from pymc.tests.helpers import SeededTest @@ -95,7 +95,7 @@ def test_one_gaussian(self): @pytest.mark.parametrize("floatX", ["float32", "float64"]) def test_custom_dist_sum_stat(self, floatX): - with aesara.config.change_flags(floatX=floatX): + with pytensor.config.change_flags(floatX=floatX): with pm.Model() as m: a = pm.Normal("a", mu=0, sigma=1) b = pm.HalfNormal("b", sigma=1) @@ -123,7 +123,7 @@ def test_custom_dist_sum_stat_scalar(self, floatX): """ scalar_data = 5 - with aesara.config.change_flags(floatX=floatX): + with pytensor.config.change_flags(floatX=floatX): with pm.Model() as m: s = pm.Simulator( "s", diff --git a/pymc/tests/distributions/test_timeseries.py b/pymc/tests/distributions/test_timeseries.py index 25ae048d8..587adf1c2 100644 --- a/pymc/tests/distributions/test_timeseries.py +++ b/pymc/tests/distributions/test_timeseries.py @@ -11,17 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import aesara import numpy as np +import pytensor import pytest import scipy.stats as st -from aesara.tensor.random.op import RandomVariable +from pytensor.tensor.random.op import RandomVariable import pymc as pm from pymc import MutableData -from pymc.aesaraf import floatX from pymc.distributions.continuous import Exponential, Flat, HalfNormal, Normal, Uniform from pymc.distributions.discrete import DiracDelta from pymc.distributions.logprob import logp @@ -42,6 +41,7 @@ RandomWalk, ) from pymc.model import Model +from pymc.pytensorf import floatX from pymc.sampling.forward import draw, sample_posterior_predictive from pymc.sampling.mcmc import sample from pymc.tests.distributions.util import assert_moment_is_expected @@ -575,7 +575,7 @@ def test_batched_sigma(self): ar_order, steps, batch_size = 4, 100, (7, 5) # AR order cannot be inferred from beta_tp because it is not fixed. 
# We specify it manually below - beta_tp = aesara.shared(np.random.randn(ar_order)) + beta_tp = pytensor.shared(np.random.randn(ar_order)) sigma_tp = np.abs(np.random.randn(*batch_size)) y_tp = np.random.randn(*batch_size, steps) with Model() as t0: @@ -621,7 +621,7 @@ def test_batched_sigma(self): def test_batched_init_dist(self): ar_order, steps, batch_size = 3, 100, 5 - beta_tp = aesara.shared(np.random.randn(ar_order), shape=(3,)) + beta_tp = pytensor.shared(np.random.randn(ar_order), shape=(3,)) y_tp = np.random.randn(batch_size, steps) with Model() as t0: init_dist = Normal.dist(0.0, 100.0, size=(batch_size, ar_order)) diff --git a/pymc/tests/distributions/test_transform.py b/pymc/tests/distributions/test_transform.py index 5bed2a9a4..dc46177b1 100644 --- a/pymc/tests/distributions/test_transform.py +++ b/pymc/tests/distributions/test_transform.py @@ -15,18 +15,18 @@ from typing import Union -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest -from aesara.tensor.var import TensorConstant +from pytensor.tensor.var import TensorConstant import pymc as pm import pymc.distributions.transforms as tr -from pymc.aesaraf import floatX, jacobian from pymc.distributions.logprob import _joint_logp +from pymc.pytensorf import floatX, jacobian from pymc.tests.checks import close_to, close_to_logical from pymc.tests.distributions.util import ( Circ, @@ -44,7 +44,7 @@ # some transforms (stick breaking) require additon of small slack in order to be numerically # stable. The minimal addable slack for float32 is higher thus we need to be less strict -tol = 1e-7 if aesara.config.floatX == "float64" else 1e-6 +tol = 1e-7 if pytensor.config.floatX == "float64" else 1e-6 def check_transform(transform, domain, constructor=at.dscalar, test=0, rv_var=None): @@ -55,9 +55,9 @@ def check_transform(transform, domain, constructor=at.dscalar, test=0, rv_var=No rv_inputs = rv_var.owner.inputs if rv_var.owner else [] # test forward and forward_val # FIXME: What's being tested here? That the transformed graph can compile? 
- forward_f = aesara.function([x], transform.forward(x, *rv_inputs)) + forward_f = pytensor.function([x], transform.forward(x, *rv_inputs)) # test transform identity - identity_f = aesara.function( + identity_f = pytensor.function( [x], transform.backward(transform.forward(x, *rv_inputs), *rv_inputs) ) for val in domain.vals: @@ -74,7 +74,7 @@ def get_values(transform, domain=R, constructor=at.dscalar, test=0, rv_var=None) if rv_var is None: rv_var = x rv_inputs = rv_var.owner.inputs if rv_var.owner else [] - f = aesara.function([x], transform.backward(x, *rv_inputs)) + f = pytensor.function([x], transform.backward(x, *rv_inputs)) return np.array([f(val) for val in domain.vals]) @@ -105,9 +105,9 @@ def check_jacobian_det( jac = at.log(at.abs(at.diag(jacobian(x, [y])))) # ljd = log jacobian det - actual_ljd = aesara.function([y], jac) + actual_ljd = pytensor.function([y], jac) - computed_ljd = aesara.function( + computed_ljd = pytensor.function( [y], at.as_tensor_variable(transform.log_jac_det(y, *rv_inputs)), on_unused_input="ignore" ) @@ -136,7 +136,7 @@ def test_simplex_accuracy(): val = np.array([-30]) x = at.dvector("x") x.tag.test_value = val - identity_f = aesara.function([x], tr.simplex.forward(x, tr.simplex.backward(x, x))) + identity_f = pytensor.function([x], tr.simplex.forward(x, tr.simplex.backward(x, x))) close_to(val, identity_f(val), tol) @@ -223,7 +223,7 @@ def test_interval(): @pytest.mark.skipif( - aesara.config.floatX == "float32", reason="Test is designed for 64bit precision" + pytensor.config.floatX == "float32", reason="Test is designed for 64bit precision" ) def test_interval_near_boundary(): lb = -1.0 diff --git a/pymc/tests/distributions/test_truncated.py b/pymc/tests/distributions/test_truncated.py index 20989b42e..0488b5ba3 100644 --- a/pymc/tests/distributions/test_truncated.py +++ b/pymc/tests/distributions/test_truncated.py @@ -1,10 +1,10 @@ -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy -from aesara.tensor.random.basic import GeometricRV, NormalRV +from pytensor.tensor.random.basic import GeometricRV, NormalRV from pymc import Censored, Model, draw, find_MAP, logp from pymc.distributions.continuous import Exponential, TruncatedNormalRV @@ -56,7 +56,7 @@ def _icdf_not_implemented(*args, **kwargs): @pytest.mark.parametrize("shape_info", ("shape", "dims", "observed")) def test_truncation_specialized_op(shape_info): - rng = aesara.shared(np.random.default_rng()) + rng = pytensor.shared(np.random.default_rng()) x = at.random.normal(0, 10, rng=rng, name="x") with Model(coords={"dim": range(100)}) as m: @@ -137,7 +137,7 @@ def test_truncation_continuous_logp(op_type, lower, upper): assert isinstance(xt.owner.op, TruncatedRV) xt_vv = xt.clone() - xt_logp_fn = aesara.function([xt_vv], logp(xt, xt_vv)) + xt_logp_fn = pytensor.function([xt_vv], logp(xt, xt_vv)) ref_xt = scipy.stats.truncnorm( (lower - loc) / scale, @@ -199,7 +199,7 @@ def test_truncation_discrete_logp(op_type, lower, upper): assert isinstance(xt.owner.op, TruncatedRV) xt_vv = xt.clone() - xt_logp_fn = aesara.function([xt_vv], logp(xt, xt_vv)) + xt_logp_fn = pytensor.function([xt_vv], logp(xt, xt_vv)) ref_xt = scipy.stats.geom(p) log_norm = np.log(ref_xt.cdf(upper) - ref_xt.cdf(lower - 1)) diff --git a/pymc/tests/distributions/util.py b/pymc/tests/distributions/util.py index 35bc8dcad..6af649adb 100644 --- a/pymc/tests/distributions/util.py +++ b/pymc/tests/distributions/util.py @@ -4,25 +4,25 @@ from contextlib 
import ExitStack as does_not_raise from typing import Callable, List, Optional -import aesara -import aesara.tensor as at import numpy as np import numpy.random as nr import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import scipy.special as sp import scipy.stats as st -from aesara.compile.mode import Mode +from pytensor.compile.mode import Mode import pymc as pm -from pymc.aesaraf import compile_pymc, floatX, intX from pymc.distributions import logcdf, logp from pymc.distributions.logprob import _joint_logp from pymc.distributions.shape_utils import change_dist_size from pymc.initial_point import make_initial_point_fn from pymc.logprob.utils import ParameterValueError +from pymc.pytensorf import compile_pymc, floatX, intX from pymc.tests.helpers import SeededTest, select_by_precision @@ -48,7 +48,7 @@ def product(domains, n_samples=-1): class Domain: - def __init__(self, vals, dtype=aesara.config.floatX, edges=None, shape=None): + def __init__(self, vals, dtype=pytensor.config.floatX, edges=None, shape=None): # Infinity values must be kept as floats vals = [np.array(v, dtype=dtype) if np.all(np.isfinite(v)) else floatX(v) for v in vals] @@ -215,7 +215,7 @@ def build_model(distfam, valuedomain, vardomains, extra_args=None): with pm.Model() as m: param_vars = {} for v, dom in vardomains.items(): - v_at = aesara.shared(np.asarray(dom.vals[0])) + v_at = pytensor.shared(np.asarray(dom.vals[0])) v_at.name = v param_vars[v] = v_at param_vars.update(extra_args) @@ -351,9 +351,9 @@ def _model_input_dict(model, param_vars, pt): # We need to remove `Assert`s introduced by checks like # `assert_negative_support` and disable test values; # otherwise, we won't be able to create the `RandomVariable` - with aesara.config.change_flags(compute_test_value="off"): + with pytensor.config.change_flags(compute_test_value="off"): invalid_dist = pymc_dist.dist(**test_params, **extra_args) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): with pytest.raises(ParameterValueError): logp(invalid_dist, valid_value).eval() pytest.fail(f"test_params={test_params}, valid_value={valid_value}") @@ -370,7 +370,7 @@ def _model_input_dict(model, param_vars, pt): for invalid_value in invalid_values: if invalid_value is None: continue - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): npt.assert_equal( logp(valid_dist, invalid_value).eval(), -np.inf, @@ -488,16 +488,16 @@ def check_logcdf( # `assert_negative_support` and disable test values; # otherwise, we won't be able to create the # `RandomVariable` - with aesara.config.change_flags(compute_test_value="off"): + with pytensor.config.change_flags(compute_test_value="off"): invalid_dist = pymc_dist.dist(**test_params) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): with pytest.raises(ParameterValueError): logcdf(invalid_dist, valid_value).eval() # Test that values below domain edge evaluate to -np.inf if np.isfinite(domain.lower): below_domain = domain.lower - 1 - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): npt.assert_equal( logcdf(valid_dist, below_domain).eval(), -np.inf, @@ -507,7 +507,7 @@ def check_logcdf( # Test that values above domain edge evaluate to 0 if np.isfinite(domain.upper): above_domain = domain.upper + 1 - with aesara.config.change_flags(mode=Mode("py")): + with 
pytensor.config.change_flags(mode=Mode("py")): npt.assert_equal( logcdf(valid_dist, above_domain).eval(), 0, @@ -516,7 +516,7 @@ def check_logcdf( # Test that method works with multiple values or raises informative TypeError valid_dist = pymc_dist.dist(**valid_params, size=2) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): try: logcdf(valid_dist, np.array([valid_value, valid_value])).eval() except TypeError as err: @@ -555,7 +555,7 @@ def check_selfconsistency_discrete_logcdf( for param_name, param_value in params.items(): param_vars[param_name].set_value(param_value) - with aesara.config.change_flags(mode=Mode("py")): + with pytensor.config.change_flags(mode=Mode("py")): npt.assert_almost_equal( dist_logcdf({"value": value}), sp.logsumexp([dist_logp({"value": value}) for value in values]), @@ -783,7 +783,7 @@ def test_distribution(self): def _instantiate_pymc_rv(self, dist_params=None): params = dist_params if dist_params else self.pymc_dist_params self.pymc_rv = self.pymc_dist.dist( - **params, size=self.size, rng=aesara.shared(self.get_random_state(reset=True)) + **params, size=self.size, rng=pytensor.shared(self.get_random_state(reset=True)) ) def check_pymc_draws_match_reference(self): @@ -794,14 +794,14 @@ def check_pymc_draws_match_reference(self): ) def check_pymc_params_match_rv_op(self): - aesara_dist_inputs = self.pymc_rv.get_parents()[0].inputs[3:] - assert len(self.expected_rv_op_params) == len(aesara_dist_inputs) + pytensor_dist_inputs = self.pymc_rv.get_parents()[0].inputs[3:] + assert len(self.expected_rv_op_params) == len(pytensor_dist_inputs) for (expected_name, expected_value), actual_variable in zip( - self.expected_rv_op_params.items(), aesara_dist_inputs + self.expected_rv_op_params.items(), pytensor_dist_inputs ): # Add additional line to evaluate symbolic inputs to distributions - if isinstance(expected_value, aesara.tensor.Variable): + if isinstance(expected_value, pytensor.tensor.Variable): expected_value = expected_value.eval() npt.assert_almost_equal(expected_value, actual_variable.eval(), decimal=self.decimal) diff --git a/pymc/tests/gp/test_cov.py b/pymc/tests/gp/test_cov.py index 550f68727..68fc3a5ea 100644 --- a/pymc/tests/gp/test_cov.py +++ b/pymc/tests/gp/test_cov.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
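The distribution-test helpers above seed random draws by passing an explicitly shared RNG into `.dist()`. A minimal sketch of that pattern under the new module name (illustrative only, not part of the patch; the seed 1234 is arbitrary):

import numpy as np
import pytensor

import pymc as pm

rng = pytensor.shared(np.random.default_rng(1234))  # previously: aesara.shared(...)
x = pm.Normal.dist(mu=0.0, sigma=1.0, size=3, rng=rng)
print(pm.draw(x))  # three reproducible draws from the seeded generator
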
-import aesara -import aesara.tensor as at import numpy as np import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import pymc as pm @@ -204,7 +204,7 @@ def test_covexp_numpy(self): Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) - def test_covexp_aesara(self): + def test_covexp_pytensor(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: a = at.alloc(2.0, 1, 1) @@ -218,7 +218,7 @@ def test_covexp_aesara(self): def test_covexp_shared(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: - a = aesara.shared(2.0) + a = pytensor.shared(2.0) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940**2, atol=1e-3) diff --git a/pymc/tests/gp/test_gp.py b/pymc/tests/gp/test_gp.py index 658bd2252..aba927b46 100644 --- a/pymc/tests/gp/test_gp.py +++ b/pymc/tests/gp/test_gp.py @@ -76,6 +76,7 @@ class TestMarginalApproxSigmaParams(TestSigmaParams): gp_implementation = pm.gp.MarginalApprox + @pytest.mark.xfail(reason="Possible shape problem, see #6366") def test_catch_warnings(self): """Warning from using the old noise parameter.""" with self.model: diff --git a/pymc/tests/gp/test_util.py b/pymc/tests/gp/test_util.py index 43213bf98..92d29fe29 100644 --- a/pymc/tests/gp/test_util.py +++ b/pymc/tests/gp/test_util.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aesara.tensor as at import numpy as np import numpy.testing as npt +import pytensor.tensor as at import pytest import pymc as pm diff --git a/pymc/tests/helpers.py b/pymc/tests/helpers.py index 85ab0f68c..fc2994136 100644 --- a/pymc/tests/helpers.py +++ b/pymc/tests/helpers.py @@ -19,19 +19,19 @@ from logging.handlers import BufferingHandler -import aesara import numpy as np import numpy.random as nr +import pytensor -from aesara.gradient import verify_grad as at_verify_grad -from aesara.graph import ancestors -from aesara.graph.rewriting.basic import in2out -from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream -from aesara.tensor.random.op import RandomVariable +from pytensor.gradient import verify_grad as at_verify_grad +from pytensor.graph import ancestors +from pytensor.graph.rewriting.basic import in2out +from pytensor.sandbox.rng_mrg import MRG_RandomStream as RandomStream +from pytensor.tensor.random.op import RandomVariable import pymc as pm -from pymc.aesaraf import at_rng, local_check_parameter_to_ninf_switch, set_at_rng +from pymc.pytensorf import at_rng, local_check_parameter_to_ninf_switch, set_at_rng from pymc.tests.checks import close_to from pymc.tests.models import mv_simple, mv_simple_coarse @@ -121,7 +121,7 @@ def match_value(self, k, dv, v): def select_by_precision(float64, float32): """Helper function to choose reasonable decimal cutoffs for different floatX modes.""" - decimal = float64 if aesara.config.floatX == "float64" else float32 + decimal = float64 if pytensor.config.floatX == "float64" else float32 return decimal @@ -149,7 +149,7 @@ def assert_random_state_equal(state1, state2): # all that matters are the shape of the draws or deterministic values of observed data). # DO NOT USE UNLESS YOU HAVE A GOOD REASON TO! 
fast_unstable_sampling_mode = ( - aesara.compile.mode.FAST_COMPILE + pytensor.compile.mode.FAST_COMPILE # Remove slow rewrite phases .excluding("canonicalize", "specialize") # Include necessary rewrites for proper logp handling @@ -215,7 +215,7 @@ def continuous_steps(self, step, step_kwargs): c1 = pm.HalfNormal("c1") c2 = pm.HalfNormal("c2") - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): assert [m.rvs_to_values[c1]] == step([c1], **step_kwargs).vars assert {m.rvs_to_values[c1], m.rvs_to_values[c2]} == set( step([c1, c2], **step_kwargs).vars diff --git a/pymc/tests/logprob/test_abstract.py b/pymc/tests/logprob/test_abstract.py index 684cc3319..091e59a6f 100644 --- a/pymc/tests/logprob/test_abstract.py +++ b/pymc/tests/logprob/test_abstract.py @@ -36,11 +36,11 @@ import re -import aesara.tensor as at +import pytensor.tensor as at import pytest -from aesara.scalar import Exp, exp -from aesara.tensor.random.basic import NormalRV +from pytensor.scalar import Exp, exp +from pytensor.tensor.random.basic import NormalRV from pymc.logprob.abstract import ( MeasurableElemwise, diff --git a/pymc/tests/logprob/test_censoring.py b/pymc/tests/logprob/test_censoring.py index 2353e8c0b..1884180ca 100644 --- a/pymc/tests/logprob/test_censoring.py +++ b/pymc/tests/logprob/test_censoring.py @@ -34,9 +34,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy as sp import scipy.stats as st @@ -46,7 +46,7 @@ from pymc.tests.helpers import assert_no_rvs -@aesara.config.change_flags(compute_test_value="raise") +@pytensor.config.change_flags(compute_test_value="raise") def test_continuous_rv_clip(): x_rv = at.random.normal(0.5, 1) cens_x_rv = at.clip(x_rv, -2, 2) @@ -57,7 +57,7 @@ def test_continuous_rv_clip(): logp = joint_logprob({cens_x_rv: cens_x_vv}) assert_no_rvs(logp) - logp_fn = aesara.function([cens_x_vv], logp) + logp_fn = pytensor.function([cens_x_vv], logp) ref_scipy = st.norm(0.5, 1) assert logp_fn(-3) == -np.inf @@ -77,7 +77,7 @@ def test_discrete_rv_clip(): logp = joint_logprob({cens_x_rv: cens_x_vv}) assert_no_rvs(logp) - logp_fn = aesara.function([cens_x_vv], logp) + logp_fn = pytensor.function([cens_x_vv], logp) ref_scipy = st.poisson(2) assert logp_fn(0) == -np.inf @@ -101,7 +101,7 @@ def test_one_sided_clip(): assert_no_rvs(lb_logp) assert_no_rvs(ub_logp) - logp_fn = aesara.function([lb_cens_x_vv, ub_cens_x_vv], [lb_logp, ub_logp]) + logp_fn = pytensor.function([lb_cens_x_vv, ub_cens_x_vv], [lb_logp, ub_logp]) ref_scipy = st.norm(0, 1) assert np.all(np.array(logp_fn(-2, 2)) == -np.inf) @@ -119,7 +119,7 @@ def test_useless_clip(): logp = joint_logprob({cens_x_rv: cens_x_vv}, sum=False) assert_no_rvs(logp) - logp_fn = aesara.function([cens_x_vv], logp) + logp_fn = pytensor.function([cens_x_vv], logp) ref_scipy = st.norm(0.5, 1) np.testing.assert_allclose(logp_fn([-2, 0, 2]), ref_scipy.logpdf([-2, 0, 2])) @@ -135,7 +135,7 @@ def test_random_clip(): logp = joint_logprob({cens_x_rv: cens_x_vv, lb_rv: lb_vv}, sum=False) assert_no_rvs(logp) - logp_fn = aesara.function([lb_vv, cens_x_vv], logp) + logp_fn = pytensor.function([lb_vv, cens_x_vv], logp) res = logp_fn([0, -1], [-1, -1]) assert res[0] == -np.inf assert res[1] != -np.inf @@ -201,7 +201,7 @@ def test_deterministic_clipping(): logp = joint_logprob({x_rv: x_vv, y_rv: y_vv}) 
assert_no_rvs(logp) - logp_fn = aesara.function([x_vv, y_vv], logp) + logp_fn = pytensor.function([x_vv, y_vv], logp) assert np.isclose( logp_fn(-1, 1), st.norm(0, 1).logpdf(-1) + st.norm(0, 1).logpdf(1), diff --git a/pymc/tests/logprob/test_composite_logprob.py b/pymc/tests/logprob/test_composite_logprob.py index 85ac92e7d..457449c83 100644 --- a/pymc/tests/logprob/test_composite_logprob.py +++ b/pymc/tests/logprob/test_composite_logprob.py @@ -34,9 +34,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import scipy.stats as st from pymc.logprob import joint_logprob @@ -63,7 +63,7 @@ def test_scalar_clipped_mixture(): logp = joint_logprob({idxs: idxs_vv, mix: mix_vv}) - logp_fn = aesara.function([idxs_vv, mix_vv], logp) + logp_fn = pytensor.function([idxs_vv, mix_vv], logp) assert logp_fn(0, 0.4) == -np.inf assert np.isclose(logp_fn(0, 0.5), st.norm.logcdf(0.5, 1) + np.log(0.6)) assert np.isclose(logp_fn(0, 1.3), st.norm.logpdf(1.3, 1) + np.log(0.6)) @@ -99,7 +99,7 @@ def test_nested_scalar_mixtures(): mix12_vv = mix12.clone() logp = joint_logprob({idxs1: idxs1_vv, idxs2: idxs2_vv, idxs12: idxs12_vv, mix12: mix12_vv}) - logp_fn = aesara.function([idxs1_vv, idxs2_vv, idxs12_vv, mix12_vv], logp) + logp_fn = pytensor.function([idxs1_vv, idxs2_vv, idxs12_vv, mix12_vv], logp) expected_mu_logpdf = st.norm.logpdf(0) + np.log(0.5) * 3 assert np.isclose(logp_fn(0, 0, 0, -50), expected_mu_logpdf) @@ -158,7 +158,7 @@ def test_double_log_transform_rv(): y_vv = y_rv.clone() logp = joint_logprob({y_rv: y_vv}, sum=False) - logp_fn = aesara.function([y_vv], logp) + logp_fn = pytensor.function([y_vv], logp) log_log_y_val = np.asarray(0.5) log_y_val = np.exp(log_log_y_val) @@ -180,7 +180,7 @@ def test_affine_transform_rv(): logp = joint_logprob({y_rv: y_vv}, sum=False) assert_no_rvs(logp) - logp_fn = aesara.function([loc, scale, y_vv], logp) + logp_fn = pytensor.function([loc, scale, y_vv], logp) loc_test_val = 4.0 scale_test_val = np.full(rv_size, 0.5) @@ -201,7 +201,7 @@ def test_affine_log_transform_rv(): y_vv = y_rv.clone() logp = joint_logprob({y_rv: y_vv}, sum=False) - logp_fn = aesara.function([a, b, y_vv], logp) + logp_fn = pytensor.function([a, b, y_vv], logp) a_val = -1.5 b_val = 3.0 diff --git a/pymc/tests/logprob/test_cumsum.py b/pymc/tests/logprob/test_cumsum.py index e1229a29a..2d34872b1 100644 --- a/pymc/tests/logprob/test_cumsum.py +++ b/pymc/tests/logprob/test_cumsum.py @@ -34,9 +34,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy.stats as st @@ -111,7 +111,7 @@ def test_deterministic_cumsum(): logp = joint_logprob({x_rv: x_vv, y_rv: y_vv}) assert_no_rvs(logp) - logp_fn = aesara.function([x_vv, y_vv], logp) + logp_fn = pytensor.function([x_vv, y_vv], logp) assert np.isclose( logp_fn(np.ones(5), np.arange(5) + 1), st.norm(1, 1).logpdf(1) * 10, diff --git a/pymc/tests/logprob/test_joint_logprob.py b/pymc/tests/logprob/test_joint_logprob.py index 82d697f3d..a83b871ab 100644 --- a/pymc/tests/logprob/test_joint_logprob.py +++ b/pymc/tests/logprob/test_joint_logprob.py @@ -36,14 +36,14 @@ import warnings -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy.stats.distributions as sp -from aesara.graph.basic import ancestors, equal_computations -from aesara.tensor.subtensor import ( +from pytensor.graph.basic import ancestors, equal_computations +from pytensor.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedIncSubtensor1, AdvancedSubtensor, @@ -220,7 +220,7 @@ def test_joint_logprob_subtensor(): mu_base = np.power(10, np.arange(np.prod(size))).reshape(size) mu = np.stack([mu_base, -mu_base]) sigma = 0.001 - rng = aesara.shared(np.random.RandomState(232), borrow=True) + rng = pytensor.shared(np.random.RandomState(232), borrow=True) A_rv = at.random.normal(mu, sigma, rng=rng) A_rv.name = "A" @@ -242,12 +242,12 @@ def test_joint_logprob_subtensor(): A_idx_logp = joint_logprob({A_idx: A_idx_value_var, I_rv: I_value_var}, sum=False) - logp_vals_fn = aesara.function([A_idx_value_var, I_value_var], A_idx_logp) + logp_vals_fn = pytensor.function([A_idx_value_var, I_value_var], A_idx_logp) # The compiled graph should not contain any `RandomVariables` assert_no_rvs(logp_vals_fn.maker.fgraph.outputs[0]) - decimals = 6 if aesara.config.floatX == "float64" else 4 + decimals = 6 if pytensor.config.floatX == "float64" else 4 test_val_rng = np.random.RandomState(3238) diff --git a/pymc/tests/logprob/test_mixture.py b/pymc/tests/logprob/test_mixture.py index 473171734..984bf264b 100644 --- a/pymc/tests/logprob/test_mixture.py +++ b/pymc/tests/logprob/test_mixture.py @@ -34,16 +34,16 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
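The logprob tests above all follow one recipe: build a graph with `at.random`, clone a value variable for each measurable output, and pass the mapping to `joint_logprob` before compiling with `pytensor.function`. A minimal sketch of that recipe (illustrative only, not part of the patch):

import pytensor
import pytensor.tensor as at
import scipy.stats as st

from pymc.logprob import joint_logprob

x_rv = at.random.normal(0.0, 1.0, name="x")
x_vv = x_rv.clone()                      # value variable standing in for x
logp = joint_logprob({x_rv: x_vv})       # symbolic log-probability of x at x_vv
logp_fn = pytensor.function([x_vv], logp)
print(logp_fn(0.5), st.norm(0, 1).logpdf(0.5))  # the two numbers should agree
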
-import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy.stats.distributions as sp -from aesara.graph.basic import Variable, equal_computations -from aesara.tensor.random.basic import CategoricalRV -from aesara.tensor.shape import shape_tuple -from aesara.tensor.subtensor import as_index_constant +from pytensor.graph.basic import Variable, equal_computations +from pytensor.tensor.random.basic import CategoricalRV +from pytensor.tensor.shape import shape_tuple +from pytensor.tensor.subtensor import as_index_constant from pymc.logprob.joint_logprob import factorized_joint_logprob, joint_logprob from pymc.logprob.mixture import MixtureRV, expand_indices @@ -102,7 +102,7 @@ def create_mix_model(size, axis): joint_logprob({M_rv: m_vv, I_rv: i_vv}) -@aesara.config.change_flags(compute_test_value="warn") +@pytensor.config.change_flags(compute_test_value="warn") @pytest.mark.parametrize( "op_constructor", [ @@ -141,12 +141,12 @@ def test_compute_test_value(op_constructor): @pytest.mark.parametrize( "p_val, size", [ - (np.array(0.0, dtype=aesara.config.floatX), ()), - (np.array(1.0, dtype=aesara.config.floatX), ()), - (np.array(0.0, dtype=aesara.config.floatX), (2,)), - (np.array(1.0, dtype=aesara.config.floatX), (2, 1)), - (np.array(1.0, dtype=aesara.config.floatX), (2, 3)), - (np.array([0.1, 0.9], dtype=aesara.config.floatX), (2, 3)), + (np.array(0.0, dtype=pytensor.config.floatX), ()), + (np.array(1.0, dtype=pytensor.config.floatX), ()), + (np.array(0.0, dtype=pytensor.config.floatX), (2,)), + (np.array(1.0, dtype=pytensor.config.floatX), (2, 1)), + (np.array(1.0, dtype=pytensor.config.floatX), (2, 3)), + (np.array([0.1, 0.9], dtype=pytensor.config.floatX), (2, 3)), ], ) def test_hetero_mixture_binomial(p_val, size): @@ -162,7 +162,7 @@ def test_hetero_mixture_binomial(p_val, size): p_val_1 = p_val else: p_at = at.vector("p") - p_at.tag.test_value = np.array(p_val, dtype=aesara.config.floatX) + p_at.tag.test_value = np.array(p_val, dtype=pytensor.config.floatX) I_rv = srng.categorical(p_at, size=size, name="I") p_val_1 = p_val[1] @@ -177,11 +177,11 @@ def test_hetero_mixture_binomial(p_val, size): M_logp = joint_logprob({M_rv: m_vv, I_rv: i_vv}, sum=False) - M_logp_fn = aesara.function([p_at, m_vv, i_vv], M_logp) + M_logp_fn = pytensor.function([p_at, m_vv, i_vv], M_logp) assert_no_rvs(M_logp_fn.maker.fgraph.outputs[0]) - decimals = 6 if aesara.config.floatX == "float64" else 4 + decimals = 6 if pytensor.config.floatX == "float64" else 4 test_val_rng = np.random.RandomState(3238) @@ -209,18 +209,18 @@ def test_hetero_mixture_binomial(p_val, size): # Scalar mixture components, scalar index ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (), (), (), @@ -229,18 +229,18 @@ def test_hetero_mixture_binomial(p_val, size): # Scalar mixture components, vector index ( ( - np.array(0, dtype=aesara.config.floatX), - 
np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (), (6,), (), @@ -248,18 +248,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array([0, -100], dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array([0, -100], dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array([0.5, 1], dtype=aesara.config.floatX), - np.array([0.5, 1], dtype=aesara.config.floatX), + np.array([0.5, 1], dtype=pytensor.config.floatX), + np.array([0.5, 1], dtype=pytensor.config.floatX), ), ( - np.array([100, 1000], dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array([100, 1000], dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([[0.1, 0.5, 0.4], [0.4, 0.1, 0.5]], dtype=aesara.config.floatX), + np.array([[0.1, 0.5, 0.4], [0.4, 0.1, 0.5]], dtype=pytensor.config.floatX), (2,), (2,), (), @@ -267,18 +267,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array([0, -100], dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array([0, -100], dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array([0.5, 1], dtype=aesara.config.floatX), - np.array([0.5, 1], dtype=aesara.config.floatX), + np.array([0.5, 1], dtype=pytensor.config.floatX), + np.array([0.5, 1], dtype=pytensor.config.floatX), ), ( - np.array([100, 1000], dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array([100, 1000], dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([[0.1, 0.5, 0.4], [0.4, 0.1, 0.5]], dtype=aesara.config.floatX), + np.array([[0.1, 0.5, 0.4], [0.4, 0.1, 0.5]], dtype=pytensor.config.floatX), None, None, (), @@ -286,18 +286,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (), (), (), @@ -305,18 +305,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - 
np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (2,), (2,), (), @@ -324,18 +324,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (2, 3), (2, 3), (), @@ -343,18 +343,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (2, 3), (), (), @@ -362,18 +362,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (3,), (3,), (slice(None),), @@ -381,18 +381,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (5,), (5,), (np.arange(5),), @@ -400,18 +400,18 @@ def test_hetero_mixture_binomial(p_val, size): ), ( ( - np.array(0, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + 
np.array(0, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), ( - np.array(0.5, dtype=aesara.config.floatX), - np.array(0.5, dtype=aesara.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), + np.array(0.5, dtype=pytensor.config.floatX), ), ( - np.array(100, dtype=aesara.config.floatX), - np.array(1, dtype=aesara.config.floatX), + np.array(100, dtype=pytensor.config.floatX), + np.array(1, dtype=pytensor.config.floatX), ), - np.array([0.1, 0.5, 0.4], dtype=aesara.config.floatX), + np.array([0.1, 0.5, 0.4], dtype=pytensor.config.floatX), (5,), (5,), (np.arange(5), None), @@ -430,7 +430,7 @@ def test_hetero_mixture_categorical( p_at = at.as_tensor(p_val).type() p_at.name = "p" - p_at.tag.test_value = np.array(p_val, dtype=aesara.config.floatX) + p_at.tag.test_value = np.array(p_val, dtype=pytensor.config.floatX) I_rv = srng.categorical(p_at, size=idx_size, name="I") i_vv = I_rv.clone() @@ -448,13 +448,13 @@ def test_hetero_mixture_categorical( logp_parts = factorized_joint_logprob({M_rv: m_vv, I_rv: i_vv}, sum=False) - I_logp_fn = aesara.function([p_at, i_vv], logp_parts[i_vv]) - M_logp_fn = aesara.function([m_vv, i_vv], logp_parts[m_vv]) + I_logp_fn = pytensor.function([p_at, i_vv], logp_parts[i_vv]) + M_logp_fn = pytensor.function([m_vv, i_vv], logp_parts[m_vv]) assert_no_rvs(I_logp_fn.maker.fgraph.outputs[0]) assert_no_rvs(M_logp_fn.maker.fgraph.outputs[0]) - decimals = 6 if aesara.config.floatX == "float64" else 4 + decimals = 6 if pytensor.config.floatX == "float64" else 4 test_val_rng = np.random.RandomState(3238) @@ -765,7 +765,7 @@ def test_switch_mixture(): assert isinstance(fgraph.outputs[0].owner.op, MixtureRV) assert not hasattr( fgraph.outputs[0].tag, "test_value" - ) # aesara.config.compute_test_value == "off" + ) # pytensor.config.compute_test_value == "off" assert fgraph.outputs[0].name is None Z1_rv.name = "Z1" diff --git a/pymc/tests/logprob/test_rewriting.py b/pymc/tests/logprob/test_rewriting.py index 09066f979..45cbdbc00 100644 --- a/pymc/tests/logprob/test_rewriting.py +++ b/pymc/tests/logprob/test_rewriting.py @@ -34,13 +34,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import aesara -import aesara.tensor as at +import pytensor +import pytensor.tensor as at -from aesara.graph.rewriting.basic import in2out -from aesara.graph.rewriting.utils import rewrite_graph -from aesara.tensor.elemwise import DimShuffle, Elemwise -from aesara.tensor.subtensor import Subtensor +from pytensor.graph.rewriting.basic import in2out +from pytensor.graph.rewriting.utils import rewrite_graph +from pytensor.tensor.elemwise import DimShuffle, Elemwise +from pytensor.tensor.subtensor import Subtensor from pymc.logprob.rewriting import local_lift_DiracDelta from pymc.logprob.utils import DiracDelta, dirac_delta @@ -81,5 +81,5 @@ def test_local_remove_DiracDelta(): c_at = at.vector() dd_at = dirac_delta(c_at) - fn = aesara.function([c_at], dd_at) + fn = pytensor.function([c_at], dd_at) assert not any(isinstance(node.op, DiracDelta) for node in fn.maker.fgraph.toposort()) diff --git a/pymc/tests/logprob/test_scan.py b/pymc/tests/logprob/test_scan.py index 3802c1072..b8604bdd7 100644 --- a/pymc/tests/logprob/test_scan.py +++ b/pymc/tests/logprob/test_scan.py @@ -34,14 +34,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
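The scan tests that follow derive log-probabilities from `pytensor.scan` graphs; the scan API itself is unchanged apart from the module name. A minimal, self-contained sketch (illustrative only, not part of the patch):

import pytensor
import pytensor.tensor as at

k = at.iscalar("k")
# Repeatedly double the previous value, starting from 1, for k steps.
doubled, _ = pytensor.scan(
    fn=lambda prev: prev * 2,
    outputs_info=at.ones(()),   # initial value for the single recurrent state
    n_steps=k,
)
f = pytensor.function([k], doubled)
print(f(5))  # [ 2.  4.  8. 16. 32.]
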
-import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest -from aesara import Mode -from aesara.raise_op import assert_op -from aesara.scan.utils import ScanArgs +from pytensor import Mode +from pytensor.raise_op import assert_op +from pytensor.scan.utils import ScanArgs from pymc.logprob.abstract import logprob from pymc.logprob.joint_logprob import factorized_joint_logprob, joint_logprob @@ -76,7 +76,7 @@ def test_convert_outer_out_to_in_sit_sot(): """ rng_state = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(1234))) - rng_tt = aesara.shared(rng_state, name="rng", borrow=True) + rng_tt = pytensor.shared(rng_state, name="rng", borrow=True) rng_tt.tag.is_rng = True rng_tt.default_update = rng_tt @@ -93,15 +93,15 @@ def input_step_fn(mu_tm1, y_tm1, rng): mu.name = "mu_t" return mu, at.random.normal(mu, 1.0, rng=rng, name="Y_t") - (mu_tt, Y_rv), _ = aesara.scan( + (mu_tt, Y_rv), _ = pytensor.scan( fn=input_step_fn, outputs_info=[ { - "initial": at.as_tensor_variable(0.0, dtype=aesara.config.floatX), + "initial": at.as_tensor_variable(0.0, dtype=pytensor.config.floatX), "taps": [-1], }, { - "initial": at.as_tensor_variable(0.0, dtype=aesara.config.floatX), + "initial": at.as_tensor_variable(0.0, dtype=pytensor.config.floatX), "taps": [-1], }, ], @@ -136,12 +136,12 @@ def output_step_fn(y_t, y_tm1, mu_tm1): logp.name = "logp" return mu, logp - (mu_tt, Y_logp), _ = aesara.scan( + (mu_tt, Y_logp), _ = pytensor.scan( fn=output_step_fn, sequences=[{"input": Y_obs, "taps": [0, -1]}], outputs_info=[ { - "initial": at.as_tensor_variable(0.0, dtype=aesara.config.floatX), + "initial": at.as_tensor_variable(0.0, dtype=pytensor.config.floatX), "taps": [-1], }, {}, @@ -190,7 +190,7 @@ def test_convert_outer_out_to_in_mit_sot(): """ rng_state = np.random.default_rng(1234) - rng_tt = aesara.shared(rng_state, name="rng", borrow=True) + rng_tt = pytensor.shared(rng_state, name="rng", borrow=True) rng_tt.tag.is_rng = True rng_tt.default_update = rng_tt @@ -203,7 +203,7 @@ def input_step_fn(y_tm1, y_tm2, rng): y_tm2.name = "y_tm2" return at.random.normal(y_tm1 + y_tm2, 1.0, rng=rng, name="Y_t") - Y_rv, _ = aesara.scan( + Y_rv, _ = pytensor.scan( fn=input_step_fn, outputs_info=[ {"initial": at.as_tensor_variable(np.r_[-1.0, 0.0]), "taps": [-1, -2]}, @@ -235,7 +235,7 @@ def output_step_fn(y_t, y_tm1, y_tm2): logp.name = "logp(y_t)" return logp - Y_logp, _ = aesara.scan( + Y_logp, _ = pytensor.scan( fn=output_step_fn, sequences=[{"input": Y_obs, "taps": [0, -1, -2]}], outputs_info=[{}], @@ -294,7 +294,7 @@ def test_scan_joint_logprob(require_inner_rewrites): mus_tt = at.matrix("mus_t") mus_val = np.stack([np.arange(0.0, 10), np.arange(0.0, -10, -1)], axis=-1).astype( - aesara.config.floatX + pytensor.config.floatX ) mus_tt.tag.test_value = mus_val @@ -317,7 +317,7 @@ def scan_fn(mus_t, sigma_t, Gamma_t): return Y_t, S_t - (Y_rv, S_rv), _ = aesara.scan( + (Y_rv, S_rv), _ = pytensor.scan( fn=scan_fn, sequences=[mus_tt, sigmas_tt], non_sequences=[Gamma_rv], @@ -348,7 +348,7 @@ def scan_fn(mus_t, sigma_t, Gamma_t): Gamma_vv: Gamma_val, } - y_logp_fn = aesara.function(list(test_point.keys()), y_logp) + y_logp_fn = pytensor.function(list(test_point.keys()), y_logp) assert_no_rvs(y_logp_fn.maker.fgraph.outputs[0]) @@ -362,7 +362,7 @@ def scan_fn(mus_t, sigma_t, Y_t_val, S_t_val, Gamma_t): S_t_logp.name = "log(S_t=s_t)" return Y_t_logp, S_t_logp - (Y_rv_logp, S_rv_logp), _ = aesara.scan( + (Y_rv_logp, S_rv_logp), _ = pytensor.scan( 
fn=scan_fn, sequences=[mus_tt, sigmas_tt, y_vv, s_vv], non_sequences=[Gamma_vv], @@ -387,7 +387,7 @@ def scan_fn(mus_t, sigma_t, Y_t_val, S_t_val, Gamma_t): @pytest.mark.xfail(reason="see #148") -@aesara.config.change_flags(compute_test_value="raise") +@pytensor.config.change_flags(compute_test_value="raise") @pytest.mark.xfail(reason="see #148") def test_initial_values(): srng = at.random.RandomStream(seed=2320) @@ -406,7 +406,7 @@ def step_fn(S_tm1, Gamma): S_t = srng.categorical(Gamma[S_tm1], name="S_t") return S_t - S_1T_rv, _ = aesara.scan( + S_1T_rv, _ = pytensor.scan( fn=step_fn, outputs_info=[{"initial": S_0_rv, "taps": [-1]}], non_sequences=[Gamma_at], @@ -432,7 +432,7 @@ def step_fn(S_tm1, Gamma): s_prev = s S_0T_logp = sum(v.sum() for v in logp_parts.values()) - S_0T_logp_fn = aesara.function([s_0_vv, s_1T_vv, Gamma_at], S_0T_logp) + S_0T_logp_fn = pytensor.function([s_0_vv, s_1T_vv, Gamma_at], S_0T_logp) res = S_0T_logp_fn(s_0_val, s_1T_val, Gamma_val) assert res == pytest.approx(exp_res) @@ -441,7 +441,7 @@ def step_fn(S_tm1, Gamma): @pytest.mark.parametrize("remove_asserts", (True, False)) def test_mode_is_kept(remove_asserts): mode = Mode().including("local_remove_all_assert") if remove_asserts else None - x, _ = aesara.scan( + x, _ = pytensor.scan( fn=lambda x: at.random.normal(assert_op(x, x > 0)), outputs_info=[at.ones(())], n_steps=10, @@ -449,7 +449,7 @@ def test_mode_is_kept(remove_asserts): ) x.name = "x" x_vv = x.clone() - x_logp = aesara.function([x_vv], joint_logprob({x: x_vv})) + x_logp = pytensor.function([x_vv], joint_logprob({x: x_vv})) x_test_val = np.full((10,), -1) if remove_asserts: diff --git a/pymc/tests/logprob/test_tensor.py b/pymc/tests/logprob/test_tensor.py index 0c2614291..fb6812ef4 100644 --- a/pymc/tests/logprob/test_tensor.py +++ b/pymc/tests/logprob/test_tensor.py @@ -34,15 +34,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-import aesara import numpy as np +import pytensor import pytest -from aesara import tensor as at -from aesara.graph import RewriteDatabaseQuery -from aesara.graph.rewriting.basic import in2out -from aesara.graph.rewriting.utils import rewrite_graph -from aesara.tensor.extra_ops import BroadcastTo +from pytensor import tensor as at +from pytensor.graph import RewriteDatabaseQuery +from pytensor.graph.rewriting.basic import in2out +from pytensor.graph.rewriting.utils import rewrite_graph +from pytensor.tensor.extra_ops import BroadcastTo from scipy import stats as st from pymc.logprob import factorized_joint_logprob, joint_logprob @@ -230,7 +230,7 @@ def test_join_mixed_ndim_supp(): joint_logprob({y_rv: y_vv}) -@aesara.config.change_flags(cxx="") +@pytensor.config.change_flags(cxx="") @pytest.mark.parametrize( "ds_order", [ @@ -275,8 +275,8 @@ def test_measurable_dimshuffle(ds_order, multivariate): ds_logp = joint_logprob({ds_rv: ds_vv}, sum=False, ir_rewriter=ir_rewriter) assert ds_logp is not None - ref_logp_fn = aesara.function([base_vv], ref_logp) - ds_logp_fn = aesara.function([ds_vv], ds_logp) + ref_logp_fn = pytensor.function([base_vv], ref_logp) + ds_logp_fn = pytensor.function([ds_vv], ds_logp) base_test_value = base_rv.eval() ds_test_value = at.constant(base_test_value).dimshuffle(ds_order).eval() diff --git a/pymc/tests/logprob/test_transforms.py b/pymc/tests/logprob/test_transforms.py index 1e599900f..0ec890300 100644 --- a/pymc/tests/logprob/test_transforms.py +++ b/pymc/tests/logprob/test_transforms.py @@ -34,16 +34,16 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy as sp import scipy.special -from aesara.graph.basic import equal_computations -from aesara.graph.fg import FunctionGraph from numdifftools import Jacobian +from pytensor.graph.basic import equal_computations +from pytensor.graph.fg import FunctionGraph from pymc.logprob.joint_logprob import factorized_joint_logprob, joint_logprob from pymc.logprob.transforms import ( @@ -118,7 +118,7 @@ def logpdf(self, value): lambda mean, scale: sp.stats.invgauss(mean / scale, scale=scale), (), marks=pytest.mark.xfail( - reason="We don't use Aesara's Wald operator", + reason="We don't use PyTensor's Wald operator", raises=NotImplementedError, ), ), @@ -170,7 +170,7 @@ def logpdf(self, value): lambda c: sp.stats.weibull_min(c), (), marks=pytest.mark.xfail( - reason="We don't use Aesara's Weibull operator", + reason="We don't use PyTensor's Weibull operator", raises=NotImplementedError, ), ), @@ -233,14 +233,16 @@ def test_transformed_logprob(at_dist, dist_params, sp_dist, size): test_val_rng = np.random.RandomState(3238) - logp_vals_fn = aesara.function([a_value_var, b_value_var], res) + logp_vals_fn = pytensor.function([a_value_var, b_value_var], res) a_trans_op = _default_transformed_rv(a.owner.op, a.owner).op transform = a_trans_op.transform - a_forward_fn = aesara.function([a_value_var], transform.forward(a_value_var, *a.owner.inputs)) - a_backward_fn = aesara.function([a_value_var], transform.backward(a_value_var, *a.owner.inputs)) - log_jac_fn = aesara.function( + a_forward_fn = pytensor.function([a_value_var], transform.forward(a_value_var, *a.owner.inputs)) + a_backward_fn = pytensor.function( + [a_value_var], transform.backward(a_value_var, *a.owner.inputs) + ) + log_jac_fn = pytensor.function( [a_value_var], 
transform.log_jac_det(a_value_var, *a.owner.inputs), on_unused_input="ignore", @@ -595,7 +597,7 @@ def test_exp_transform_rv(): y_vv = y_rv.clone() logp = joint_logprob({y_rv: y_vv}, sum=False) - logp_fn = aesara.function([y_vv], logp) + logp_fn = pytensor.function([y_vv], logp) y_val = [0.1, 0.3] np.testing.assert_allclose( @@ -611,7 +613,7 @@ def test_log_transform_rv(): y_vv = y_rv.clone() logp = joint_logprob({y_rv: y_vv}, sum=False) - logp_fn = aesara.function([y_vv], logp) + logp_fn = pytensor.function([y_vv], logp) y_val = [0.1, 0.3] np.testing.assert_allclose( @@ -637,7 +639,7 @@ def test_loc_transform_rv(rv_size, loc_type): logp = joint_logprob({y_rv: y_vv}, sum=False) assert_no_rvs(logp) - logp_fn = aesara.function([loc, y_vv], logp) + logp_fn = pytensor.function([loc, y_vv], logp) loc_test_val = np.full(rv_size, 4.0) y_test_val = np.full(rv_size, 1.0) @@ -665,7 +667,7 @@ def test_scale_transform_rv(rv_size, scale_type): logp = joint_logprob({y_rv: y_vv}, sum=False) assert_no_rvs(logp) - logp_fn = aesara.function([scale, y_vv], logp) + logp_fn = pytensor.function([scale, y_vv], logp) scale_test_val = np.full(rv_size, 4.0) y_test_val = np.full(rv_size, 1.0) @@ -685,7 +687,7 @@ def test_transformed_rv_and_value(): logp = joint_logprob({y_rv: y_vv}, extra_rewrites=transform_rewrite) assert_no_rvs(logp) - logp_fn = aesara.function([y_vv], logp) + logp_fn = pytensor.function([y_vv], logp) y_test_val = -5 diff --git a/pymc/tests/logprob/test_utils.py b/pymc/tests/logprob/test_utils.py index afed18a4a..5862cae32 100644 --- a/pymc/tests/logprob/test_utils.py +++ b/pymc/tests/logprob/test_utils.py @@ -34,14 +34,14 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest -from aesara import function -from aesara.compile import get_default_mode -from aesara.tensor.random.basic import normal, uniform +from pytensor import function +from pytensor.compile import get_default_mode +from pytensor.tensor.random.basic import normal, uniform from pymc.logprob.abstract import MeasurableVariable, logprob from pymc.logprob.utils import ( @@ -51,7 +51,7 @@ walk_model, ) from pymc.tests.helpers import assert_no_rvs -from pymc.tests.logprob.utils import create_aesara_params, scipy_logprob_tester +from pymc.tests.logprob.utils import create_pytensor_params, scipy_logprob_tester def test_walk_model(): @@ -165,7 +165,7 @@ def test_CheckParameter(): def test_dirac_delta(): - fn = aesara.function( + fn = pytensor.function( [], dirac_delta(at.as_tensor(1)), mode=get_default_mode().excluding("useless") ) with pytest.warns(UserWarning, match=".*DiracDelta.*"): @@ -181,7 +181,7 @@ def test_dirac_delta(): ) def test_dirac_delta_logprob(dist_params, obs): - dist_params_at, obs_at, _ = create_aesara_params(dist_params, obs, ()) + dist_params_at, obs_at, _ = create_pytensor_params(dist_params, obs, ()) dist_params = dict(zip(dist_params_at, dist_params)) x = dirac_delta(*dist_params_at) diff --git a/pymc/tests/logprob/utils.py b/pymc/tests/logprob/utils.py index 6ed3348cb..b438f5c45 100644 --- a/pymc/tests/logprob/utils.py +++ b/pymc/tests/logprob/utils.py @@ -36,9 +36,9 @@ import numpy as np -from aesara import tensor as at -from aesara.graph.basic import walk -from aesara.graph.op import HasInnerGraph +from pytensor import tensor as at +from pytensor.graph.basic import walk +from pytensor.graph.op import HasInnerGraph from scipy import stats as stats from 
pymc.logprob.abstract import MeasurableVariable, icdf, logcdf, logprob @@ -116,7 +116,7 @@ def scipy_logprob(obs, p): return np.log(p[obs]) -def create_aesara_params(dist_params, obs, size): +def create_pytensor_params(dist_params, obs, size): dist_params_at = [] for p in dist_params: p_aet = at.as_tensor(p).type() @@ -150,25 +150,25 @@ def scipy_logprob_tester( test_fn = getattr(stats, name) if test == "logprob": - aesara_res = logprob(rv_var, at.as_tensor(obs)) + pytensor_res = logprob(rv_var, at.as_tensor(obs)) elif test == "logcdf": - aesara_res = logcdf(rv_var, at.as_tensor(obs)) + pytensor_res = logcdf(rv_var, at.as_tensor(obs)) elif test == "icdf": - aesara_res = icdf(rv_var, at.as_tensor(obs)) + pytensor_res = icdf(rv_var, at.as_tensor(obs)) else: raise ValueError(f"test must be one of (logprob, logcdf, icdf), got {test}") - aesara_res_val = aesara_res.eval(dist_params) + pytensor_res_val = pytensor_res.eval(dist_params) numpy_res = np.asarray(test_fn(obs, *dist_params.values())) - assert aesara_res.type.numpy_dtype.kind == numpy_res.dtype.kind + assert pytensor_res.type.numpy_dtype.kind == numpy_res.dtype.kind if check_broadcastable: numpy_shape = np.shape(numpy_res) numpy_bcast = [s == 1 for s in numpy_shape] - np.testing.assert_array_equal(aesara_res.type.broadcastable, numpy_bcast) + np.testing.assert_array_equal(pytensor_res.type.broadcastable, numpy_bcast) - np.testing.assert_array_equal(aesara_res_val.shape, numpy_res.shape) + np.testing.assert_array_equal(pytensor_res_val.shape, numpy_res.shape) - np.testing.assert_array_almost_equal(aesara_res_val, numpy_res, 4) + np.testing.assert_array_almost_equal(pytensor_res_val, numpy_res, 4) diff --git a/pymc/tests/models.py b/pymc/tests/models.py index d41287390..9b3c61f6f 100644 --- a/pymc/tests/models.py +++ b/pymc/tests/models.py @@ -14,16 +14,16 @@ from itertools import product -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.compile.ops import as_op +from pytensor.compile.ops import as_op import pymc as pm from pymc import Categorical, Metropolis, Model, Normal -from pymc.aesaraf import floatX_array +from pymc.pytensorf import floatX_array def simple_model(): @@ -63,7 +63,7 @@ def multidimensional_model(): def simple_arbitrary_det(): - scalar_type = at.dscalar if aesara.config.floatX == "float64" else at.fscalar + scalar_type = at.dscalar if pytensor.config.floatX == "float64" else at.fscalar @as_op(itypes=[scalar_type], otypes=[scalar_type]) def arbitrary_det(value): diff --git a/pymc/tests/ode/test_ode.py b/pymc/tests/ode/test_ode.py index 8f90c677e..e4d6024db 100644 --- a/pymc/tests/ode/test_ode.py +++ b/pymc/tests/ode/test_ode.py @@ -13,9 +13,9 @@ # limitations under the License. 
import warnings -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest from scipy.stats import norm @@ -323,7 +323,7 @@ def system(y, t, p): forward = ode_model(theta=[alpha], y0=[y0]) y = pm.LogNormal("y", mu=pm.math.log(forward), sigma=sigma, observed=yobs) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): with warnings.catch_warnings(): warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) warnings.filterwarnings( @@ -359,7 +359,7 @@ def system(y, t, p): forward = ode_model(theta=[alpha, beta], y0=[y0]) y = pm.LogNormal("y", mu=pm.math.log(forward), sigma=sigma, observed=yobs) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): with warnings.catch_warnings(): warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) warnings.filterwarnings( @@ -406,7 +406,7 @@ def system(y, t, p): forward = ode_model(theta=[R], y0=[0.99, 0.01]) y = pm.LogNormal("y", mu=pm.math.log(forward), sigma=sigma, observed=yobs) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): with warnings.catch_warnings(): warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) warnings.filterwarnings( @@ -452,7 +452,7 @@ def system(y, t, p): forward = ode_model(theta=[beta, gamma], y0=[0.99, 0.01]) y = pm.LogNormal("y", mu=pm.math.log(forward), sigma=sigma, observed=yobs) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): with warnings.catch_warnings(): warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) warnings.filterwarnings( diff --git a/pymc/tests/ode/test_utils.py b/pymc/tests/ode/test_utils.py index 9faf60e3b..f6388e44d 100644 --- a/pymc/tests/ode/test_utils.py +++ b/pymc/tests/ode/test_utils.py @@ -19,14 +19,14 @@ def test_gradients(): - """Tests the computation of the sensitivities from the Aesara computation graph""" + """Tests the computation of the sensitivities from the PyTensor computation graph""" # ODE system for which to compute gradients def ode_func(y, t, p): return np.exp(-t) - p[0] * y[0] - # Computation of graidients with Aesara - augmented_ode_func = augment_system(ode_func, 1, 1 + 1) + # Computation of gradients with PyTensor + augmented_ode_func = augment_system(ode_func, n_states=1, n_theta=1) # This is the new system, ODE + Sensitivities, which will be integrated def augmented_system(Y, t, p): diff --git a/pymc/tests/sampler_fixtures.py b/pymc/tests/sampler_fixtures.py index db66784ad..c89c37c30 100644 --- a/pymc/tests/sampler_fixtures.py +++ b/pymc/tests/sampler_fixtures.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
-import aesara.tensor as at import arviz as az import numpy as np import numpy.testing as npt +import pytensor.tensor as at from scipy import stats diff --git a/pymc/tests/sampling/test_forward.py b/pymc/tests/sampling/test_forward.py index 1cfbd6905..4eaa79504 100644 --- a/pymc/tests/sampling/test_forward.py +++ b/pymc/tests/sampling/test_forward.py @@ -16,25 +16,25 @@ from typing import Tuple -import aesara -import aesara.tensor as at import numpy as np import numpy.random as npr import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import xarray as xr -from aesara import Mode, shared -from aesara.compile import SharedVariable from arviz import InferenceData from arviz import from_dict as az_from_dict from arviz.tests.helpers import check_multiple_attrs +from pytensor import Mode, shared +from pytensor.compile import SharedVariable from scipy import stats import pymc as pm -from pymc.aesaraf import compile_pymc from pymc.backends.base import MultiTrace +from pymc.pytensorf import compile_pymc from pymc.sampling.forward import ( compile_forward_sampling_function, get_vars_in_point_list, @@ -98,8 +98,8 @@ def test_draw_different_samples(self): x_draws_2 = pm.draw(x, 100) assert not np.all(np.isclose(x_draws_1, x_draws_2)) - def test_draw_aesara_function_kwargs(self): - sharedvar = aesara.shared(0) + def test_draw_pytensor_function_kwargs(self): + sharedvar = pytensor.shared(0) x = pm.DiracDelta.dist(0.0) y = x + sharedvar draws = pm.draw( @@ -116,7 +116,7 @@ class TestCompileForwardSampler: def get_function_roots(function): return [ var - for var in aesara.graph.basic.graph_inputs(function.maker.fgraph.outputs) + for var in pytensor.graph.basic.graph_inputs(function.maker.fgraph.outputs) if var.name ] @@ -579,7 +579,7 @@ def test_model_not_drawable_prior(self): with model: mu = pm.HalfFlat("sigma") pm.Poisson("foo", mu=mu, observed=data) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): with warnings.catch_warnings(): warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) idata = pm.sample(tune=10, draws=40, chains=1) @@ -596,8 +596,8 @@ def test_model_shared_variable(self): x = rng.randn(100) y = x > 0 - x_shared = aesara.shared(x) - y_shared = aesara.shared(y) + x_shared = pytensor.shared(x) + y_shared = pytensor.shared(y) samples = 100 with pm.Model() as model: coeff = pm.Normal("x", mu=0, sigma=1) @@ -627,8 +627,8 @@ def test_model_shared_variable(self): def test_deterministic_of_observed(self): rng = np.random.RandomState(8442) - meas_in_1 = pm.aesaraf.floatX(2 + 4 * rng.randn(10)) - meas_in_2 = pm.aesaraf.floatX(5 + 4 * rng.randn(10)) + meas_in_1 = pm.pytensorf.floatX(2 + 4 * rng.randn(10)) + meas_in_2 = pm.pytensorf.floatX(5 + 4 * rng.randn(10)) nchains = 2 with pm.Model() as model: mu_in_1 = pm.Normal("mu_in_1", 0, 2) @@ -641,7 +641,7 @@ def test_deterministic_of_observed(self): out_diff = in_1 + in_2 pm.Deterministic("out", out_diff) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): trace = pm.sample( tune=100, draws=100, @@ -652,7 +652,7 @@ def test_deterministic_of_observed(self): random_seed=rng, ) - rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-4 + rtol = 1e-5 if pytensor.config.floatX == "float64" else 1e-4 ppc = pm.sample_posterior_predictive( return_inferencedata=False, @@ -667,8 +667,8 @@ def test_deterministic_of_observed(self): def 
test_deterministic_of_observed_modified_interface(self): rng = np.random.RandomState(4982) - meas_in_1 = pm.aesaraf.floatX(2 + 4 * rng.randn(100)) - meas_in_2 = pm.aesaraf.floatX(5 + 4 * rng.randn(100)) + meas_in_1 = pm.pytensorf.floatX(2 + 4 * rng.randn(100)) + meas_in_2 = pm.pytensorf.floatX(5 + 4 * rng.randn(100)) with pm.Model() as model: mu_in_1 = pm.Normal("mu_in_1", 0, 1, initval=0) sigma_in_1 = pm.HalfNormal("sd_in_1", 1, initval=1) @@ -680,7 +680,7 @@ def test_deterministic_of_observed_modified_interface(self): out_diff = in_1 + in_2 pm.Deterministic("out", out_diff) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): trace = pm.sample( tune=100, draws=100, @@ -700,7 +700,7 @@ def test_deterministic_of_observed_modified_interface(self): var_names=[x.name for x in (model.deterministics + model.basic_RVs)], ) - rtol = 1e-5 if aesara.config.floatX == "float64" else 1e-3 + rtol = 1e-5 if pytensor.config.floatX == "float64" else 1e-3 npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) def test_variable_type(self): @@ -708,7 +708,7 @@ def test_variable_type(self): mu = pm.HalfNormal("mu", 1) a = pm.Normal("a", mu=mu, sigma=2, observed=np.array([1, 2])) b = pm.Poisson("b", mu, observed=np.array([1, 2])) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): trace = pm.sample( tune=10, draws=10, compute_convergence_checks=False, return_inferencedata=False ) @@ -1060,7 +1060,7 @@ def test_multivariate2(self): with pm.Model() as dm_model: probs = pm.Dirichlet("probs", a=np.ones(6)) obs = pm.Multinomial("obs", n=100, p=probs, observed=mn_data) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): burned_trace = pm.sample( tune=10, draws=20, @@ -1211,8 +1211,8 @@ def test_issue_4490(self): assert prior1.prior["c"] == prior2.prior["c"] assert prior1.prior["d"] == prior2.prior["d"] - def test_aesara_function_kwargs(self): - sharedvar = aesara.shared(0) + def test_pytensor_function_kwargs(self): + sharedvar = pytensor.shared(0) with pm.Model() as m: x = pm.DiracDelta("x", 0) y = pm.Deterministic("y", x + sharedvar) @@ -1258,8 +1258,8 @@ def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture): idat = pm.to_inference_data(trace) pp = pm.sample_posterior_predictive(idat.posterior, var_names=["d"]) - def test_aesara_function_kwargs(self): - sharedvar = aesara.shared(0) + def test_pytensor_function_kwargs(self): + sharedvar = pytensor.shared(0) with pm.Model() as m: x = pm.DiracDelta("x", 0.0) y = pm.Deterministic("y", x + sharedvar) diff --git a/pymc/tests/sampling/test_jax.py b/pymc/tests/sampling/test_jax.py index 2f2cb553b..34d7a4035 100644 --- a/pymc/tests/sampling/test_jax.py +++ b/pymc/tests/sampling/test_jax.py @@ -3,16 +3,16 @@ from typing import Any, Callable, Dict, Optional from unittest import mock -import aesara -import aesara.tensor as at import arviz as az import jax import numpy as np +import pytensor +import pytensor.tensor as at import pytest -from aesara.compile import SharedVariable -from aesara.graph import graph_inputs from numpyro.infer import MCMC +from pytensor.compile import SharedVariable +from pytensor.graph import graph_inputs import pymc as pm @@ -56,11 +56,11 @@ def test_old_import_route(): ], ) def test_transform_samples(sampler, postprocessing_backend, chains): - 
aesara.config.on_opt_error = "raise" + pytensor.config.on_opt_error = "raise" np.random.seed(13244) obs = np.random.normal(10, 2, size=100) - obs_at = aesara.shared(obs, borrow=True, name="obs") + obs_at = pytensor.shared(obs, borrow=True, name="obs") with pm.Model() as model: a = pm.Uniform("a", -20, 20) sigma = pm.HalfNormal("sigma", shape=(2,)) @@ -103,11 +103,11 @@ def test_transform_samples(sampler, postprocessing_backend, chains): ) @pytest.mark.skipif(len(jax.devices()) < 2, reason="not enough devices") def test_deterministic_samples(sampler): - aesara.config.on_opt_error = "raise" + pytensor.config.on_opt_error = "raise" np.random.seed(13244) obs = np.random.normal(10, 2, size=100) - obs_at = aesara.shared(obs, borrow=True, name="obs") + obs_at = pytensor.shared(obs, borrow=True, name="obs") with pm.Model() as model: a = pm.Uniform("a", -20, 20) b = pm.Deterministic("b", a / 2.0) @@ -121,7 +121,7 @@ def test_deterministic_samples(sampler): def test_get_jaxified_graph(): # Check that jaxifying a graph does not emmit the Supervisor Warning. This test can - # be removed once https://github.com/aesara-devs/aesara/issues/637 is sorted. + # be removed once https://github.com/pytensor-devs/pytensor/issues/637 is sorted. x = at.scalar("x") y = at.exp(x) with warnings.catch_warnings(): @@ -132,7 +132,7 @@ def test_get_jaxified_graph(): def test_get_log_likelihood(): obs = np.random.normal(10, 2, size=100) - obs_at = aesara.shared(obs, borrow=True, name="obs") + obs_at = pytensor.shared(obs, borrow=True, name="obs") with pm.Model() as model: a = pm.Normal("a", 0, 2) sigma = pm.HalfNormal("sigma") @@ -151,7 +151,7 @@ def test_get_log_likelihood(): def test_replace_shared_variables(): - x = aesara.shared(5, name="shared_x") + x = pytensor.shared(5, name="shared_x") new_x = _replace_shared_variables([x]) shared_variables = [var for var in graph_inputs(new_x) if isinstance(var, SharedVariable)] diff --git a/pymc/tests/sampling/test_mcmc.py b/pymc/tests/sampling/test_mcmc.py index 625170f05..e472a947d 100644 --- a/pymc/tests/sampling/test_mcmc.py +++ b/pymc/tests/sampling/test_mcmc.py @@ -18,16 +18,16 @@ from contextlib import ExitStack as does_not_raise from copy import copy -import aesara -import aesara.tensor as at import numpy as np import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest import scipy.special -from aesara import shared -from aesara.compile.ops import as_op from arviz import InferenceData +from pytensor import shared +from pytensor.compile.ops import as_op import pymc as pm @@ -491,7 +491,7 @@ def test_partial_trace_unsupported(): pm.sample(trace=[a]) -@pytest.mark.xfail(condition=(aesara.config.floatX == "float32"), reason="Fails on float32") +@pytest.mark.xfail(condition=(pytensor.config.floatX == "float32"), reason="Fails on float32") class TestNamedSampling(SeededTest): def test_shared_named(self): G_var = shared(value=np.atleast_2d(1.0), shape=(1, None), name="G") @@ -755,7 +755,7 @@ def test_bernoulli(self): """Test bernoulli distribution is assigned binary gibbs metropolis method""" with pm.Model() as model: pm.Bernoulli("x", 0.5) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert isinstance(steps, BinaryGibbsMetropolis) @@ -763,7 +763,7 @@ def test_normal(self): """Test normal distribution is assigned NUTS method""" with pm.Model() as model: pm.Normal("x", 0, 1) - with 
aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert isinstance(steps, NUTS) @@ -771,12 +771,12 @@ def test_categorical(self): """Test categorical distribution is assigned categorical gibbs metropolis method""" with pm.Model() as model: pm.Categorical("x", np.array([0.25, 0.75])) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert isinstance(steps, BinaryGibbsMetropolis) with pm.Model() as model: pm.Categorical("y", np.array([0.25, 0.70, 0.05])) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert isinstance(steps, CategoricalGibbsMetropolis) @@ -784,7 +784,7 @@ def test_binomial(self): """Test binomial distribution is assigned metropolis method.""" with pm.Model() as model: pm.Binomial("x", 10, 0.5) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert isinstance(steps, Metropolis) @@ -793,8 +793,8 @@ def test_normal_nograd_op(self): with pm.Model() as model: x = pm.Normal("x", 0, 1) - # a custom Aesara Op that does not have a grad: - is_64 = aesara.config.floatX == "float64" + # a custom PyTensor Op that does not have a grad: + is_64 = pytensor.config.floatX == "float64" itypes = [at.dscalar] if is_64 else [at.fscalar] otypes = [at.dscalar] if is_64 else [at.fscalar] @@ -803,9 +803,9 @@ def kill_grad(x): return x data = np.random.normal(size=(100,)) - pm.Normal("y", mu=kill_grad(x), sigma=1, observed=data.astype(aesara.config.floatX)) + pm.Normal("y", mu=kill_grad(x), sigma=1, observed=data.astype(pytensor.config.floatX)) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert isinstance(steps, Slice) @@ -818,7 +818,7 @@ def test_modify_step_methods(self): with pm.Model() as model: pm.Normal("x", 0, 1) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert not isinstance(steps, NUTS) @@ -827,7 +827,7 @@ def test_modify_step_methods(self): with pm.Model() as model: pm.Normal("x", 0, 1) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): steps = assign_step_methods(model, []) assert isinstance(steps, NUTS) @@ -836,14 +836,14 @@ class TestType: samplers = (Metropolis, Slice, HamiltonianMC, NUTS) def setup_method(self): - # save Aesara config object - self.aesara_config = copy(aesara.config) + # save PyTensor config object + self.pytensor_config = copy(pytensor.config) def teardown_method(self): - # restore aesara config - aesara.config = self.aesara_config + # restore pytensor config + pytensor.config = self.pytensor_config - @aesara.config.change_flags({"floatX": "float64", "warn_float64": "ignore"}) + @pytensor.config.change_flags({"floatX": "float64", "warn_float64": "ignore"}) def test_float64(self): with pm.Model() as model: x = pm.Normal("x", initval=np.array(1.0, dtype="float64")) @@ -858,7 +858,7 @@ def test_float64(self): 
warnings.filterwarnings("ignore", ".*number of samples.*", UserWarning) pm.sample(draws=10, tune=10, chains=1, step=sampler()) - @aesara.config.change_flags({"floatX": "float32", "warn_float64": "warn"}) + @pytensor.config.change_flags({"floatX": "float32", "warn_float64": "warn"}) def test_float32(self): with pm.Model() as model: x = pm.Normal("x", initval=np.array(1.0, dtype="float32")) @@ -881,7 +881,7 @@ def test_sample(self): x_pred = np.linspace(-3, 3, 200) - x_shared = aesara.shared(x) + x_shared = pytensor.shared(x) with pm.Model() as model: b = pm.Normal("b", 0.0, 10.0) diff --git a/pymc/tests/sampling/test_parallel.py b/pymc/tests/sampling/test_parallel.py index 77ba9f48b..76823f8ed 100644 --- a/pymc/tests/sampling/test_parallel.py +++ b/pymc/tests/sampling/test_parallel.py @@ -17,19 +17,19 @@ import sys import warnings -import aesara -import aesara.tensor as at import cloudpickle import numpy as np +import pytensor +import pytensor.tensor as at import pytest -from aesara.compile.ops import as_op -from aesara.tensor.type import TensorType +from pytensor.compile.ops import as_op +from pytensor.tensor.type import TensorType import pymc as pm import pymc.sampling.parallel as ps -from pymc.aesaraf import floatX +from pymc.pytensorf import floatX def test_context(): @@ -69,7 +69,7 @@ def test_bad_unpickle(): assert "could not be unpickled" in str(exc_info.getrepr(style="short")) -at_vector = TensorType(aesara.config.floatX, [False]) +at_vector = TensorType(pytensor.config.floatX, [False]) @as_op([at_vector, at.iscalar], [at_vector]) diff --git a/pymc/tests/smc/test_smc.py b/pymc/tests/smc/test_smc.py index a53a39a6a..4e9317071 100644 --- a/pymc/tests/smc/test_smc.py +++ b/pymc/tests/smc/test_smc.py @@ -13,8 +13,8 @@ # limitations under the License. 
import warnings -import aesara.tensor as at import numpy as np +import pytensor.tensor as at import pytest import scipy.stats as st @@ -22,8 +22,8 @@ import pymc as pm -from pymc.aesaraf import floatX from pymc.backends.base import MultiTrace +from pymc.pytensorf import floatX from pymc.smc.kernels import IMH, systematic_resampling from pymc.tests.helpers import SeededTest, assert_random_state_equal diff --git a/pymc/tests/step_methods/hmc/test_hmc.py b/pymc/tests/step_methods/hmc/test_hmc.py index 1cca395f5..7da9447ce 100644 --- a/pymc/tests/step_methods/hmc/test_hmc.py +++ b/pymc/tests/step_methods/hmc/test_hmc.py @@ -20,8 +20,8 @@ import pymc as pm -from pymc.aesaraf import floatX from pymc.blocking import DictToArrayBijection, RaveledVars +from pymc.pytensorf import floatX from pymc.step_methods.hmc import HamiltonianMC from pymc.step_methods.hmc.base_hmc import BaseHMC from pymc.tests import models diff --git a/pymc/tests/step_methods/hmc/test_nuts.py b/pymc/tests/step_methods/hmc/test_nuts.py index 89dba215c..292ed77ca 100644 --- a/pymc/tests/step_methods/hmc/test_nuts.py +++ b/pymc/tests/step_methods/hmc/test_nuts.py @@ -16,14 +16,14 @@ import sys import warnings -import aesara.tensor as at import numpy as np +import pytensor.tensor as at import pytest import pymc as pm -from pymc.aesaraf import floatX from pymc.exceptions import SamplingError +from pymc.pytensorf import floatX from pymc.step_methods.hmc import NUTS from pymc.tests import sampler_fixtures as sf from pymc.tests.helpers import RVsAssignmentStepsTester, StepMethodTester diff --git a/pymc/tests/step_methods/hmc/test_quadpotential.py b/pymc/tests/step_methods/hmc/test_quadpotential.py index c485edf66..c0cf464b7 100644 --- a/pymc/tests/step_methods/hmc/test_quadpotential.py +++ b/pymc/tests/step_methods/hmc/test_quadpotential.py @@ -20,7 +20,7 @@ import pymc -from pymc.aesaraf import floatX +from pymc.pytensorf import floatX from pymc.step_methods.hmc import quadpotential diff --git a/pymc/tests/step_methods/test_compound.py b/pymc/tests/step_methods/test_compound.py index 072400547..954c3ca19 100644 --- a/pymc/tests/step_methods/test_compound.py +++ b/pymc/tests/step_methods/test_compound.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import aesara +import pytensor import pytest import pymc as pm @@ -34,14 +34,14 @@ class TestCompoundStep: def test_non_blocked(self): """Test that samplers correctly create non-blocked compound steps.""" - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): _, model = simple_2model_continuous() with model: for sampler in self.samplers: assert isinstance(sampler(blocked=False), CompoundStep) def test_blocked(self): - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): _, model = simple_2model_continuous() with model: for sampler in self.samplers: @@ -86,7 +86,7 @@ def test_compound_step(self): c1 = pm.HalfNormal("c1") c2 = pm.HalfNormal("c2") - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): step1 = NUTS([c1]) step2 = NUTS([c2]) step = CompoundStep([step1, step2]) diff --git a/pymc/tests/step_methods/test_metropolis.py b/pymc/tests/step_methods/test_metropolis.py index d4ed99f62..c50a7c8e1 100644 --- a/pymc/tests/step_methods/test_metropolis.py +++ b/pymc/tests/step_methods/test_metropolis.py @@ -14,10 +14,10 @@ import warnings -import aesara import arviz as az import numpy as np import numpy.testing as npt +import pytensor import pytest import pymc as pm @@ -52,7 +52,7 @@ class TestMetropolisUniform(sf.MetropolisFixture, sf.UniformFixture): class TestMetropolis: def test_proposal_choice(self): - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): _, model, _ = mv_simple() with model: initial_point = model.initial_point() @@ -126,7 +126,7 @@ def test_elemwise_update(self, batched_dist): def test_multinomial_no_elemwise_update(self): with pm.Model() as m: batched_dist = pm.Multinomial("batched_dist", n=5, p=np.ones(4) / 4, shape=(10, 4)) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): step = pm.Metropolis([batched_dist]) assert not step.elemwise_update @@ -343,7 +343,7 @@ def test_discrete_steps(self, step, step_kwargs): d1 = pm.Bernoulli("d1", p=0.5) d2 = pm.Bernoulli("d2", p=0.5) - with aesara.config.change_flags(mode=fast_unstable_sampling_mode): + with pytensor.config.change_flags(mode=fast_unstable_sampling_mode): assert [m.rvs_to_values[d1]] == step([d1], **step_kwargs).vars assert {m.rvs_to_values[d1], m.rvs_to_values[d2]} == set( step([d1, d2], **step_kwargs).vars diff --git a/pymc/tests/test_data.py b/pymc/tests/test_data.py index 9fa57e593..0182aa80a 100644 --- a/pymc/tests/test_data.py +++ b/pymc/tests/test_data.py @@ -15,19 +15,19 @@ import io import itertools as it -import aesara -import aesara.tensor as at import cloudpickle import numpy as np +import pytensor +import pytensor.tensor as at import pytest import scipy.stats as st -from aesara import shared -from aesara.tensor.var import TensorVariable +from pytensor import shared +from pytensor.tensor.var import TensorVariable import pymc as pm -from pymc.aesaraf import GeneratorOp, floatX +from pymc.pytensorf import GeneratorOp, floatX from pymc.tests.helpers import SeededTest, select_by_precision @@ -448,7 +448,7 @@ class _DataSampler: """ def __init__(self, data, batchsize=50, random_seed=42, dtype="floatX"): - self.dtype = aesara.config.floatX if dtype == "floatX" else dtype + self.dtype = 
pytensor.config.floatX if dtype == "floatX" else dtype self.rng = np.random.RandomState(random_seed) self.data = data self.n = batchsize @@ -490,7 +490,7 @@ def test_basic(self): generator = pm.GeneratorAdapter(integers()) gop = GeneratorOp(generator)() assert gop.tag.test_value == np.float32(0) - f = aesara.function([], gop) + f = pytensor.function([], gop) assert f() == np.float32(0) assert f() == np.float32(1) for _ in range(2, 100): @@ -502,7 +502,7 @@ def test_ndim(self): res = list(it.islice(integers_ndim(ndim), 0, 2)) generator = pm.GeneratorAdapter(integers_ndim(ndim)) gop = GeneratorOp(generator)() - f = aesara.function([], gop) + f = pytensor.function([], gop) assert ndim == res[0].ndim np.testing.assert_equal(f(), res[0]) np.testing.assert_equal(f(), res[1]) @@ -510,9 +510,9 @@ def test_ndim(self): def test_cloning_available(self): gop = pm.generator(integers()) res = gop**2 - shared = aesara.shared(pm.floatX(10)) - res1 = aesara.clone_replace(res, {gop: shared}) - f = aesara.function([], res1) + shared = pytensor.shared(pm.floatX(10)) + res1 = pytensor.clone_replace(res, {gop: shared}) + f = pytensor.function([], res1) assert f() == np.float32(100) def test_default_value(self): @@ -521,7 +521,7 @@ def gen(): yield pm.floatX(np.ones((10, 10)) * i) gop = pm.generator(gen(), np.ones((10, 10)) * 10) - f = aesara.function([], gop) + f = pytensor.function([], gop) np.testing.assert_equal(np.ones((10, 10)) * 0, f()) np.testing.assert_equal(np.ones((10, 10)) * 1, f()) np.testing.assert_equal(np.ones((10, 10)) * 10, f()) @@ -534,7 +534,7 @@ def gen(): yield pm.floatX(np.ones((10, 10)) * i) gop = pm.generator(gen()) - f = aesara.function([], gop) + f = pytensor.function([], gop) np.testing.assert_equal(np.ones((10, 10)) * 0, f()) np.testing.assert_equal(np.ones((10, 10)) * 1, f()) with pytest.raises(StopIteration): @@ -554,10 +554,10 @@ def test_gen_cloning_with_shape_change(self, datagen): gen = pm.generator(datagen) gen_r = pm.at_rng().normal(size=gen.shape).T X = gen.dot(gen_r) - res, _ = aesara.scan(lambda x: x.sum(), X, n_steps=X.shape[0]) + res, _ = pytensor.scan(lambda x: x.sum(), X, n_steps=X.shape[0]) assert res.eval().shape == (50,) - shared = aesara.shared(datagen.data.astype(gen.dtype)) - res2 = aesara.clone_replace(res, {gen: shared**2}) + shared = pytensor.shared(datagen.data.astype(gen.dtype)) + res2 = pytensor.clone_replace(res, {gen: shared**2}) assert res2.eval().shape == (1000,) @@ -583,11 +583,11 @@ class TestScaling: def test_density_scaling(self): with pm.Model() as model1: pm.Normal("n", observed=[[1]], total_size=1) - p1 = aesara.function([], model1.logp()) + p1 = pytensor.function([], model1.logp()) with pm.Model() as model2: pm.Normal("n", observed=[[1]], total_size=2) - p2 = aesara.function([], model2.logp()) + p2 = pytensor.function([], model2.logp()) assert p1() * 2 == p2() def test_density_scaling_with_generator(self): @@ -602,12 +602,12 @@ def true_dens(): # We have same size models with pm.Model() as model1: pm.Normal("n", observed=gen1(), total_size=100) - p1 = aesara.function([], model1.logp()) + p1 = pytensor.function([], model1.logp()) with pm.Model() as model2: gen_var = pm.generator(gen2()) pm.Normal("n", observed=gen_var, total_size=100) - p2 = aesara.function([], model2.logp()) + p2 = pytensor.function([], model2.logp()) for i in range(10): _1, _2, _t = p1(), p2(), next(t) @@ -624,7 +624,7 @@ def test_gradient_with_scaling(self): grad1 = model1.compile_fn(model1.dlogp(vars=m), point_fn=False) with pm.Model() as model2: m = pm.Normal("m") - shavar = 
aesara.shared(np.ones((1000, 100))) + shavar = pytensor.shared(np.ones((1000, 100))) pm.Normal("n", observed=shavar) grad2 = model2.compile_fn(model2.dlogp(vars=m), point_fn=False) @@ -637,27 +637,27 @@ def test_gradient_with_scaling(self): def test_multidim_scaling(self): with pm.Model() as model0: pm.Normal("n", observed=[[1, 1], [1, 1]], total_size=[]) - p0 = aesara.function([], model0.logp()) + p0 = pytensor.function([], model0.logp()) with pm.Model() as model1: pm.Normal("n", observed=[[1, 1], [1, 1]], total_size=[2, 2]) - p1 = aesara.function([], model1.logp()) + p1 = pytensor.function([], model1.logp()) with pm.Model() as model2: pm.Normal("n", observed=[[1], [1]], total_size=[2, 2]) - p2 = aesara.function([], model2.logp()) + p2 = pytensor.function([], model2.logp()) with pm.Model() as model3: pm.Normal("n", observed=[[1, 1]], total_size=[2, 2]) - p3 = aesara.function([], model3.logp()) + p3 = pytensor.function([], model3.logp()) with pm.Model() as model4: pm.Normal("n", observed=[[1]], total_size=[2, 2]) - p4 = aesara.function([], model4.logp()) + p4 = pytensor.function([], model4.logp()) with pm.Model() as model5: pm.Normal("n", observed=[[1]], total_size=[2, Ellipsis, 2]) - p5 = aesara.function([], model5.logp()) + p5 = pytensor.function([], model5.logp()) _p0 = p0() assert ( np.allclose(_p0, p1()) @@ -745,15 +745,15 @@ def test_special_batch_size(self, batch_size, expected): def test_cloning_available(self): gop = pm.Minibatch(np.arange(100), 1) res = gop**2 - shared = aesara.shared(np.array([10])) - res1 = aesara.clone_replace(res, {gop: shared}) - f = aesara.function([], res1) + shared = pytensor.shared(np.array([10])) + res1 = pytensor.clone_replace(res, {gop: shared}) + f = pytensor.function([], res1) assert f() == np.array([100]) def test_align(self): m = pm.Minibatch(np.arange(1000), 1, random_seed=1) n = pm.Minibatch(np.arange(1000), 1, random_seed=1) - f = aesara.function([], [m, n]) + f = pytensor.function([], [m, n]) n.eval() # not aligned a, b = zip(*(f() for _ in range(1000))) assert a != b diff --git a/pymc/tests/test_initial_point.py b/pymc/tests/test_initial_point.py index 9c7a529ab..a9fdb5a62 100644 --- a/pymc/tests/test_initial_point.py +++ b/pymc/tests/test_initial_point.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import aesara -import aesara.tensor as at import cloudpickle import numpy as np +import pytensor +import pytensor.tensor as at import pytest -from aesara.tensor.random.op import RandomVariable +from pytensor.tensor.random.op import RandomVariable import pymc as pm @@ -118,7 +118,7 @@ def test_nested_initvals(self): def test_initval_resizing(self): with pm.Model() as pmodel: - data = aesara.shared(np.arange(4)) + data = pytensor.shared(np.arange(4)) rv = pm.Uniform("u", lower=data, upper=10, initval="prior") ip = pmodel.initial_point(random_seed=0) @@ -167,7 +167,7 @@ def test_adds_jitter(self): assert b_transformed != 0 assert -1 < b_transformed < 1 # C is centered on 0 + untransformed initval of B - assert np.isclose(iv["C"], np.array(0 + b_untransformed, dtype=aesara.config.floatX)) + assert np.isclose(iv["C"], np.array(0 + b_untransformed, dtype=pytensor.config.floatX)) # Test jitter respects seeding. 
assert fn(0) == fn(0) assert fn(0) != fn(1) diff --git a/pymc/tests/test_math.py b/pymc/tests/test_math.py index 3b1603356..29480afd7 100644 --- a/pymc/tests/test_math.py +++ b/pymc/tests/test_math.py @@ -14,13 +14,12 @@ import warnings -import aesara -import aesara.tensor as at import numpy as np import numpy.testing as npt +import pytensor +import pytensor.tensor as at import pytest -from pymc.aesaraf import floatX from pymc.math import ( LogDet, cartesian, @@ -39,6 +38,7 @@ probit, softmax, ) +from pymc.pytensorf import floatX from pymc.tests.helpers import SeededTest, verify_grad @@ -211,10 +211,10 @@ def setup_method(self): self.op_class = LogDet self.op = logdet - @aesara.config.change_flags(compute_test_value="ignore") + @pytensor.config.change_flags(compute_test_value="ignore") def validate(self, input_mat): - x = aesara.tensor.matrix() - f = aesara.function([x], self.op(x)) + x = pytensor.tensor.matrix() + f = pytensor.function([x], self.op(x)) out = f(input_mat) svd_diag = np.linalg.svd(input_mat, compute_uv=False) numpy_out = np.sum(np.log(np.abs(svd_diag))) @@ -226,21 +226,21 @@ def validate(self, input_mat): verify_grad(self.op, [input_mat]) @pytest.mark.skipif( - aesara.config.device in ["cuda", "gpu"], + pytensor.config.device in ["cuda", "gpu"], reason="No logDet implementation on GPU.", ) def test_basic(self): # Calls validate with different params test_case_1 = np.random.randn(3, 3) / np.sqrt(3) test_case_2 = np.random.randn(10, 10) / np.sqrt(10) - self.validate(test_case_1.astype(aesara.config.floatX)) - self.validate(test_case_2.astype(aesara.config.floatX)) + self.validate(test_case_1.astype(pytensor.config.floatX)) + self.validate(test_case_2.astype(pytensor.config.floatX)) def test_expand_packed_triangular(): with pytest.raises(ValueError): x = at.matrix("x") - x.tag.test_value = np.array([[1.0]], dtype=aesara.config.floatX) + x.tag.test_value = np.array([[1.0]], dtype=pytensor.config.floatX) expand_packed_triangular(5, x) N = 5 packed = at.vector("packed") @@ -278,18 +278,18 @@ def test_invlogit_deprecation_warning(): @pytest.mark.parametrize( - "aesara_function, pymc_wrapper", + "pytensor_function, pymc_wrapper", [ (at.special.softmax, softmax), (at.special.log_softmax, log_softmax), ], ) -def test_softmax_logsoftmax_no_warnings(aesara_function, pymc_wrapper): - """Test that wrappers for aesara functions do not issue Warnings""" +def test_softmax_logsoftmax_no_warnings(pytensor_function, pymc_wrapper): + """Test that wrappers for pytensor functions do not issue Warnings""" vector = at.vector("vector") with pytest.warns(Warning) as record: - aesara_function(vector) + pytensor_function(vector) assert {w.category for w in record.list} == {UserWarning, FutureWarning} with warnings.catch_warnings(): diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py index d437ab8d2..b614ca542 100644 --- a/pymc/tests/test_model.py +++ b/pymc/tests/test_model.py @@ -17,23 +17,23 @@ import unittest import warnings -import aesara -import aesara.sparse as sparse -import aesara.tensor as at import arviz as az import cloudpickle import numpy as np import numpy.ma as ma import numpy.testing as npt +import pytensor +import pytensor.sparse as sparse +import pytensor.tensor as at import pytest import scipy.sparse as sps import scipy.stats as st -from aesara.graph import graph_inputs -from aesara.tensor import TensorVariable -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.sharedvar import ScalarSharedVariable -from aesara.tensor.var import TensorConstant +from 
pytensor.graph import graph_inputs +from pytensor.tensor import TensorVariable +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.sharedvar import ScalarSharedVariable +from pytensor.tensor.var import TensorConstant import pymc as pm @@ -211,7 +211,7 @@ def test_observed_rv_fail(self): def test_observed_type(self): X_ = pm.floatX(np.random.randn(100, 5)) - X = pm.floatX(aesara.shared(X_)) + X = pm.floatX(pytensor.shared(X_)) with pm.Model(): x1 = pm.Normal("x1", observed=X_) x2 = pm.Normal("x2", observed=X) @@ -364,10 +364,10 @@ def test_missing_data(self): # Assert that all the elements of res are equal assert res[1:] == res[:-1] - def test_aesara_switch_broadcast_edge_cases_1(self): + def test_pytensor_switch_broadcast_edge_cases_1(self): # Tests against two subtle issues related to a previous bug in Theano # where `tt.switch` would not always broadcast tensors with single - # values https://github.com/pymc-devs/aesara/issues/270 + # values https://github.com/pymc-devs/pytensor/issues/270 # Known issue 1: https://github.com/pymc-devs/pymc/issues/4389 data = pm.floatX(np.zeros(10)) @@ -380,7 +380,7 @@ def test_aesara_switch_broadcast_edge_cases_1(self): np.log(0.5) * 10, ) - def test_aesara_switch_broadcast_edge_cases_2(self): + def test_pytensor_switch_broadcast_edge_cases_2(self): # Known issue 2: https://github.com/pymc-devs/pymc/issues/4417 # fmt: off data = np.array([ @@ -414,7 +414,7 @@ def test_tempered_logp_dlogp(): with pm.Model() as model: pm.Normal("x") pm.Normal("y", observed=1) - pm.Potential("z", at.constant(-1.0, dtype=aesara.config.floatX)) + pm.Potential("z", at.constant(-1.0, dtype=pytensor.config.floatX)) func = model.logp_dlogp_function() func.set_extra_values({}) @@ -523,7 +523,7 @@ def test_make_obs_var(): Check returned values for `data` given known inputs to `as_tensor()`. Note that ndarrays should return a TensorConstant and sparse inputs - should return a Sparse Aesara object. + should return a Sparse PyTensor object. 
""" # Create the various inputs to the function input_name = "testing_inputs" @@ -574,14 +574,14 @@ def test_initial_point(): a = pm.Uniform("a") x = pm.Normal("x", a) - b_initval = np.array(0.3, dtype=aesara.config.floatX) + b_initval = np.array(0.3, dtype=pytensor.config.floatX) with pytest.warns(FutureWarning), model: b = pm.Uniform("b", testval=b_initval) b_initval_trans = model.rvs_to_transforms[b].forward(b_initval, *b.owner.inputs).eval() - y_initval = np.array(-2.4, dtype=aesara.config.floatX) + y_initval = np.array(-2.4, dtype=pytensor.config.floatX) with model: y = pm.Normal("y", initval=y_initval) @@ -1058,11 +1058,11 @@ def test_compile_fn(): np.testing.assert_allclose(result_compute, result_expect) -def test_model_aesara_config(): - assert aesara.config.mode != "JAX" - with pm.Model(aesara_config=dict(mode="JAX")) as model: - assert aesara.config.mode == "JAX" - assert aesara.config.mode != "JAX" +def test_model_pytensor_config(): + assert pytensor.config.mode != "JAX" + with pm.Model(pytensor_config=dict(mode="JAX")) as model: + assert pytensor.config.mode == "JAX" + assert pytensor.config.mode != "JAX" def test_model_parent_set_programmatically(): @@ -1454,7 +1454,7 @@ class TestShared(SeededTest): def test_deterministic(self): with pm.Model() as model: data_values = np.array([0.5, 0.4, 5, 2]) - X = aesara.shared(np.asarray(data_values, dtype=aesara.config.floatX), borrow=True) + X = pytensor.shared(np.asarray(data_values, dtype=pytensor.config.floatX), borrow=True) pm.Normal("y", 0, 1, observed=X) assert np.all( np.isclose(model.compile_logp(sum=False)({}), st.norm().logpdf(data_values)) diff --git a/pymc/tests/test_model_graph.py b/pymc/tests/test_model_graph.py index 59d77db32..10ec4c87e 100644 --- a/pymc/tests/test_model_graph.py +++ b/pymc/tests/test_model_graph.py @@ -13,13 +13,13 @@ # limitations under the License. import warnings -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest -from aesara.compile.sharedvalue import SharedVariable -from aesara.tensor.var import TensorConstant +from pytensor.compile.sharedvalue import SharedVariable +from pytensor.tensor.var import TensorConstant import pymc as pm @@ -102,7 +102,7 @@ def radon_model(): sigma_y = pm.Uniform("sigma_y", lower=0, upper=100) # Anonymous SharedVariables don't show up - floor_measure = aesara.shared(floor_measure) + floor_measure = pytensor.shared(floor_measure) floor_measure_offset = pm.MutableData("floor_measure_offset", 1) y_hat = a + b * floor_measure + floor_measure_offset log_radon = pm.MutableData("log_radon", np.random.normal(1, 1, size=n_homes)) diff --git a/pymc/tests/test_printing.py b/pymc/tests/test_printing.py index 75a3b2676..c21483ac8 100644 --- a/pymc/tests/test_printing.py +++ b/pymc/tests/test_printing.py @@ -1,7 +1,6 @@ import numpy as np from pymc import Bernoulli, Censored, HalfCauchy, Mixture, StudentT -from pymc.aesaraf import floatX from pymc.distributions import ( Dirichlet, DirichletMultinomial, @@ -15,6 +14,7 @@ ) from pymc.math import dot from pymc.model import Deterministic, Model, Potential +from pymc.pytensorf import floatX class BaseTestStrAndLatexRepr: diff --git a/pymc/tests/test_aesaraf.py b/pymc/tests/test_pytensorf.py similarity index 91% rename from pymc/tests/test_aesaraf.py rename to pymc/tests/test_pytensorf.py index 6524df181..ce02dbae1 100644 --- a/pymc/tests/test_aesaraf.py +++ b/pymc/tests/test_pytensorf.py @@ -13,26 +13,31 @@ # limitations under the License. 
from unittest import mock -import aesara -import aesara.tensor as at import numpy as np import numpy.ma as ma import numpy.testing as npt import pandas as pd +import pytensor +import pytensor.tensor as at import pytest import scipy.sparse as sps -from aesara.compile.builders import OpFromGraph -from aesara.graph.basic import Variable, equal_computations -from aesara.tensor.random.basic import normal, uniform -from aesara.tensor.random.op import RandomVariable -from aesara.tensor.random.var import RandomStateSharedVariable -from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 -from aesara.tensor.var import TensorVariable +from pytensor.compile.builders import OpFromGraph +from pytensor.graph.basic import Variable, equal_computations +from pytensor.tensor.random.basic import normal, uniform +from pytensor.tensor.random.op import RandomVariable +from pytensor.tensor.random.var import RandomStateSharedVariable +from pytensor.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 +from pytensor.tensor.var import TensorVariable import pymc as pm -from pymc.aesaraf import ( +from pymc.distributions.dist_math import check_parameters +from pymc.distributions.distribution import SymbolicRandomVariable +from pymc.distributions.transforms import Interval +from pymc.exceptions import NotConstantValueError +from pymc.logprob.utils import ParameterValueError +from pymc.pytensorf import ( compile_pymc, constant_fold, convert_observed_data, @@ -43,11 +48,6 @@ rvs_to_value_vars, walk_model, ) -from pymc.distributions.dist_math import check_parameters -from pymc.distributions.distribution import SymbolicRandomVariable -from pymc.distributions.transforms import Interval -from pymc.exceptions import NotConstantValueError -from pymc.logprob.utils import ParameterValueError from pymc.tests.helpers import assert_no_rvs from pymc.vartypes import int_types @@ -143,7 +143,7 @@ def test_extract_obs_data(): data_at = at.as_tensor(data) mask = np.random.binomial(1, 0.5, size=(2, 3)).astype(bool) - for val_at in (data_at, aesara.shared(data)): + for val_at in (data_at, pytensor.shared(data)): res = extract_obs_data(val_at) assert isinstance(res, np.ndarray) @@ -199,7 +199,7 @@ def test_convert_observed_data(input_dtype): dense_input = np.arange(9).reshape((3, 3)).astype(input_dtype) input_name = "input_variable" - aesara_graph_input = at.as_tensor(dense_input, name=input_name) + pytensor_graph_input = at.as_tensor(dense_input, name=input_name) pandas_input = pd.DataFrame(dense_input) # All the even numbers are replaced with NaN @@ -239,22 +239,22 @@ def test_convert_observed_data(input_dtype): assert func_output.shape == input_value.shape npt.assert_allclose(func_output, masked_array_input) - # Check function behavior with Aesara graph variable - aesara_output = func(aesara_graph_input) - assert isinstance(aesara_output, Variable) - npt.assert_allclose(aesara_output.eval(), aesara_graph_input.eval()) - intX = pm.aesaraf._conversion_map[aesara.config.floatX] - if dense_input.dtype == intX or dense_input.dtype == aesara.config.floatX: - assert aesara_output.owner is None # func should not have added new nodes - assert aesara_output.name == input_name + # Check function behavior with PyTensor graph variable + pytensor_output = func(pytensor_graph_input) + assert isinstance(pytensor_output, Variable) + npt.assert_allclose(pytensor_output.eval(), pytensor_graph_input.eval()) + intX = pm.pytensorf._conversion_map[pytensor.config.floatX] + if dense_input.dtype == intX or 
dense_input.dtype == pytensor.config.floatX: + assert pytensor_output.owner is None # func should not have added new nodes + assert pytensor_output.name == input_name else: - assert aesara_output.owner is not None # func should have casted - assert aesara_output.owner.inputs[0].name == input_name + assert pytensor_output.owner is not None # func should have casted + assert pytensor_output.owner.inputs[0].name == input_name if "float" in input_dtype: - assert aesara_output.dtype == aesara.config.floatX + assert pytensor_output.dtype == pytensor.config.floatX else: - assert aesara_output.dtype == intX + assert pytensor_output.dtype == intX # Check function behavior with generator data generator_output = func(square_generator) @@ -264,7 +264,7 @@ def test_convert_observed_data(input_dtype): # Make sure the returned object has .set_gen and .set_default methods assert hasattr(wrapped, "set_gen") assert hasattr(wrapped, "set_default") - # Make sure the returned object is an Aesara TensorVariable + # Make sure the returned object is an PyTensor TensorVariable assert isinstance(wrapped, TensorVariable) @@ -318,7 +318,7 @@ def test_check_bounds_flag(self): pass with pytest.raises(ParameterValueError): - aesara.function([], bound)() + pytensor.function([], bound)() m.check_bounds = False with m: @@ -329,7 +329,7 @@ def test_check_bounds_flag(self): assert np.all(compile_pymc([], bound)() == -np.inf) def test_compile_pymc_sets_rng_updates(self): - rng = aesara.shared(np.random.default_rng(0)) + rng = pytensor.shared(np.random.default_rng(0)) x = pm.Normal.dist(rng=rng) assert x.owner.inputs[0] is rng f = compile_pymc([], x) @@ -337,24 +337,24 @@ def test_compile_pymc_sets_rng_updates(self): # Check that update was not done inplace assert not hasattr(rng, "default_update") - f = aesara.function([], x) + f = pytensor.function([], x) assert f() == f() def test_compile_pymc_with_updates(self): - x = aesara.shared(0) + x = pytensor.shared(0) f = compile_pymc([], x, updates={x: x + 1}) assert f() == 0 assert f() == 1 def test_compile_pymc_missing_default_explicit_updates(self): - rng = aesara.shared(np.random.default_rng(0)) + rng = pytensor.shared(np.random.default_rng(0)) x = pm.Normal.dist(rng=rng) # By default, compile_pymc should update the rng of x f = compile_pymc([], x) assert f() != f() - # An explicit update should override the default_update, like aesara.function does + # An explicit update should override the default_update, like pytensor.function does # For testing purposes, we use an update that leaves the rng unchanged f = compile_pymc([], x, updates={rng: rng}) assert f() == f() @@ -394,7 +394,7 @@ def test_compile_pymc_updates_inputs(self): assert len(fn_fgraph.outputs) == 1 + rvs_in_graph # Disable `reseed_rngs` so that we can test with simpler update rule - @mock.patch("pymc.aesaraf.reseed_rngs") + @mock.patch("pymc.pytensorf.reseed_rngs") def test_compile_pymc_custom_update_op(self, _): """Test that custom MeasurableVariable Op updates are used by compile_pymc""" @@ -404,7 +404,7 @@ def update(self, node): dummy_inputs = [at.scalar(), at.scalar()] dummy_outputs = [at.add(*dummy_inputs)] - dummy_x = NonSymbolicRV(dummy_inputs, dummy_outputs)(aesara.shared(1.0), 1.0) + dummy_x = NonSymbolicRV(dummy_inputs, dummy_outputs)(pytensor.shared(1.0), 1.0) # Check that there are no updates at first fn = compile_pymc(inputs=[], outputs=dummy_x) @@ -417,13 +417,13 @@ def update(self, node): assert fn() == 3.0 def test_random_seed(self): - seedx = aesara.shared(np.random.default_rng(1)) - seedy = 
aesara.shared(np.random.default_rng(1)) + seedx = pytensor.shared(np.random.default_rng(1)) + seedy = pytensor.shared(np.random.default_rng(1)) x = at.random.normal(rng=seedx) y = at.random.normal(rng=seedy) # Shared variables are the same, so outputs will be identical - f0 = aesara.function([], [x, y]) + f0 = pytensor.function([], [x, y]) x0_eval, y0_eval = f0() assert x0_eval == y0_eval @@ -444,7 +444,7 @@ def test_random_seed(self): assert y3_eval == y2_eval def test_multiple_updates_same_variable(self): - rng = aesara.shared(np.random.default_rng(), name="rng") + rng = pytensor.shared(np.random.default_rng(), name="rng") x = at.random.normal(rng=rng) y = at.random.normal(rng=rng) @@ -456,7 +456,7 @@ def test_multiple_updates_same_variable(self): def test_replace_rng_nodes(): - rng = aesara.shared(np.random.default_rng()) + rng = pytensor.shared(np.random.default_rng()) x = at.random.normal(rng=rng) x_rng, *x_non_rng_inputs = x.owner.inputs @@ -495,7 +495,7 @@ def test_reseed_rngs(): bit_generators = [default_rng(sub_seed) for sub_seed in np.random.SeedSequence(seed).spawn(2)] rngs = [ - aesara.shared(rng_type(default_rng())) + pytensor.shared(rng_type(default_rng())) for rng_type in (np.random.Generator, np.random.RandomState) ] for rng, bit_generator in zip(rngs, bit_generators): @@ -522,7 +522,7 @@ def test_constant_fold(): def test_constant_fold_raises(): - size = aesara.shared(5) + size = pytensor.shared(5) x = at.random.normal(size=(size,)) y = at.arange(x.size) @@ -647,7 +647,7 @@ def test_no_change_inplace(self, test_deprecated_fn): pm.Potential("two_pot", two) pm.Potential("one_pot", one) - before = aesara.clone_replace(m.free_RVs) + before = pytensor.clone_replace(m.free_RVs) # This call would change the model free_RVs in place in #5172 if test_deprecated_fn: @@ -660,7 +660,7 @@ def test_no_change_inplace(self, test_deprecated_fn): rvs_to_transforms=m.rvs_to_transforms, ) - after = aesara.clone_replace(m.free_RVs) + after = pytensor.clone_replace(m.free_RVs) assert equal_computations(before, after) @pytest.mark.parametrize("test_deprecated_fn", (True, False)) diff --git a/pymc/tests/variational/test_approximations.py b/pymc/tests/variational/test_approximations.py index b7eda128d..3048444ba 100644 --- a/pymc/tests/variational/test_approximations.py +++ b/pymc/tests/variational/test_approximations.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
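Note: a minimal sketch of the RNG-update contract covered by the `compile_pymc` tests above; only the (in)equality of successive draws matters here.

    import numpy as np
    import pytensor
    import pymc as pm
    from pymc.pytensorf import compile_pymc

    rng = pytensor.shared(np.random.default_rng(0))
    x = pm.Normal.dist(rng=rng)

    f = compile_pymc([], x)        # registers an update for `rng`
    assert f() != f()              # successive calls advance the generator

    g = pytensor.function([], x)   # no default update is picked up
    assert g() == g()              # the same draw is returned every time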
-import aesara import numpy as np +import pytensor import pytest import pymc as pm @@ -46,8 +46,8 @@ def test_elbo(): sigma = 1.0 y_obs = np.array([1.6, 1.4]) - post_mu = np.array([1.88], dtype=aesara.config.floatX) - post_sigma = np.array([1], dtype=aesara.config.floatX) + post_mu = np.array([1.88], dtype=pytensor.config.floatX) + post_sigma = np.array([1], dtype=pytensor.config.floatX) # Create a model for test with pm.Model() as model: mu = pm.Normal("mu", mu=mu0, sigma=sigma) @@ -55,13 +55,13 @@ def test_elbo(): # Create variational gradient tensor mean_field = MeanField(model=model) - with aesara.config.change_flags(compute_test_value="off"): + with pytensor.config.change_flags(compute_test_value="off"): elbo = -pm.operators.KL(mean_field)()(10000) mean_field.shared_params["mu"].set_value(post_mu) mean_field.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - f = aesara.function([], elbo) + f = pytensor.function([], elbo) elbo_mc = f() # Exact value @@ -84,17 +84,17 @@ def test_scale_cost_to_minibatch_works(aux_total_size): y_obs = np.array([1.6, 1.4]) beta = len(y_obs) / float(aux_total_size) - # TODO: aesara_config - # with pm.Model(aesara_config=dict(floatX='float64')): + # TODO: pytensor_config + # with pm.Model(pytensor_config=dict(floatX='float64')): # did not not work as expected # there were some numeric problems, so float64 is forced - with aesara.config.change_flags(floatX="float64", warn_float64="ignore"): + with pytensor.config.change_flags(floatX="float64", warn_float64="ignore"): - assert aesara.config.floatX == "float64" - assert aesara.config.warn_float64 == "ignore" + assert pytensor.config.floatX == "float64" + assert pytensor.config.warn_float64 == "ignore" - post_mu = np.array([1.88], dtype=aesara.config.floatX) - post_sigma = np.array([1], dtype=aesara.config.floatX) + post_mu = np.array([1.88], dtype=pytensor.config.floatX) + post_sigma = np.array([1], dtype=pytensor.config.floatX) with pm.Model(): mu = pm.Normal("mu", mu=mu0, sigma=sigma) @@ -105,7 +105,7 @@ def test_scale_cost_to_minibatch_works(aux_total_size): mean_field_1.shared_params["mu"].set_value(post_mu) mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with aesara.config.change_flags(compute_test_value="off"): + with pytensor.config.change_flags(compute_test_value="off"): elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000) with pm.Model(): @@ -119,7 +119,7 @@ def test_scale_cost_to_minibatch_works(aux_total_size): mean_field_2.shared_params["mu"].set_value(post_mu) mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with aesara.config.change_flags(compute_test_value="off"): + with pytensor.config.change_flags(compute_test_value="off"): elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000) np.testing.assert_allclose( @@ -137,10 +137,10 @@ def test_elbo_beta_kl(aux_total_size): y_obs = np.array([1.6, 1.4]) beta = len(y_obs) / float(aux_total_size) - with aesara.config.change_flags(floatX="float64", warn_float64="ignore"): + with pytensor.config.change_flags(floatX="float64", warn_float64="ignore"): - post_mu = np.array([1.88], dtype=aesara.config.floatX) - post_sigma = np.array([1], dtype=aesara.config.floatX) + post_mu = np.array([1.88], dtype=pytensor.config.floatX) + post_sigma = np.array([1], dtype=pytensor.config.floatX) with pm.Model(): mu = pm.Normal("mu", mu=mu0, sigma=sigma) @@ -151,7 +151,7 @@ def test_elbo_beta_kl(aux_total_size): mean_field_1.shared_params["mu"].set_value(post_mu) 
mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with aesara.config.change_flags(compute_test_value="off"): + with pytensor.config.change_flags(compute_test_value="off"): elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000) with pm.Model(): @@ -162,7 +162,7 @@ def test_elbo_beta_kl(aux_total_size): mean_field_3.shared_params["mu"].set_value(post_mu) mean_field_3.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1)) - with aesara.config.change_flags(compute_test_value="off"): + with pytensor.config.change_flags(compute_test_value="off"): elbo_via_beta_kl = -pm.operators.KL(mean_field_3, beta=beta)()(10000) np.testing.assert_allclose( diff --git a/pymc/tests/variational/test_callbacks.py b/pymc/tests/variational/test_callbacks.py index ee8e7f456..2dd58872c 100644 --- a/pymc/tests/variational/test_callbacks.py +++ b/pymc/tests/variational/test_callbacks.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aesara import numpy as np +import pytensor import pytest import pymc as pm @@ -25,7 +25,7 @@ def test_callbacks_convergence(diff, ord): cb = pm.variational.callbacks.CheckParametersConvergence(every=1, diff=diff, ord=ord) class _approx: - params = (aesara.shared(np.asarray([1, 2, 3])),) + params = (pytensor.shared(np.asarray([1, 2, 3])),) approx = _approx() diff --git a/pymc/tests/variational/test_inference.py b/pymc/tests/variational/test_inference.py index 58c0687a4..6e668c8a7 100644 --- a/pymc/tests/variational/test_inference.py +++ b/pymc/tests/variational/test_inference.py @@ -15,16 +15,16 @@ import io import operator -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pytest import pymc as pm import pymc.tests.models as models import pymc.variational.opvi as opvi -from pymc.aesaraf import intX +from pymc.pytensorf import intX from pymc.variational.inference import ADVI, ASVGD, SVGD, FullRankADVI pytestmark = pytest.mark.usefixtures("strict_float32", "seeded_test") @@ -237,7 +237,7 @@ def binomial_model_inference(binomial_model, inference_spec): return inference_spec() -@pytest.mark.xfail("aesara.config.warn_float64 == 'raise'", reason="too strict float32") +@pytest.mark.xfail("pytensor.config.warn_float64 == 'raise'", reason="too strict float32") def test_replacements(binomial_model_inference): d = at.bscalar() d.tag.test_value = 1 @@ -246,11 +246,11 @@ def test_replacements(binomial_model_inference): p_t = p**3 p_s = approx.sample_node(p_t) assert not any( - isinstance(n.owner.op, aesara.tensor.random.basic.BetaRV) - for n in aesara.graph.ancestors([p_s]) + isinstance(n.owner.op, pytensor.tensor.random.basic.BetaRV) + for n in pytensor.graph.ancestors([p_s]) if n.owner ), "p should be replaced" - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": assert p_s.tag.test_value.shape == p_t.tag.test_value.shape sampled = [p_s.eval() for _ in range(100)] assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic @@ -283,7 +283,7 @@ def test_sample_replacements(binomial_model_inference): p = approx.model.p p_t = p**3 p_s = approx.sample_node(p_t, size=100) - if aesara.config.compute_test_value != "off": + if pytensor.config.compute_test_value != "off": assert p_s.tag.test_value.shape == (100,) + p_t.tag.test_value.shape sampled = p_s.eval() assert any(map(operator.ne, sampled[1:], sampled[:-1])) # stochastic @@ -303,14 +303,14 @@ def 
test_remove_scan_op(): inference = ADVI() buff = io.StringIO() inference.run_profiling(n=10).summary(buff) - assert "aesara.scan.op.Scan" not in buff.getvalue() + assert "pytensor.scan.op.Scan" not in buff.getvalue() buff.close() def test_var_replacement(): X_mean = pm.floatX(np.linspace(0, 10, 10)) y = pm.floatX(np.random.normal(X_mean * 4, 0.05)) - inp_size = aesara.shared(np.array(10, dtype="int64"), name="inp_size") + inp_size = pytensor.shared(np.array(10, dtype="int64"), name="inp_size") with pm.Model(): inp = pm.Normal("X", X_mean, size=(inp_size,)) coef = pm.Normal("b", 4.0) diff --git a/pymc/tests/variational/test_opvi.py b/pymc/tests/variational/test_opvi.py index 598692528..eafe99655 100644 --- a/pymc/tests/variational/test_opvi.py +++ b/pymc/tests/variational/test_opvi.py @@ -14,8 +14,8 @@ import functools as ft -import aesara.tensor as at import numpy as np +import pytensor.tensor as at import pytest import pymc as pm diff --git a/pymc/tests/variational/test_updates.py b/pymc/tests/variational/test_updates.py index d83b81289..07c72dd81 100644 --- a/pymc/tests/variational/test_updates.py +++ b/pymc/tests/variational/test_updates.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aesara import numpy as np +import pytensor import pytest from pymc.variational.updates import ( @@ -28,12 +28,12 @@ sgd, ) -_a = aesara.shared(1.0) +_a = pytensor.shared(1.0) _b = _a * 2 -_m = aesara.shared(np.empty((10,), aesara.config.floatX)) +_m = pytensor.shared(np.empty((10,), pytensor.config.floatX)) _n = _m.sum() -_m2 = aesara.shared(np.empty((10, 10, 10), aesara.config.floatX)) +_m2 = pytensor.shared(np.empty((10, 10, 10), pytensor.config.floatX)) _n2 = _b + _n + _m2.sum() @@ -71,7 +71,7 @@ ids=["scalar", "matrix", "mixed"], ) def test_updates_fast(opt, loss_and_params, kwargs, getter): - with aesara.config.change_flags(compute_test_value="ignore"): + with pytensor.config.change_flags(compute_test_value="ignore"): loss, param = getter(loss_and_params) args = dict() args.update(**kwargs) diff --git a/pymc/tuning/scaling.py b/pymc/tuning/scaling.py index 4ab9dd7cd..f52c53292 100644 --- a/pymc/tuning/scaling.py +++ b/pymc/tuning/scaling.py @@ -16,9 +16,9 @@ from numpy import exp, log, sqrt -from pymc.aesaraf import hessian_diag from pymc.blocking import DictToArrayBijection from pymc.model import Point, modelcontext +from pymc.pytensorf import hessian_diag from pymc.util import get_var_name __all__ = ["find_hessian", "trace_cov", "guess_scaling"] diff --git a/pymc/tuning/starting.py b/pymc/tuning/starting.py index 7bce1f5ec..24ec6f8c4 100644 --- a/pymc/tuning/starting.py +++ b/pymc/tuning/starting.py @@ -22,12 +22,12 @@ from typing import Optional, Sequence -import aesara.gradient as tg import numpy as np +import pytensor.gradient as tg -from aesara import Variable from fastprogress.fastprogress import ProgressBar, progress_bar from numpy import isfinite +from pytensor import Variable from scipy.optimize import minimize import pymc as pm diff --git a/pymc/util.py b/pymc/util.py index cbeceb34c..fc24554b7 100644 --- a/pymc/util.py +++ b/pymc/util.py @@ -22,10 +22,10 @@ import numpy as np import xarray -from aesara import Variable -from aesara.compile import SharedVariable -from aesara.graph.utils import ValidatingScratchpad from cachetools import LRUCache, cachedmethod +from pytensor import Variable +from pytensor.compile import SharedVariable +from pytensor.graph.utils import ValidatingScratchpad class _UnsetType: @@ 
-380,7 +380,7 @@ def check_dist_not_registered(dist, model=None): def point_wrapper(core_function): - """Wrap an aesara compiled function to be able to ingest point dictionaries whilst + """Wrap an pytensor compiled function to be able to ingest point dictionaries whilst ignoring the keys that are not valid inputs to the core function. """ ins = [i.name for i in core_function.maker.fgraph.inputs if not isinstance(i, SharedVariable)] diff --git a/pymc/variational/approximations.py b/pymc/variational/approximations.py index cace8f03b..b615e804e 100644 --- a/pymc/variational/approximations.py +++ b/pymc/variational/approximations.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import aesara import numpy as np +import pytensor -from aesara import tensor as at -from aesara.graph.basic import Variable -from aesara.tensor.var import TensorVariable from arviz import InferenceData +from pytensor import tensor as at +from pytensor.graph.basic import Variable +from pytensor.tensor.var import TensorVariable import pymc as pm @@ -63,7 +63,7 @@ def cov(self): def std(self): return rho2sigma(self.rho) - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def __init_group__(self, group): super().__init_group__(group) if not self._check_user_params(): @@ -89,8 +89,8 @@ def create_shared_params(self, start=None, start_sigma=None): rho = rho1 return { - "mu": aesara.shared(pm.floatX(start), "mu"), - "rho": aesara.shared(pm.floatX(rho), "rho"), + "mu": pytensor.shared(pm.floatX(start), "mu"), + "rho": pytensor.shared(pm.floatX(rho), "rho"), } @node_property @@ -122,7 +122,7 @@ class FullRankGroup(Group): short_name = "full_rank" alias_names = frozenset(["fr"]) - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def __init_group__(self, group): super().__init_group__(group) if not self._check_user_params(): @@ -132,8 +132,8 @@ def __init_group__(self, group): def create_shared_params(self, start=None): start = self._prepare_start(start) n = self.ddim - L_tril = np.eye(n)[np.tril_indices(n)].astype(aesara.config.floatX) - return {"mu": aesara.shared(start, "mu"), "L_tril": aesara.shared(L_tril, "L_tril")} + L_tril = np.eye(n)[np.tril_indices(n)].astype(pytensor.config.floatX) + return {"mu": pytensor.shared(start, "mu"), "L_tril": pytensor.shared(L_tril, "L_tril")} @node_property def L(self): @@ -192,7 +192,7 @@ class EmpiricalGroup(Group): __param_spec__ = dict(histogram=("s", "d")) short_name = "empirical" - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def __init_group__(self, group): super().__init_group__(group) self._check_trace() @@ -221,7 +221,7 @@ def create_shared_params(self, trace=None, size=None, jitter=1, start=None): for j in range(len(trace)): histogram[i] = DictToArrayBijection.map(trace.point(j, t)).data i += 1 - return dict(histogram=aesara.shared(pm.floatX(histogram), "histogram")) + return dict(histogram=pytensor.shared(pm.floatX(histogram), "histogram")) def _check_trace(self): trace = self._kwargs.get("trace", None) @@ -253,11 +253,11 @@ def randidx(self, size=None): ).astype("int32") def _new_initial(self, size, deterministic, more_replacements=None): - aesara_condition_is_here = isinstance(deterministic, Variable) + pytensor_condition_is_here = isinstance(deterministic, Variable) if size is None: size = 1 size = 
at.as_tensor(size) - if aesara_condition_is_here: + if pytensor_condition_is_here: return at.switch( deterministic, at.repeat(self.mean.reshape((1, -1)), size, -1), @@ -294,7 +294,7 @@ def std(self): return at.sqrt(at.diag(self.cov)) def __str__(self): - if isinstance(self.histogram, aesara.compile.SharedVariable): + if isinstance(self.histogram, pytensor.compile.SharedVariable): shp = ", ".join(map(str, self.histogram.shape.eval())) else: shp = "None, " + str(self.ddim) @@ -375,7 +375,7 @@ def evaluate_over_trace(self, node): Parameters ---------- - node: Aesara Variables (or Aesara expressions) + node: PyTensor Variables (or PyTensor expressions) Returns ------- @@ -384,7 +384,7 @@ def evaluate_over_trace(self, node): node = self.to_flat_input(node) def sample(post, node): - return aesara.clone_replace(node, {self.input: post}) + return pytensor.clone_replace(node, {self.input: post}) - nodes, _ = aesara.scan(sample, self.histogram, non_sequences=[node]) + nodes, _ = pytensor.scan(sample, self.histogram, non_sequences=[node]) return nodes diff --git a/pymc/variational/inference.py b/pymc/variational/inference.py index 94bc223b3..ea2868d10 100644 --- a/pymc/variational/inference.py +++ b/pymc/variational/inference.py @@ -124,7 +124,7 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): total_grad_norm_constraint: `float` Bounds gradient norm, prevents exploding gradient problem fn_kwargs: `dict` - Add kwargs to aesara.function (e.g. `{'profile': True}`) + Add kwargs to pytensor.function (e.g. `{'profile': True}`) more_replacements: `dict` Apply custom replacements before calculating gradients @@ -417,7 +417,7 @@ class ADVI(KLqp): The tensors to which mini-bathced samples are supplied are handled separately by using callbacks in :func:`Inference.fit` method - that change storage of shared Aesara variable or by :func:`pymc.generator` + that change storage of shared PyTensor variable or by :func:`pymc.generator` that automatically iterates over minibatches and defined beforehand. - (optional) Parameters of deterministic mappings @@ -717,7 +717,7 @@ def fit( total_grad_norm_constraint: `float` Bounds gradient norm, prevents exploding gradient problem fn_kwargs: `dict` - Add kwargs to aesara.function (e.g. `{'profile': True}`) + Add kwargs to pytensor.function (e.g. `{'profile': True}`) more_replacements: `dict` Apply custom replacements before calculating gradients diff --git a/pymc/variational/operators.py b/pymc/variational/operators.py index 9192ee4b2..7464cb31d 100644 --- a/pymc/variational/operators.py +++ b/pymc/variational/operators.py @@ -13,9 +13,9 @@ # limitations under the License. 
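Note: the `evaluate_over_trace` and `symbolic_sample_over_posterior` hunks above clone a graph with `pytensor.clone_replace` inside `pytensor.scan`; a minimal self-contained sketch of that pattern, with illustrative names only.

    import numpy as np
    import pytensor
    import pytensor.tensor as at

    x = at.vector("x")
    node = (x ** 2).sum()                            # expression to evaluate per draw

    # e.g. a "histogram" of 5 flattened posterior draws
    draws = pytensor.shared(np.random.randn(5, 3).astype(pytensor.config.floatX))

    def sample(post, node):
        # substitute one draw for the symbolic input in a clone of the graph
        return pytensor.clone_replace(node, {x: post})

    nodes, _ = pytensor.scan(sample, draws, non_sequences=[node])
    print(pytensor.function([], nodes)())            # one value per draw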
from __future__ import annotations -import aesara +import pytensor -from aesara.graph.basic import Variable +from pytensor.graph.basic import Variable import pymc as pm @@ -79,7 +79,7 @@ def __init__(self, op: KSD, tf: opvi.TestFunction): raise opvi.ParametrizationError("Op should be KSD") super().__init__(op, tf) - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def __call__(self, nmc, **kwargs) -> list[Variable]: op: KSD = self.op grad = op.apply(self.tf) @@ -92,7 +92,7 @@ def __call__(self, nmc, **kwargs) -> list[Variable]: else: params = self.test_params + kwargs["more_tf_params"] grad *= pm.floatX(-1) - grads = aesara.grad(None, params, known_grads={z: grad}) + grads = pytensor.grad(None, params, known_grads={z: grad}) return self.approx.set_size_and_deterministic( grads, nmc, 0, kwargs.get("more_replacements") ) diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py index f0c854b66..a76e2b8b7 100644 --- a/pymc/variational/opvi.py +++ b/pymc/variational/opvi.py @@ -51,15 +51,21 @@ import itertools import warnings -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at -from aesara.graph.basic import Variable +from pytensor.graph.basic import Variable import pymc as pm -from pymc.aesaraf import ( +from pymc.backends.base import MultiTrace +from pymc.backends.ndarray import NDArray +from pymc.blocking import DictToArrayBijection +from pymc.distributions.logprob import _get_scaling +from pymc.initial_point import make_initial_point_fn +from pymc.model import modelcontext +from pymc.pytensorf import ( SeedSequenceSeed, at_rng, compile_pymc, @@ -67,12 +73,6 @@ identity, reseed_rngs, ) -from pymc.backends.base import MultiTrace -from pymc.backends.ndarray import NDArray -from pymc.blocking import DictToArrayBijection -from pymc.distributions.logprob import _get_scaling -from pymc.initial_point import make_initial_point_fn -from pymc.model import modelcontext from pymc.util import ( RandomState, WithMemoization, @@ -131,21 +131,21 @@ def node_property(f): def wrapper(fn): ff = append_name(f)(fn) - f_ = aesara.config.change_flags(compute_test_value="off")(ff) + f_ = pytensor.config.change_flags(compute_test_value="off")(ff) return property(locally_cachedmethod(f_)) return wrapper else: - f_ = aesara.config.change_flags(compute_test_value="off")(f) + f_ = pytensor.config.change_flags(compute_test_value="off")(f) return property(locally_cachedmethod(f_)) -@aesara.config.change_flags(compute_test_value="ignore") +@pytensor.config.change_flags(compute_test_value="ignore") def try_to_set_test_value(node_in, node_out, s): _s = s if s is None: s = 1 - s = aesara.compile.view_op(at.as_tensor(s)) + s = pytensor.compile.view_op(at.as_tensor(s)) if not isinstance(node_in, (list, tuple)): node_in = [node_in] if not isinstance(node_out, (list, tuple)): @@ -162,7 +162,7 @@ def try_to_set_test_value(node_in, node_out, s): o.tag.test_value = tv -class ObjectiveUpdates(aesara.OrderedUpdates): +class ObjectiveUpdates(pytensor.OrderedUpdates): """OrderedUpdates extension for storing loss""" loss = None @@ -303,7 +303,7 @@ def add_obj_updates( if self.op.returns_loss: updates.loss = obj_target - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def step_function( self, obj_n_mc=None, @@ -347,13 +347,13 @@ def step_function( score: `bool` calculate loss on each step? 
Defaults to False for speed fn_kwargs: `dict` - Add kwargs to aesara.function (e.g. `{'profile': True}`) + Add kwargs to pytensor.function (e.g. `{'profile': True}`) more_replacements: `dict` Apply custom replacements before calculating gradients Returns ------- - `aesara.function` + `pytensor.function` """ if fn_kwargs is None: fn_kwargs = {} @@ -376,7 +376,7 @@ def step_function( step_fn = compile_pymc([], [], updates=updates, **fn_kwargs) return step_fn - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def score_function( self, sc_n_mc=None, more_replacements=None, fn_kwargs=None ): # pragma: no cover @@ -389,11 +389,11 @@ def score_function( more_replacements: Apply custom replacements before compiling a function fn_kwargs: `dict` - arbitrary kwargs passed to `aesara.function` + arbitrary kwargs passed to `pytensor.function` Returns ------- - aesara.function + pytensor.function """ if fn_kwargs is None: fn_kwargs = {} @@ -404,7 +404,7 @@ def score_function( loss = self(sc_n_mc, more_replacements=more_replacements) return compile_pymc([], loss, **fn_kwargs) - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def __call__(self, nmc, **kwargs): if "more_tf_params" in kwargs: m = -1.0 @@ -512,7 +512,7 @@ def collect_shared_to_list(params): return list( t[1] for t in sorted(params.items(), key=lambda t: t[0]) - if isinstance(t[1], aesara.compile.SharedVariable) + if isinstance(t[1], pytensor.compile.SharedVariable) ) elif params is None: return [] @@ -819,7 +819,7 @@ def _input_type(self, name): """ return at.vector(name) - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def __init_group__(self, group): if not group: raise GroupError("Got empty group") @@ -963,7 +963,7 @@ def symbolic_random(self): """ raise NotImplementedError - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def set_size_and_deterministic( self, node: Variable, s, d: bool, more_replacements: dict | None = None ) -> list[Variable]: @@ -973,7 +973,7 @@ def set_size_and_deterministic( Parameters ---------- node: :class:`Variable` - Aesara node with symbolically applied VI replacements + PyTensor node with symbolically applied VI replacements s: scalar desired number of samples d: bool or int @@ -986,13 +986,13 @@ def set_size_and_deterministic( :class:`Variable` with applied replacements, ready to use """ flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) - node_out = aesara.clone_replace(node, flat2rand) + node_out = pytensor.clone_replace(node, flat2rand) try_to_set_test_value(node, node_out, s) return node_out def to_flat_input(self, node): """*Dev* - replace vars with flattened view stored in `self.inputs`""" - return aesara.clone_replace(node, self.replacements) + return pytensor.clone_replace(node, self.replacements) def symbolic_sample_over_posterior(self, node): """*Dev* - performs sampling of node applying independent samples from posterior each time. 
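Note: `pytensor.config.change_flags` is used throughout these hunks both as a decorator (on `node_property`, `step_function`, `__init_group__`) and as a context manager in the tests; a minimal sketch of the two forms.

    import pytensor

    # Context-manager form, as in the variational tests above:
    with pytensor.config.change_flags(compute_test_value="off"):
        pass  # build graphs here without test-value computation

    # Decorator form, as on step_function/score_function above:
    @pytensor.config.change_flags(compute_test_value="off")
    def build_graph():
        pass  # the flag is changed only for the duration of the call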
@@ -1003,9 +1003,9 @@ def symbolic_sample_over_posterior(self, node): random = at.specify_shape(random, self.symbolic_initial.type.shape) def sample(post, node): - return aesara.clone_replace(node, {self.input: post}) + return pytensor.clone_replace(node, {self.input: post}) - nodes, _ = aesara.scan(sample, random, non_sequences=[node]) + nodes, _ = pytensor.scan(sample, random, non_sequences=[node]) return nodes def symbolic_single_sample(self, node): @@ -1015,7 +1015,7 @@ def symbolic_single_sample(self, node): """ node = self.to_flat_input(node) random = self.symbolic_random.astype(self.symbolic_initial.dtype) - return aesara.clone_replace(node, {self.input: random[0]}) + return pytensor.clone_replace(node, {self.input: random[0]}) def make_size_and_deterministic_replacements(self, s, d, more_replacements=None): """*Dev* - creates correct replacements for initial depending on @@ -1037,7 +1037,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) initial = self._new_initial(s, d, more_replacements) initial = at.specify_shape(initial, self.symbolic_initial.type.shape) if more_replacements: - initial = aesara.clone_replace(initial, more_replacements) + initial = pytensor.clone_replace(initial, more_replacements) return {self.symbolic_initial: initial} @node_property @@ -1128,7 +1128,7 @@ class Approximation(WithMemoization): """ def __init__(self, groups, model=None): - self._scale_cost_to_minibatch = aesara.shared(np.int8(1)) + self._scale_cost_to_minibatch = pytensor.shared(np.int8(1)) model = modelcontext(model) if not model.free_RVs: raise TypeError("Model does not have an free RVs") @@ -1211,7 +1211,7 @@ def logq_norm(self): @node_property def _sized_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `aesara.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`""" varlogp_s, datalogp_s = self.symbolic_sample_over_posterior( [self.model.varlogp, self.model.datalogp] ) @@ -1219,55 +1219,55 @@ def _sized_symbolic_varlogp_and_datalogp(self): @node_property def sized_symbolic_varlogp(self): - """*Dev* - computes sampled prior term from model via `aesara.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`""" return self._sized_symbolic_varlogp_and_datalogp[0] # shape (s,) @node_property def sized_symbolic_datalogp(self): - """*Dev* - computes sampled data term from model via `aesara.scan`""" + """*Dev* - computes sampled data term from model via `pytensor.scan`""" return self._sized_symbolic_varlogp_and_datalogp[1] # shape (s,) @node_property def sized_symbolic_logp(self): - """*Dev* - computes sampled logP from model via `aesara.scan`""" + """*Dev* - computes sampled logP from model via `pytensor.scan`""" return self.sized_symbolic_varlogp + self.sized_symbolic_datalogp # shape (s,) @node_property def logp(self): - """*Dev* - computes :math:`E_{q}(logP)` from model via `aesara.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(logP)` from model via `pytensor.scan` that can be optimized later""" return self.varlogp + self.datalogp @node_property def varlogp(self): - """*Dev* - computes :math:`E_{q}(prior term)` from model via `aesara.scan` that can be optimized later""" + """*Dev* - computes :math:`E_{q}(prior term)` from model via `pytensor.scan` that can be optimized later""" return self.sized_symbolic_varlogp.mean(0) @node_property def datalogp(self): - """*Dev* - computes :math:`E_{q}(data term)` from model via `aesara.scan` that can be 
optimized later""" + """*Dev* - computes :math:`E_{q}(data term)` from model via `pytensor.scan` that can be optimized later""" return self.sized_symbolic_datalogp.mean(0) @node_property def _single_symbolic_varlogp_and_datalogp(self): - """*Dev* - computes sampled prior term from model via `aesara.scan`""" + """*Dev* - computes sampled prior term from model via `pytensor.scan`""" varlogp, datalogp = self.symbolic_single_sample([self.model.varlogp, self.model.datalogp]) return varlogp, datalogp @node_property def single_symbolic_varlogp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `aesara.scan` + """*Dev* - for single MC sample estimate of :math:`E_{q}(prior term)` `pytensor.scan` is not needed and code can be optimized""" return self._single_symbolic_varlogp_and_datalogp[0] @node_property def single_symbolic_datalogp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `aesara.scan` + """*Dev* - for single MC sample estimate of :math:`E_{q}(data term)` `pytensor.scan` is not needed and code can be optimized""" return self._single_symbolic_varlogp_and_datalogp[1] @node_property def single_symbolic_logp(self): - """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `aesara.scan` + """*Dev* - for single MC sample estimate of :math:`E_{q}(logP)` `pytensor.scan` is not needed and code can be optimized""" return self.single_symbolic_datalogp + self.single_symbolic_varlogp @@ -1318,7 +1318,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None) flat2rand.update(more_replacements) return flat2rand - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def set_size_and_deterministic(self, node, s, d, more_replacements=None): """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior` or :func:`symbolic_single_sample` new random generator can be allocated and applied to node @@ -1326,7 +1326,7 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): Parameters ---------- node: :class:`Variable` - Aesara node with symbolically applied VI replacements + PyTensor node with symbolically applied VI replacements s: scalar desired number of samples d: bool or int @@ -1341,16 +1341,16 @@ def set_size_and_deterministic(self, node, s, d, more_replacements=None): _node = node optimizations = self.get_optimization_replacements(s, d) flat2rand = self.make_size_and_deterministic_replacements(s, d, more_replacements) - node = aesara.clone_replace(node, optimizations) - node = aesara.clone_replace(node, flat2rand) + node = pytensor.clone_replace(node, optimizations) + node = pytensor.clone_replace(node, flat2rand) try_to_set_test_value(_node, node, s) return node def to_flat_input(self, node, more_replacements=None): """*Dev* - replace vars with flattened view stored in `self.inputs`""" more_replacements = more_replacements or {} - node = aesara.clone_replace(node, more_replacements) - return aesara.clone_replace(node, self.replacements) + node = pytensor.clone_replace(node, more_replacements) + return pytensor.clone_replace(node, self.replacements) def symbolic_sample_over_posterior(self, node, more_replacements=None): """*Dev* - performs sampling of node applying independent samples from posterior each time. 
@@ -1359,9 +1359,9 @@ def symbolic_sample_over_posterior(self, node, more_replacements=None): node = self.to_flat_input(node, more_replacements=more_replacements) def sample(*post): - return aesara.clone_replace(node, dict(zip(self.inputs, post))) + return pytensor.clone_replace(node, dict(zip(self.inputs, post))) - nodes, _ = aesara.scan(sample, self.symbolic_randoms) + nodes, _ = pytensor.scan(sample, self.symbolic_randoms) return nodes def symbolic_single_sample(self, node, more_replacements=None): @@ -1372,11 +1372,11 @@ def symbolic_single_sample(self, node, more_replacements=None): node = self.to_flat_input(node, more_replacements=more_replacements) post = [v[0] for v in self.symbolic_randoms] inp = self.inputs - return aesara.clone_replace(node, dict(zip(inp, post))) + return pytensor.clone_replace(node, dict(zip(inp, post))) def get_optimization_replacements(self, s, d): """*Dev* - optimizations for logP. If sample size is static and equal to 1: - then `aesara.scan` MC estimate is replaced with single sample without call to `aesara.scan`. + then `pytensor.scan` MC estimate is replaced with single sample without call to `pytensor.scan`. """ repl = collections.OrderedDict() # avoid scan if size is constant and equal to one @@ -1385,13 +1385,13 @@ def get_optimization_replacements(self, s, d): repl[self.datalogp] = self.single_symbolic_datalogp return repl - @aesara.config.change_flags(compute_test_value="off") + @pytensor.config.change_flags(compute_test_value="off") def sample_node(self, node, size=None, deterministic=False, more_replacements=None): """Samples given node or nodes over shared posterior Parameters ---------- - node: Aesara Variables (or Aesara expressions) + node: PyTensor Variables (or PyTensor expressions) size: None or scalar number of samples more_replacements: `dict` @@ -1406,7 +1406,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No """ node_in = node if more_replacements: - node = aesara.clone_replace(node, more_replacements) + node = pytensor.clone_replace(node, more_replacements) if not isinstance(node, (list, tuple)): node = [node] node = self.model.replace_rvs_by_values(node) @@ -1421,7 +1421,7 @@ def sample_node(self, node, size=None, deterministic=False, more_replacements=No return node_out def rslice(self, name): - """*Dev* - vectorized sampling for named random variable without call to `aesara.scan`. + """*Dev* - vectorized sampling for named random variable without call to `pytensor.scan`. This node still needs :func:`set_size_and_deterministic` to be evaluated """ diff --git a/pymc/variational/stein.py b/pymc/variational/stein.py index 8630d58d4..5d36a074e 100644 --- a/pymc/variational/stein.py +++ b/pymc/variational/stein.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
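Note: a minimal sketch of the `sample_node` API documented above, mirroring the usage in `test_replacements` and `test_sample_replacements`; obtaining the approximation via `pm.fit` is just one illustrative route.

    import pymc as pm

    with pm.Model():
        p = pm.Beta("p", 1.0, 1.0)
        approx = pm.fit(n=100, method="advi")   # returns a fitted approximation

    p_s = approx.sample_node(p ** 3, size=100)  # 100 posterior draws of the expression
    print(p_s.eval().shape)                     # (100,)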
-import aesara -import aesara.tensor as at +import pytensor +import pytensor.tensor as at -from pymc.aesaraf import floatX +from pymc.pytensorf import floatX from pymc.util import WithMemoization, locally_cachedmethod from pymc.variational.opvi import node_property from pymc.variational.test_functions import rbf @@ -84,7 +84,7 @@ def dxkxy(self): def logp_norm(self): sized_symbolic_logp = self.approx.sized_symbolic_logp if self.use_histogram: - sized_symbolic_logp = aesara.clone_replace( + sized_symbolic_logp = pytensor.clone_replace( sized_symbolic_logp, dict(zip(self.approx.symbolic_randoms, self.approx.collect("histogram"))), ) diff --git a/pymc/variational/test_functions.py b/pymc/variational/test_functions.py index b54f6e6c7..50a14c716 100644 --- a/pymc/variational/test_functions.py +++ b/pymc/variational/test_functions.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from aesara import tensor as at +from pytensor import tensor as at -from pymc.aesaraf import floatX +from pymc.pytensorf import floatX from pymc.variational.opvi import TestFunction __all__ = ["rbf"] diff --git a/pymc/variational/updates.py b/pymc/variational/updates.py index 21072afd9..e712ab259 100644 --- a/pymc/variational/updates.py +++ b/pymc/variational/updates.py @@ -44,7 +44,7 @@ # SOFTWARE. """ -Functions to generate Aesara update dictionaries for training. +Functions to generate PyTensor update dictionaries for training. The update functions implement different methods to control the learning rate for use with stochastic gradient descent. @@ -88,7 +88,7 @@ Examples -------- >>> import lasagne ->>> import aesara +>>> import pytensor >>> from lasagne.nonlinearities import softmax >>> from lasagne.layers import InputLayer, DenseLayer, get_output >>> from lasagne.updates import sgd, apply_momentum @@ -101,7 +101,7 @@ >>> loss = at.mean(at.nnet.categorical_crossentropy(l_out, y)) >>> updates_sgd = sgd(loss, params, learning_rate=0.0001) >>> updates = apply_momentum(updates_sgd, params, momentum=0.9) ->>> train_function = aesara.function([x, y], updates=updates) +>>> train_function = pytensor.function([x, y], updates=updates) Notes ----- @@ -111,9 +111,9 @@ from collections import OrderedDict from functools import partial -import aesara -import aesara.tensor as at import numpy as np +import pytensor +import pytensor.tensor as at import pymc as pm @@ -151,7 +151,7 @@ def get_or_compute_grads(loss_or_grads, params): gradients and returned as is, unless it does not match the length of `params`, in which case a `ValueError` is raised. Otherwise, `loss_or_grads` is assumed to be a cost expression and - the function returns `aesara.grad(loss_or_grads, params)`. + the function returns `pytensor.grad(loss_or_grads, params)`. Raises ------ @@ -160,7 +160,7 @@ def get_or_compute_grads(loss_or_grads, params): any element of `params` is not a shared variable (while we could still compute its gradient, we can never update it and want to fail early). """ - if any(not isinstance(p, aesara.compile.SharedVariable) for p in params): + if any(not isinstance(p, pytensor.compile.SharedVariable) for p in params): raise ValueError( "params must contain shared variables only. 
If it " "contains arbitrary parameter expressions, then " @@ -173,7 +173,7 @@ def get_or_compute_grads(loss_or_grads, params): ) return loss_or_grads else: - return aesara.grad(loss_or_grads, params) + return pytensor.grad(loss_or_grads, params) def _get_call_kwargs(_locals_): @@ -211,7 +211,7 @@ def sgd(loss_or_grads=None, params=None, learning_rate=1e-3): Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) >>> b = a*2 >>> updates = sgd(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -275,7 +275,7 @@ def apply_momentum(updates, params=None, momentum=0.9): for param in params: value = param.get_value(borrow=True) - velocity = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + velocity = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) x = momentum * velocity + updates[param] updates[velocity] = x - param updates[param] = x @@ -323,7 +323,7 @@ def momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momentum=0.9): Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) >>> b = a*2 >>> updates = momentum(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -388,7 +388,7 @@ def apply_nesterov_momentum(updates, params=None, momentum=0.9): for param in params: value = param.get_value(borrow=True) - velocity = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + velocity = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) x = momentum * velocity + updates[param] - param updates[velocity] = x updates[param] = momentum * x + updates[param] @@ -441,7 +441,7 @@ def nesterov_momentum(loss_or_grads=None, params=None, learning_rate=1e-3, momen Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) >>> b = a*2 >>> updates = nesterov_momentum(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -509,7 +509,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) >>> b = a*2 >>> updates = adagrad(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -530,7 +530,7 @@ def adagrad(loss_or_grads=None, params=None, learning_rate=1.0, epsilon=1e-6): for param, grad in zip(params, grads): value = param.get_value(borrow=True) - accu = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + accu = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) accu_new = accu + grad**2 updates[accu] = accu_new updates[param] = param - (learning_rate * grad / at.sqrt(accu_new + epsilon)) @@ -567,10 +567,10 @@ def adagrad_window(loss_or_grads=None, params=None, learning_rate=0.001, epsilon grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() for param, grad in zip(params, grads): - i = aesara.shared(pm.floatX(0)) + i = pytensor.shared(pm.floatX(0)) i_int = i.astype("int32") value = param.get_value(borrow=True) - accu = aesara.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype)) + accu = pytensor.shared(np.zeros(value.shape + (n_win,), dtype=value.dtype)) # Append squared gradient vector to accu_new accu_new = at.set_subtensor(accu[..., i_int], grad**2) @@ -632,7 +632,7 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) 
>>> b = a*2 >>> updates = rmsprop(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -651,12 +651,12 @@ def rmsprop(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.9, epsilon grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() - # Using aesara constant to prevent upcasting of float32 + # Using pytensor constant to prevent upcasting of float32 one = at.constant(1) for param, grad in zip(params, grads): value = param.get_value(borrow=True) - accu = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + accu = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) accu_new = rho * accu + (one - rho) * grad**2 updates[accu] = accu_new updates[param] = param - (learning_rate * grad / at.sqrt(accu_new + epsilon)) @@ -722,7 +722,7 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) >>> b = a*2 >>> updates = adadelta(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -741,15 +741,17 @@ def adadelta(loss_or_grads=None, params=None, learning_rate=1.0, rho=0.95, epsil grads = get_or_compute_grads(loss_or_grads, params) updates = OrderedDict() - # Using aesara constant to prevent upcasting of float32 + # Using pytensor constant to prevent upcasting of float32 one = at.constant(1) for param, grad in zip(params, grads): value = param.get_value(borrow=True) # accu: accumulate gradient magnitudes - accu = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + accu = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) # delta_accu: accumulate update magnitudes (recursively!) - delta_accu = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + delta_accu = pytensor.shared( + np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape + ) # update accu (as in rmsprop) accu_new = rho * accu + (one - rho) * grad**2 @@ -810,7 +812,7 @@ def adam( Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) >>> b = a*2 >>> updates = adam(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -827,10 +829,10 @@ def adam( elif loss_or_grads is None or params is None: raise ValueError("Please provide both `loss_or_grads` and `params` to get updates") all_grads = get_or_compute_grads(loss_or_grads, params) - t_prev = aesara.shared(pm.aesaraf.floatX(0.0)) + t_prev = pytensor.shared(pm.pytensorf.floatX(0.0)) updates = OrderedDict() - # Using aesara constant to prevent upcasting of float32 + # Using pytensor constant to prevent upcasting of float32 one = at.constant(1) t = t_prev + 1 @@ -838,8 +840,8 @@ def adam( for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) - m_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) - v_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + m_prev = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + v_prev = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) m_t = beta1 * m_prev + (one - beta1) * g_t v_t = beta2 * v_prev + (one - beta2) * g_t**2 @@ -894,7 +896,7 @@ def adamax( Examples -------- - >>> a = aesara.shared(1.) + >>> a = pytensor.shared(1.) 
>>> b = a*2 >>> updates = adamax(b, [a], learning_rate=.01) >>> isinstance(updates, dict) @@ -911,10 +913,10 @@ def adamax( elif loss_or_grads is None or params is None: raise ValueError("Please provide both `loss_or_grads` and `params` to get updates") all_grads = get_or_compute_grads(loss_or_grads, params) - t_prev = aesara.shared(pm.aesaraf.floatX(0.0)) + t_prev = pytensor.shared(pm.pytensorf.floatX(0.0)) updates = OrderedDict() - # Using aesara constant to prevent upcasting of float32 + # Using pytensor constant to prevent upcasting of float32 one = at.constant(1) t = t_prev + 1 @@ -922,8 +924,8 @@ def adamax( for param, g_t in zip(params, all_grads): value = param.get_value(borrow=True) - m_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) - u_prev = aesara.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + m_prev = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) + u_prev = pytensor.shared(np.zeros(value.shape, dtype=value.dtype), shape=param.type.shape) m_t = beta1 * m_prev + (one - beta1) * g_t u_t = at.maximum(beta2 * u_prev, abs(g_t)) @@ -947,7 +949,7 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): Parameters ---------- tensor_var: TensorVariable - Aesara expression for update, gradient, or other quantity. + PyTensor expression for update, gradient, or other quantity. max_norm: scalar This value sets the maximum allowed value of any norm in `tensor_var`. @@ -972,11 +974,11 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): Examples -------- - >>> param = aesara.shared( - ... np.random.randn(100, 200).astype(aesara.config.floatX)) + >>> param = pytensor.shared( + ... np.random.randn(100, 200).astype(pytensor.config.floatX)) >>> update = param + 100 >>> update = norm_constraint(update, 10) - >>> func = aesara.function([], [], updates=[(param, update)]) + >>> func = pytensor.function([], [], updates=[(param, update)]) >>> # Apply constrained update >>> _ = func() >>> from lasagne.utils import compute_norms @@ -1007,7 +1009,7 @@ def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7): "Unsupported tensor dimensionality {}." "Must specify `norm_axes`".format(ndim) ) - dtype = np.dtype(aesara.config.floatX).type + dtype = np.dtype(pytensor.config.floatX).type norms = at.sqrt(at.sum(at.sqr(tensor_var), axis=sum_over, keepdims=True)) target_norms = at.clip(norms, 0, dtype(max_norm)) constrained_output = tensor_var * (target_norms / (dtype(epsilon) + norms)) @@ -1040,7 +1042,7 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False ------- tensor_vars_scaled: list of TensorVariables The scaled tensor variables. - norm: Aesara scalar + norm: PyTensor scalar The combined norms of the input variables prior to rescaling, only returned if ``return_norms=True``. @@ -1071,7 +1073,7 @@ def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7, return_norm=False Processing Systems (pp. 3104-3112). 
""" norm = at.sqrt(sum(at.sum(tensor**2) for tensor in tensor_vars)) - dtype = np.dtype(aesara.config.floatX).type + dtype = np.dtype(pytensor.config.floatX).type target_norm = at.clip(norm, 0, dtype(max_norm)) multiplier = target_norm / (dtype(epsilon) + norm) tensor_vars_scaled = [step * multiplier for step in tensor_vars] diff --git a/requirements-dev.txt b/requirements-dev.txt index e5bc62a3d..714bc30d4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,6 @@ # This file is auto-generated by scripts/generate_pip_deps_from_conda.py, do not modify. # See that file for comments about the need/usage of each dependency. -aesara==2.8.8 arviz>=0.13.0 cachetools>=4.2.1 cloudpickle @@ -18,6 +17,7 @@ numpydoc pandas>=0.24.0 polyagamma pre-commit>=2.8.0 +pytensor==2.8.10 pytest-cov>=2.5 pytest>=3.0 scipy>=1.4.1 diff --git a/requirements.txt b/requirements.txt index a2cfafe2d..4f64eb947 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -aesara==2.8.8 arviz>=0.13.0 cachetools>=4.2.1 cloudpickle fastprogress>=0.2.0 numpy>=1.15.0 pandas>=0.24.0 +pytensor==2.8.10 scipy>=1.4.1 typing-extensions>=3.7.4 diff --git a/scripts/test.sh b/scripts/test.sh index faebef4e4..640f2bbc5 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -3,4 +3,4 @@ set -e _FLOATX=${FLOATX:=float64} -AESARA_FLAGS="floatX=${_FLOATX},gcc__cxxflags='-march=core2'" pytest -v --cov=pymc --cov-report=xml "$@" --cov-report term +PYTENSOR_FLAGS="floatX=${_FLOATX},gcc__cxxflags='-march=core2'" pytest -v --cov=pymc --cov-report=xml "$@" --cov-report term diff --git a/setup.py b/setup.py index e88e837ae..bdfcaaa18 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ import versioneer -DESCRIPTION = "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Aesara" +DESCRIPTION = "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor" AUTHOR = "PyMC Developers" AUTHOR_EMAIL = "pymc.devs@gmail.com" URL = "http://github.com/pymc-devs/pymc"