diff --git a/torchao/csrc/cuda/fp6_llm/fp6_linear.cu b/torchao/csrc/cuda/fp6_llm/fp6_linear.cu
index 1d44acde08..7f973e6987 100644
--- a/torchao/csrc/cuda/fp6_llm/fp6_linear.cu
+++ b/torchao/csrc/cuda/fp6_llm/fp6_linear.cu
@@ -14,6 +14,8 @@
 //
 // This file is adapted from https://github.com/usyd-fsalab/fp6_llm/blob/5df6737cca32f604e957e3f63f03ccc2e4d1df0d/fp6_llm/csrc/fp6_linear.cu
 
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 // at least Ampere
+
 #include "kernel_matmul.cuh"
 #include "kernel_reduction.cuh"
 
@@ -200,3 +202,5 @@ TORCH_LIBRARY_IMPL(torchao, CUDA, m) {
 }
 
 } // namespace torchao
+
+#endif
diff --git a/torchao/csrc/cuda/tensor_core_tiled_layout/tensor_core_tiled_layout.cu b/torchao/csrc/cuda/tensor_core_tiled_layout/tensor_core_tiled_layout.cu
index 652bba5ca6..7af29caac9 100644
--- a/torchao/csrc/cuda/tensor_core_tiled_layout/tensor_core_tiled_layout.cu
+++ b/torchao/csrc/cuda/tensor_core_tiled_layout/tensor_core_tiled_layout.cu
@@ -1,3 +1,5 @@
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 // at least Ampere
+
 #include
 #include
 #include
@@ -310,3 +312,5 @@ TORCH_LIBRARY_IMPL(torchao, CUDA, m) {
   m.impl("torchao::dequantize_tensor_core_tiled_layout", &_dequantize_tensor_core_tiled_layout);
 }
+
+#endif
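
Both files are wrapped in the same preprocessor guard, so it is worth spelling out how it behaves. The sketch below is not part of the patch (the file name and ampere_only_kernel are hypothetical); it only illustrates the guard pattern under the assumption that nvcc compiles a .cu file once for the host and once per target GPU architecture, defining __CUDA_ARCH__ only during the device passes (e.g. 800 for sm_80). The guard therefore leaves everything visible to the host pass, including the TORCH_LIBRARY_IMPL registrations above, while device passes for pre-Ampere targets see an empty file and emit no device code.

// arch_guard_demo.cu (hypothetical file, illustration only)
// build: nvcc -gencode arch=compute_80,code=sm_80 arch_guard_demo.cu -o demo
#include <cstdio>
#include <cuda_runtime.h>

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 // at least Ampere

// Hypothetical kernel standing in for the guarded fp6 / tensor-core kernels.
// Device code for it is emitted only when compiling for sm_80 or newer; the
// host pass (__CUDA_ARCH__ undefined) always sees it, so host-side wrappers
// and library registration keep compiling for every target architecture.
__global__ void ampere_only_kernel(float* out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = static_cast<float>(i);
  }
}

#endif

int main() {
  const int n = 32;
  float* d_out = nullptr;
  cudaMalloc(&d_out, n * sizeof(float));

  // If the binary was built only for a pre-Ampere architecture, the guard
  // stripped the device code above, so this launch fails at runtime
  // ("invalid device function") instead of breaking the compile.
  ampere_only_kernel<<<1, n>>>(d_out, n);
  cudaError_t err = cudaDeviceSynchronize();
  printf("kernel launch: %s\n", cudaGetErrorString(err));

  cudaFree(d_out);
  return 0;
}

Built with -gencode arch=compute_70,code=sm_70 instead, this still compiles and links; the kernel simply has no sm_70 device code and the launch reports an error at runtime, which is the trade-off the guard makes to keep pre-Ampere builds compiling.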