Uploaded using `kernel-builder`.

Browse files

Files changed (13)

benchmarks/benchmark.py +15 -2
build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
build/torch211-cxx11-cu128-x86_64-linux/_ops.py +3 -3
build/torch211-cxx11-cu128-x86_64-linux/metadata.json +1 -1
build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
build/torch211-cxx11-cu130-x86_64-linux/_ops.py +3 -3
build/torch211-cxx11-cu130-x86_64-linux/metadata.json +1 -1
build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
build/torch212-cxx11-cu130-x86_64-linux/_ops.py +3 -3
build/torch212-cxx11-cu130-x86_64-linux/metadata.json +1 -1
build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
build/torch212-cxx11-cu132-x86_64-linux/_ops.py +3 -3
build/torch212-cxx11-cu132-x86_64-linux/metadata.json +1 -1

benchmarks/benchmark.py CHANGED Viewed

@@ -201,6 +201,12 @@ def load_installed_ops(artifact: str | None):
             sys.path.remove(artifact)
 def quantize_fp8(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
     return torch.clamp(x.float() / scale.float(), -448.0, 448.0).to(torch.float8_e4m3fn)
@@ -438,8 +444,10 @@ def write_markdown(path: Path, results: list[Result], args) -> None:
 def main() -> None:
     parser = argparse.ArgumentParser()
-    parser.add_argument("--backend", choices=["source", "installed"], default="source")
     parser.add_argument("--artifact", default=None)
     parser.add_argument("--shapes", default="all")
     parser.add_argument("--warmup", type=int, default=5)
     parser.add_argument("--iters", type=int, default=20)
@@ -463,7 +471,12 @@ def main() -> None:
     if not torch.cuda.is_available():
         raise SystemExit("CUDA is required")
     torch.manual_seed(17)
-    ops = load_source_ops() if args.backend == "source" else load_installed_ops(args.artifact)
     requested = [s.strip() for s in args.shapes.split(",")]
     names: list[str] = []
     for item in requested:

             sys.path.remove(artifact)
+def load_hub_ops(repo_id: str, version: int):
+    from kernels import get_kernel
+    return get_kernel(repo_id, version=version, trust_remote_code=True)
 def quantize_fp8(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
     return torch.clamp(x.float() / scale.float(), -448.0, 448.0).to(torch.float8_e4m3fn)
 def main() -> None:
     parser = argparse.ArgumentParser()
+    parser.add_argument("--backend", choices=["source", "installed", "hub"], default="source")
     parser.add_argument("--artifact", default=None)
+    parser.add_argument("--repo-id", default="flashrt/flashrt-fp8-ffn")
+    parser.add_argument("--version", type=int, default=1)
     parser.add_argument("--shapes", default="all")
     parser.add_argument("--warmup", type=int, default=5)
     parser.add_argument("--iters", type=int, default=20)
     if not torch.cuda.is_available():
         raise SystemExit("CUDA is required")
     torch.manual_seed(17)
+    if args.backend == "source":
+        ops = load_source_ops()
+    elif args.backend == "installed":
+        ops = load_installed_ops(args.artifact)
+    else:
+        ops = load_hub_ops(args.repo_id, args.version)
     requested = [s.strip() for s in args.shapes.split(",")]
     names: list[str] = []
     for item in requested:

build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfc90be7bacf4046348951be8440728ede008ac9a5df78da6ba64275fa09ba3d
-size 309744

 version https://git-lfs.github.com/spec/v1
+oid sha256:cba29914868c02e1355bef5aab55ef03909936318d9e03747333ab0b3a8cd94c
+size 309712

build/torch211-cxx11-cu128-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_fp8_ffn_cuda_c4d802d
-ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"

 import torch
+from . import _flashrt_fp8_ffn_cuda_5de4768
+ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"

build/torch211-cxx11-cu128-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-fp8-ffn",
-  "id": "_flashrt_fp8_ffn_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-fp8-ffn",
+  "id": "_flashrt_fp8_ffn_cuda_5de4768",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd3c15cc8814d66a0e05382fa807119863e15a0da11f935041f9f58e96e5091d
-size 307864

 version https://git-lfs.github.com/spec/v1
+oid sha256:35453f166929a1191c848cadbb9a8a180692a585ea5a1c84ef42731af14ebbdf
+size 307832

build/torch211-cxx11-cu130-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_fp8_ffn_cuda_c4d802d
-ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"

 import torch
+from . import _flashrt_fp8_ffn_cuda_5de4768
+ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"

build/torch211-cxx11-cu130-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-fp8-ffn",
-  "id": "_flashrt_fp8_ffn_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-fp8-ffn",
+  "id": "_flashrt_fp8_ffn_cuda_5de4768",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1f871097905e3aab25d5cfd6af9c0bc69925b99007af8dbbb9645b818fed9e9
-size 314112

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6c063d5fccd81e2b697f6b37feb6864e8a56478fb0b7efbc1a992d69244c581
+size 314080

build/torch212-cxx11-cu130-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_fp8_ffn_cuda_c4d802d
-ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"

 import torch
+from . import _flashrt_fp8_ffn_cuda_5de4768
+ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"

build/torch212-cxx11-cu130-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-fp8-ffn",
-  "id": "_flashrt_fp8_ffn_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-fp8-ffn",
+  "id": "_flashrt_fp8_ffn_cuda_5de4768",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f8328f110f9d95e7b0ad793107ec20df893e887ea1ebc4044c0b8a64a4c9210
-size 314112

 version https://git-lfs.github.com/spec/v1
+oid sha256:86bcfc15a0fb8f01e00b10ae650f19783ea298fd7ebda9dceef741ac7081610f
+size 314080

build/torch212-cxx11-cu132-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_fp8_ffn_cuda_c4d802d
-ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"

 import torch
+from . import _flashrt_fp8_ffn_cuda_5de4768
+ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"

build/torch212-cxx11-cu132-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-fp8-ffn",
-  "id": "_flashrt_fp8_ffn_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-fp8-ffn",
+  "id": "_flashrt_fp8_ffn_cuda_5de4768",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],