Uploaded using `kernel-builder`.
Browse files
- benchmarks/benchmark.py +15 -2
- build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
- build/torch211-cxx11-cu128-x86_64-linux/_ops.py +3 -3
- build/torch211-cxx11-cu128-x86_64-linux/metadata.json +1 -1
- build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
- build/torch211-cxx11-cu130-x86_64-linux/_ops.py +3 -3
- build/torch211-cxx11-cu130-x86_64-linux/metadata.json +1 -1
- build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
- build/torch212-cxx11-cu130-x86_64-linux/_ops.py +3 -3
- build/torch212-cxx11-cu130-x86_64-linux/metadata.json +1 -1
- build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} +2 -2
- build/torch212-cxx11-cu132-x86_64-linux/_ops.py +3 -3
- build/torch212-cxx11-cu132-x86_64-linux/metadata.json +1 -1
benchmarks/benchmark.py
CHANGED
|
@@ -201,6 +201,12 @@ def load_installed_ops(artifact: str | None):
|
|
| 201 |
sys.path.remove(artifact)
|
| 202 |
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
def quantize_fp8(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
|
| 205 |
return torch.clamp(x.float() / scale.float(), -448.0, 448.0).to(torch.float8_e4m3fn)
|
| 206 |
|
|
@@ -438,8 +444,10 @@ def write_markdown(path: Path, results: list[Result], args) -> None:
|
|
| 438 |
|
| 439 |
def main() -> None:
|
| 440 |
parser = argparse.ArgumentParser()
|
| 441 |
-
parser.add_argument("--backend", choices=["source", "installed"], default="source")
|
| 442 |
parser.add_argument("--artifact", default=None)
|
|
|
|
|
|
|
| 443 |
parser.add_argument("--shapes", default="all")
|
| 444 |
parser.add_argument("--warmup", type=int, default=5)
|
| 445 |
parser.add_argument("--iters", type=int, default=20)
|
|
@@ -463,7 +471,12 @@ def main() -> None:
|
|
| 463 |
if not torch.cuda.is_available():
|
| 464 |
raise SystemExit("CUDA is required")
|
| 465 |
torch.manual_seed(17)
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
requested = [s.strip() for s in args.shapes.split(",")]
|
| 468 |
names: list[str] = []
|
| 469 |
for item in requested:
|
|
|
|
| 201 |
sys.path.remove(artifact)
|
| 202 |
|
| 203 |
|
| 204 |
+
def load_hub_ops(repo_id: str, version: int):
|
| 205 |
+
from kernels import get_kernel
|
| 206 |
+
|
| 207 |
+
return get_kernel(repo_id, version=version, trust_remote_code=True)
|
| 208 |
+
|
| 209 |
+
|
| 210 |
def quantize_fp8(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
|
| 211 |
return torch.clamp(x.float() / scale.float(), -448.0, 448.0).to(torch.float8_e4m3fn)
|
| 212 |
|
|
|
|
| 444 |
|
| 445 |
def main() -> None:
|
| 446 |
parser = argparse.ArgumentParser()
|
| 447 |
+
parser.add_argument("--backend", choices=["source", "installed", "hub"], default="source")
|
| 448 |
parser.add_argument("--artifact", default=None)
|
| 449 |
+
parser.add_argument("--repo-id", default="flashrt/flashrt-fp8-ffn")
|
| 450 |
+
parser.add_argument("--version", type=int, default=1)
|
| 451 |
parser.add_argument("--shapes", default="all")
|
| 452 |
parser.add_argument("--warmup", type=int, default=5)
|
| 453 |
parser.add_argument("--iters", type=int, default=20)
|
|
|
|
| 471 |
if not torch.cuda.is_available():
|
| 472 |
raise SystemExit("CUDA is required")
|
| 473 |
torch.manual_seed(17)
|
| 474 |
+
if args.backend == "source":
|
| 475 |
+
ops = load_source_ops()
|
| 476 |
+
elif args.backend == "installed":
|
| 477 |
+
ops = load_installed_ops(args.artifact)
|
| 478 |
+
else:
|
| 479 |
+
ops = load_hub_ops(args.repo_id, args.version)
|
| 480 |
requested = [s.strip() for s in args.shapes.split(",")]
|
| 481 |
names: list[str] = []
|
| 482 |
for item in requested:
|
build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cba29914868c02e1355bef5aab55ef03909936318d9e03747333ab0b3a8cd94c
|
| 3 |
+
size 309712
|
build/torch211-cxx11-cu128-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import _flashrt_fp8_ffn_cuda_c4d802d
|
| 3 |
-
ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flashrt_fp8_ffn_cuda_5de4768
|
| 3 |
+
ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
|
build/torch211-cxx11-cu128-x86_64-linux/metadata.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
-
"id": "_flashrt_fp8_ffn_cuda_c4d802d",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|
|
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
+
"id": "_flashrt_fp8_ffn_cuda_5de4768",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|
build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35453f166929a1191c848cadbb9a8a180692a585ea5a1c84ef42731af14ebbdf
|
| 3 |
+
size 307832
|
build/torch211-cxx11-cu130-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import _flashrt_fp8_ffn_cuda_c4d802d
|
| 3 |
-
ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flashrt_fp8_ffn_cuda_5de4768
|
| 3 |
+
ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
|
build/torch211-cxx11-cu130-x86_64-linux/metadata.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
-
"id": "_flashrt_fp8_ffn_cuda_c4d802d",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|
|
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
+
"id": "_flashrt_fp8_ffn_cuda_5de4768",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|
build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6c063d5fccd81e2b697f6b37feb6864e8a56478fb0b7efbc1a992d69244c581
|
| 3 |
+
size 314080
|
build/torch212-cxx11-cu130-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import _flashrt_fp8_ffn_cuda_c4d802d
|
| 3 |
-
ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flashrt_fp8_ffn_cuda_5de4768
|
| 3 |
+
ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
|
build/torch212-cxx11-cu130-x86_64-linux/metadata.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
-
"id": "_flashrt_fp8_ffn_cuda_c4d802d",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|
|
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
+
"id": "_flashrt_fp8_ffn_cuda_5de4768",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|
build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86bcfc15a0fb8f01e00b10ae650f19783ea298fd7ebda9dceef741ac7081610f
|
| 3 |
+
size 314080
|
build/torch212-cxx11-cu132-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import _flashrt_fp8_ffn_cuda_c4d802d
|
| 3 |
-
ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flashrt_fp8_ffn_cuda_5de4768
|
| 3 |
+
ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
|
build/torch212-cxx11-cu132-x86_64-linux/metadata.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
-
"id": "_flashrt_fp8_ffn_cuda_c4d802d",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|
|
|
|
| 1 |
{
|
| 2 |
"name": "flashrt-fp8-ffn",
|
| 3 |
+
"id": "_flashrt_fp8_ffn_cuda_5de4768",
|
| 4 |
"version": 1,
|
| 5 |
"license": "Apache-2.0",
|
| 6 |
"python-depends": [],
|