liangsu9988 committed on
Commit
b208e84
·
verified ·
1 Parent(s): 8bd9340

Uploaded using `kernel-builder`.

Browse files
benchmarks/benchmark.py CHANGED
@@ -201,6 +201,12 @@ def load_installed_ops(artifact: str | None):
201
  sys.path.remove(artifact)
202
 
203
 
 
 
 
 
 
 
204
  def quantize_fp8(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
205
  return torch.clamp(x.float() / scale.float(), -448.0, 448.0).to(torch.float8_e4m3fn)
206
 
@@ -438,8 +444,10 @@ def write_markdown(path: Path, results: list[Result], args) -> None:
438
 
439
  def main() -> None:
440
  parser = argparse.ArgumentParser()
441
- parser.add_argument("--backend", choices=["source", "installed"], default="source")
442
  parser.add_argument("--artifact", default=None)
 
 
443
  parser.add_argument("--shapes", default="all")
444
  parser.add_argument("--warmup", type=int, default=5)
445
  parser.add_argument("--iters", type=int, default=20)
@@ -463,7 +471,12 @@ def main() -> None:
463
  if not torch.cuda.is_available():
464
  raise SystemExit("CUDA is required")
465
  torch.manual_seed(17)
466
- ops = load_source_ops() if args.backend == "source" else load_installed_ops(args.artifact)
 
 
 
 
 
467
  requested = [s.strip() for s in args.shapes.split(",")]
468
  names: list[str] = []
469
  for item in requested:
 
201
  sys.path.remove(artifact)
202
 
203
 
204
+ def load_hub_ops(repo_id: str, version: int):
205
+ from kernels import get_kernel
206
+
207
+ return get_kernel(repo_id, version=version, trust_remote_code=True)
208
+
209
+
210
  def quantize_fp8(x: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
211
  return torch.clamp(x.float() / scale.float(), -448.0, 448.0).to(torch.float8_e4m3fn)
212
 
 
444
 
445
  def main() -> None:
446
  parser = argparse.ArgumentParser()
447
+ parser.add_argument("--backend", choices=["source", "installed", "hub"], default="source")
448
  parser.add_argument("--artifact", default=None)
449
+ parser.add_argument("--repo-id", default="flashrt/flashrt-fp8-ffn")
450
+ parser.add_argument("--version", type=int, default=1)
451
  parser.add_argument("--shapes", default="all")
452
  parser.add_argument("--warmup", type=int, default=5)
453
  parser.add_argument("--iters", type=int, default=20)
 
471
  if not torch.cuda.is_available():
472
  raise SystemExit("CUDA is required")
473
  torch.manual_seed(17)
474
+ if args.backend == "source":
475
+ ops = load_source_ops()
476
+ elif args.backend == "installed":
477
+ ops = load_installed_ops(args.artifact)
478
+ else:
479
+ ops = load_hub_ops(args.repo_id, args.version)
480
  requested = [s.strip() for s in args.shapes.split(",")]
481
  names: list[str] = []
482
  for item in requested:
build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfc90be7bacf4046348951be8440728ede008ac9a5df78da6ba64275fa09ba3d
3
- size 309744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cba29914868c02e1355bef5aab55ef03909936318d9e03747333ab0b3a8cd94c
3
+ size 309712
build/torch211-cxx11-cu128-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_fp8_ffn_cuda_c4d802d
3
- ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_fp8_ffn_cuda_5de4768
3
+ ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
build/torch211-cxx11-cu128-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-fp8-ffn",
3
- "id": "_flashrt_fp8_ffn_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-fp8-ffn",
3
+ "id": "_flashrt_fp8_ffn_cuda_5de4768",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd3c15cc8814d66a0e05382fa807119863e15a0da11f935041f9f58e96e5091d
3
- size 307864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35453f166929a1191c848cadbb9a8a180692a585ea5a1c84ef42731af14ebbdf
3
+ size 307832
build/torch211-cxx11-cu130-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_fp8_ffn_cuda_c4d802d
3
- ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_fp8_ffn_cuda_5de4768
3
+ ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
build/torch211-cxx11-cu130-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-fp8-ffn",
3
- "id": "_flashrt_fp8_ffn_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-fp8-ffn",
3
+ "id": "_flashrt_fp8_ffn_cuda_5de4768",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1f871097905e3aab25d5cfd6af9c0bc69925b99007af8dbbb9645b818fed9e9
3
- size 314112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c063d5fccd81e2b697f6b37feb6864e8a56478fb0b7efbc1a992d69244c581
3
+ size 314080
build/torch212-cxx11-cu130-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_fp8_ffn_cuda_c4d802d
3
- ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_fp8_ffn_cuda_5de4768
3
+ ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
build/torch212-cxx11-cu130-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-fp8-ffn",
3
- "id": "_flashrt_fp8_ffn_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-fp8-ffn",
3
+ "id": "_flashrt_fp8_ffn_cuda_5de4768",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_fp8_ffn_cuda_c4d802d.abi3.so → _flashrt_fp8_ffn_cuda_5de4768.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8328f110f9d95e7b0ad793107ec20df893e887ea1ebc4044c0b8a64a4c9210
3
- size 314112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86bcfc15a0fb8f01e00b10ae650f19783ea298fd7ebda9dceef741ac7081610f
3
+ size 314080
build/torch212-cxx11-cu132-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_fp8_ffn_cuda_c4d802d
3
- ops = torch.ops._flashrt_fp8_ffn_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_fp8_ffn_cuda_c4d802d::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_fp8_ffn_cuda_5de4768
3
+ ops = torch.ops._flashrt_fp8_ffn_cuda_5de4768
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_fp8_ffn_cuda_5de4768::{op_name}"
build/torch212-cxx11-cu132-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-fp8-ffn",
3
- "id": "_flashrt_fp8_ffn_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-fp8-ffn",
3
+ "id": "_flashrt_fp8_ffn_cuda_5de4768",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],