feat: small grayscale kernel

Files changed (10) hide show

.gitattributes +2 -0
.gitignore +10 -0
build.toml +16 -0
csrc/img2gray.cu +38 -0
flake.lock +168 -0
flake.nix +17 -0
scripts/sanity.py +23 -0
torch-ext/img2gray/__init__.py +18 -0
torch-ext/torch_binding.cpp +12 -0
torch-ext/torch_binding.h +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ *.so filter=lfs diff=lfs merge=lfs -text
2	+ *.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,10 @@

+cmake
+.venv
+__pycache__
+*.pyc
+torch-ext/img2gray/*.abi3.so
+torch-ext/img2gray/_ops.py
+torch-ext/registration.h
+CMakeLists.txt
+pyproject.toml
+setup.py

build.toml ADDED Viewed

	@@ -0,0 +1,16 @@

+[general]
+name = "img2gray"
+universal = false
+[torch]
+src = [
+  "torch-ext/torch_binding.cpp",
+  "torch-ext/torch_binding.h"
+]
+[kernel.img2gray]
+backend = "cuda"
+depends = ["torch"]
+src = [
+    "csrc/img2gray.cu",
+]

csrc/img2gray.cu ADDED Viewed

	@@ -0,0 +1,38 @@

+#include <cstdint>
+#include <limits>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAException.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <torch/torch.h>
+// Converts an interleaved 8-bit RGB image to an 8-bit grayscale image.
+//
+// Memory layout: `input` holds height * width pixels in row-major order, three
+// consecutive bytes (R, G, B) per pixel; `output` holds height * width bytes.
+// Launch layout: 2D grid, x covers columns and y covers rows. Threads that land
+// outside the image return immediately, so the grid may overshoot the image.
+// No shared memory is used. `input` and `output` must not overlap, because both
+// pointers are declared __restrict__.
+__global__ void img2gray_kernel(const uint8_t* __restrict__ input,
+                                uint8_t* __restrict__ output,
+                                int width,
+                                int height) {
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= width || y >= height) {
+        return;
+    }
+    // Index in 64 bits: (y * width + x) * 3 overflows a 32-bit int once the
+    // image has more than roughly 715 million pixels.
+    const size_t pixel = static_cast<size_t>(y) * static_cast<size_t>(width) + static_cast<size_t>(x);
+    const size_t idx = pixel * 3; // RGB has 3 channels
+    const uint8_t r = input[idx];
+    const uint8_t g = input[idx + 1];
+    const uint8_t b = input[idx + 2];
+    // Luminosity method; the three weights sum to 1, so the result stays within 0..255.
+    const float luma = 0.21f * r + 0.72f * g + 0.07f * b;
+    // Round to nearest rather than truncating: truncation biases every pixel
+    // downwards and lets float rounding turn pure white into 254.
+    output[pixel] = static_cast<uint8_t>(luma + 0.5f);
+}
+// Host-side launcher exposed to PyTorch: converts one RGB image to grayscale.
+//
+// input:  contiguous CUDA uint8 tensor of shape (height, width, 3).
+// output: contiguous CUDA uint8 tensor on the same device holding exactly
+//         height * width elements; its contents are overwritten.
+// Throws (via TORCH_CHECK / the launch check) when a precondition is violated
+// or the kernel launch is rejected.
+// The kernel is enqueued on PyTorch's current CUDA stream and is NOT
+// synchronized here; work on that stream is ordered after it, and execution
+// errors surface at the next synchronizing call.
+void img2gray_cuda(torch::Tensor input, torch::Tensor output) {
+    TORCH_CHECK(input.is_cuda() && output.is_cuda(), "img2gray: input and output must be CUDA tensors");
+    TORCH_CHECK(input.device() == output.device(), "img2gray: input and output must be on the same device");
+    TORCH_CHECK(input.scalar_type() == torch::kUInt8 && output.scalar_type() == torch::kUInt8,
+                "img2gray: input and output must have dtype uint8");
+    TORCH_CHECK(input.dim() == 3 && input.size(2) == 3, "img2gray: input must have shape (height, width, 3)");
+    TORCH_CHECK(input.is_contiguous() && output.is_contiguous(), "img2gray: input and output must be contiguous");
+    const int64_t height64 = input.size(0);
+    const int64_t width64 = input.size(1);
+    // The kernel takes 32-bit dimensions; refuse sizes that would be silently narrowed.
+    TORCH_CHECK(height64 <= std::numeric_limits<int>::max() && width64 <= std::numeric_limits<int>::max(),
+                "img2gray: image dimensions do not fit in a 32-bit int");
+    TORCH_CHECK(output.numel() == height64 * width64, "img2gray: output must hold height * width elements");
+    // An empty image would produce a zero-sized grid, which is an invalid launch configuration.
+    if (height64 == 0 || width64 == 0) {
+        return;
+    }
+    const int width = static_cast<int>(width64);
+    const int height = static_cast<int>(height64);
+    // Launch on the device that owns the tensors, not whichever device happens to be current.
+    const c10::cuda::CUDAGuard device_guard(input.device());
+    // Use PyTorch's current stream so the kernel is ordered with the surrounding tensor ops.
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+    const dim3 blockSize(16, 16);
+    const dim3 gridSize((width + blockSize.x - 1) / blockSize.x, (height + blockSize.y - 1) / blockSize.y);
+    img2gray_kernel<<<gridSize, blockSize, 0, stream>>>(
+        input.data_ptr<uint8_t>(),
+        output.data_ptr<uint8_t>(),
+        width,
+        height
+    );
+    // Report launch-configuration errors (e.g. a grid that is too large) instead of dropping them.
+    C10_CUDA_KERNEL_LAUNCH_CHECK();
+}

flake.lock ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "nodes": {
+    "flake-compat": {
+      "locked": {
+        "lastModified": 1747046372,
+        "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-compat_2": {
+      "locked": {
+        "lastModified": 1733328505,
+        "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "flake-utils_2": {
+      "inputs": {
+        "systems": "systems_2"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "hf-nix": {
+      "inputs": {
+        "flake-compat": "flake-compat_2",
+        "flake-utils": "flake-utils_2",
+        "nixpkgs": "nixpkgs"
+      },
+      "locked": {
+        "lastModified": 1750234878,
+        "narHash": "sha256-q9DRC9zdpzUf88qqg1qbhP1qgJbE2cMtn8oUmosuyT8=",
+        "owner": "huggingface",
+        "repo": "hf-nix",
+        "rev": "c7132f90763d756da3e77da62e01be0a4546dc57",
+        "type": "github"
+      },
+      "original": {
+        "owner": "huggingface",
+        "repo": "hf-nix",
+        "type": "github"
+      }
+    },
+    "kernel-builder": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "flake-utils": "flake-utils",
+        "hf-nix": "hf-nix",
+        "nixpkgs": [
+          "kernel-builder",
+          "hf-nix",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1750790603,
+        "narHash": "sha256-m7FoTYWDV811Y7FiuJPa/uCOV63rf6LHxWportuI9h0=",
+        "owner": "huggingface",
+        "repo": "kernel-builder",
+        "rev": "37cad313efea84e213b2fc13b2ec808d273a126d",
+        "type": "github"
+      },
+      "original": {
+        "owner": "huggingface",
+        "repo": "kernel-builder",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1747820358,
+        "narHash": "sha256-fTqsZsUX6M3yeEvgyQvXcbGmT2CaRVyVwsi8eK29Oj4=",
+        "owner": "danieldk",
+        "repo": "nixpkgs",
+        "rev": "d3c1681180717528068082103bf323147de6ab0b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "danieldk",
+        "ref": "cudatoolkit-12.9-kernel-builder",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "kernel-builder": "kernel-builder"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_2": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}

flake.nix ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  description = "Flake for img2gray kernel";
+  inputs = {
+    kernel-builder.url = "github:huggingface/kernel-builder"; # the only input this flake declares
+  };
+  outputs =
+    {
+      self,
+      kernel-builder,
+    }:
+    kernel-builder.lib.genFlakeOutputs { # every output is produced by kernel-builder's helper
+      path = ./.; # the directory that contains this flake
+      rev = self.shortRev or self.dirtyShortRev or self.lastModifiedDate; # falls back left to right when an attribute is missing
+    };
+}

scripts/sanity.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import torch
+import img2gray
+from PIL import Image
+import numpy as np
+print(dir(img2gray))
+img = Image.open("/home/ubuntu/Projects/img2gray/kernel-builder-logo-color.png").convert("RGB")
+img = np.array(img)
+img_tensor = torch.from_numpy(img)
+print(img_tensor.shape)  # HWC
+img_tensor = img_tensor.permute(2, 0, 1).unsqueeze(0).contiguous().cuda()  # BCHW
+print(img_tensor.shape)  # BCHW
+gray_tensor = img2gray.img2gray(img_tensor).squeeze()
+print(gray_tensor.shape)  # B1HW
+# save the output image
+gray_img = gray_tensor.cpu().numpy()  # 1HW -> HW
+gray_img = Image.fromarray(gray_img.astype(np.uint8), mode="L")
+gray_img.save("/home/ubuntu/Projects/img2gray/kernel-builder-logo-gray.png")

torch-ext/img2gray/__init__.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import torch
+from ._ops import ops
+def img2gray(input: torch.Tensor) -> torch.Tensor:
+    # we expect input to be in BCHW format
+    batch, channels, height, width = input.shape
+    assert channels == 3, "Input image must have 3 channels (RGB)"
+    output = torch.empty((batch, 1, height, width), device=input.device, dtype=input.dtype)
+    for b in range(batch):
+        single_image = input[b].permute(1, 2, 0).contiguous()  # HWC
+        single_output = output[b].reshape(height, width)  # HW
+        ops.img2gray(single_image, single_output)
+    return output

torch-ext/torch_binding.cpp ADDED Viewed

	@@ -0,0 +1,12 @@

+#include <torch/library.h>
+#include "registration.h"
+#include "torch_binding.h"
+// Declares the extension's operator schema and binds its CUDA implementation.
+TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
+    // `output` is written in place, so the schema marks it `Tensor!`. Without
+    // the annotation the op is declared as not mutating its arguments.
+    ops.def("img2gray(Tensor input, Tensor! output) -> ()");
+    ops.impl("img2gray", torch::kCUDA, &img2gray_cuda);
+}
+REGISTER_EXTENSION(TORCH_EXTENSION_NAME)

torch-ext/torch_binding.h ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ #include <torch/torch.h>
2	+
3	+ void img2gray_cuda(torch::Tensor input, torch::Tensor output);