@@ -452,7 +452,6 @@ __global__ void RasterizeMeshesBackwardCudaKernel(
452
452
const bool inside = b_pp.x > 0 .0f && b_pp.y > 0 .0f && b_pp.z > 0 .0f ;
453
453
const float sign = inside ? -1 .0f : 1 .0f ;
454
454
455
- // TODO(T52813608) Add support for non-square images.
456
455
auto grad_dist_f = PointTriangleDistanceBackward (
457
456
pxy, v0xy, v1xy, v2xy, sign * grad_dist_upstream);
458
457
const float2 ddist_d_v0 = thrust::get<1 >(grad_dist_f);
@@ -606,7 +605,7 @@ __global__ void RasterizeMeshesCoarseCudaKernel(
606
605
const float half_pix_x = NDC_x_half_range / W;
607
606
const float half_pix_y = NDC_y_half_range / H;
608
607
609
- // This is a boolean array of shape (num_bins, num_bins , chunk_size)
608
+ // This is a boolean array of shape (num_bins_y, num_bins_x , chunk_size)
610
609
// stored in shared memory that will track whether each point in the chunk
611
610
// falls into each bin of the image.
612
611
BitMask binmask ((unsigned int *)sbuf, num_bins_y, num_bins_x, chunk_size);
@@ -755,7 +754,7 @@ at::Tensor RasterizeMeshesCoarseCuda(
755
754
const int num_bins_y = 1 + (H - 1 ) / bin_size;
756
755
const int num_bins_x = 1 + (W - 1 ) / bin_size;
757
756
758
- if (num_bins_y >= kMaxFacesPerBin || num_bins_x >= kMaxFacesPerBin ) {
757
+ if (num_bins_y >= kMaxItemsPerBin || num_bins_x >= kMaxItemsPerBin ) {
759
758
std::stringstream ss;
760
759
ss << " In Coarse Rasterizer got num_bins_y: " << num_bins_y
761
760
<< " , num_bins_x: " << num_bins_x << " , "
@@ -800,7 +799,7 @@ at::Tensor RasterizeMeshesCoarseCuda(
800
799
// ****************************************************************************
801
800
__global__ void RasterizeMeshesFineCudaKernel (
802
801
const float * face_verts, // (F, 3, 3)
803
- const int32_t * bin_faces, // (N, B, B , T)
802
+ const int32_t * bin_faces, // (N, BH, BW , T)
804
803
const float blur_radius,
805
804
const int bin_size,
806
805
const bool perspective_correct,
@@ -813,12 +812,12 @@ __global__ void RasterizeMeshesFineCudaKernel(
813
812
const int H,
814
813
const int W,
815
814
const int K,
816
- int64_t * face_idxs, // (N, S, S , K)
817
- float * zbuf, // (N, S, S , K)
818
- float * pix_dists, // (N, S, S , K)
819
- float * bary // (N, S, S , K, 3)
815
+ int64_t * face_idxs, // (N, H, W , K)
816
+ float * zbuf, // (N, H, W , K)
817
+ float * pix_dists, // (N, H, W , K)
818
+ float * bary // (N, H, W , K, 3)
820
819
) {
821
- // This can be more than S^2 if S % bin_size != 0
820
+ // This can be more than H * W if H or W are not divisible by bin_size.
822
821
int num_pixels = N * BH * BW * bin_size * bin_size;
823
822
int num_threads = gridDim .x * blockDim .x ;
824
823
int tid = blockIdx .x * blockDim .x + threadIdx .x ;
0 commit comments